find your publications in a bibtex file

starting from a bib file with many entries, some of which are publications you (co-)authored, you would like to extract only those and list them in a tex file and on an html webpage.

you could simply use bibtex2html which does a very nice job but doesn’t allow you to fine-tune the ordering of the results (i think). so the approach i’m presenting here combines this tool with pybtex which i guess would be able to do the whole job by itself.

this script produces a tex file and an html file:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
#!/usr/bin/env python
# Does two tasks:
#
# 1) Generates an HTML page of my publications, split into refereed
#    publications, proceedings and non-refereed publications; within each
#    section the newest come first
# 2) Generates LaTeX-formatted publications with the same sections
 
from operator import itemgetter
from pybtex.database.input import bibtex
 
# input: the BibTeX database to scan (absolute path -- adjust for your setup)
bibfile = "/home/buschi/cv/db.bib"
# outputs: the LaTeX and HTML publication lists (relative paths, so they end
# up in the directory the script is run from)
texout = "sbusch_publications.tex"
htmout = "sbusch_publications.html"
 
# parse the whole database once; everything below works on bib_data.entries
parser = bibtex.Parser()
bib_data = parser.parse_file(bibfile)
 
# collect every entry that lists me among its authors
# (key -> entry, for all entries where one author is "Sebastian ... Busch")
sbusch_all = {}
for key, entry in bib_data.entries.items():
    try:
        people = entry.persons['author']
    except KeyError:
        # no author (e.g. a collection)
        continue
    for person in people:
        try:
            is_me = (person.first()[0] == u'Sebastian'
                     and person.last()[0] == u'Busch')
        except IndexError:
            # no first / last name
            continue
        if is_me:
            sbusch_all[key] = entry
 
# categorise
#
# Every entry of sbusch_all goes into at most one of three buckets:
#   non-refereed: tech reports, theses, misc, or anything that carries a
#                 "nonrefereed" field
#   proceedings:  inproceedings entries
#   refereed:     everything else, unless its journal is "in preparation"
#                 or its volume is "submitted" (those are dropped)
# Next to each bucket dict a list of (key, -year, my author position) tuples
# is filled; it is what the sorting step works on.
_NONREF_TYPES = ("techreport", "mastersthesis", "phdthesis", "misc")


def _is_me(person):
    """Return True if the person's first given name / first surname part is mine.

    Persons without a first or last name (the same case the collection loop
    guards against) simply do not match instead of raising IndexError.
    """
    try:
        return person.first()[0] == 'Sebastian' and person.last()[0] == 'Busch'
    except IndexError:
        return False


def _field_or_none(publ, name):
    """Return publ.fields[name], or None if the entry has no such field."""
    try:
        return publ.fields[name]
    except KeyError:
        return None


sbusch_nonref = {}
sbusch_nonref_sort = []
sbusch_proc = {}
sbusch_proc_sort = []
sbusch_ref = {}
sbusch_ref_sort = []
for key, publ in sbusch_all.items():
    # strip for 2009--; - to get the ones with largest years first
    year = -int(publ.fields['year'].strip('-'))

    # my position in the author list (0 = first author); the fallback
    # len(authors) is only a safety net and sorts behind every real position
    authors = publ.persons['author']
    mypos = next((pos for pos, person in enumerate(authors) if _is_me(person)),
                 len(authors))

    if publ.type in _NONREF_TYPES or "nonrefereed" in publ.fields.keys():
        sbusch_nonref[key] = publ
        sbusch_nonref_sort.append((key, year, mypos))
    elif publ.type == "inproceedings":
        sbusch_proc[key] = publ
        sbusch_proc_sort.append((key, year, mypos))
    # an entry without a journal field is not "in preparation", so it is
    # treated like any other published item instead of raising KeyError
    elif _field_or_none(publ, 'journal') != "in preparation":
        if _field_or_none(publ, 'volume') != "submitted":
            sbusch_ref[key] = publ
            sbusch_ref_sort.append((key, year, mypos))
 
# sort
# the newest publications first
#     the ones where i'm first author first
def _sorted_keys(entries):
    """Return the keys of (key, -year, position) tuples, ordered by -year, then position."""
    return [key for key, _year, _pos in sorted(entries, key=itemgetter(1, 2))]


sbusch_nonref_sorted = _sorted_keys(sbusch_nonref_sort)
sbusch_proc_sorted = _sorted_keys(sbusch_proc_sort)
sbusch_ref_sorted = _sorted_keys(sbusch_ref_sort)

# write each ordering to a text file, one key per line; the HTML and LaTeX
# sections further down read these files back
for keys, filename in [(sbusch_nonref_sorted, 'nonref.txt'),
                       (sbusch_proc_sorted, 'proc.txt'),
                       (sbusch_ref_sorted, 'ref.txt')]:
    # "with" closes the file even if a write fails
    with open(filename, 'w') as keyfile:
        for key in keys:
            keyfile.write(str(key) + "\n")
 
from os import system, remove
from re import compile, DOTALL
 
pubs_html = ''

# one bibtex2html run per section; each item is
# (heading, extra bibtex2html option, file holding the citation keys)
for heading, extra_opt, citefile in [
        ('Refereed Publications', '--no-footer', 'ref.txt'),
        ('Proceedings', '--no-footer', 'proc.txt'),
        ('Non-Refereed Publications', '', 'nonref.txt')]:
    # everything is accumulated in pubs_html and written to htmout at the end
    pubs_html += '<h1>%s</h1>' % heading

    # remove the previous section's output first: should bibtex2html fail
    # now, the old auxfile.html would otherwise be read again and show up a
    # second time under this heading
    try:
        remove('auxfile.html')
    except OSError:
        pass

    # restrict the run to the keys in citefile; sort by reverse-date; don't
    # generate keys; use the sbusch_web style
    # writes into auxfile.html
    system("bibtex2html -q -d -r -dl -nobibsource -nokeys -m macros.tex -citefile %s -s sbusch_web -nodoc %s -o auxfile %s" % (citefile, extra_opt, bibfile))
    try:
        with open('auxfile.html', 'r') as sbusch_html:
            pubs_html += sbusch_html.read()
    except IOError:
        # best effort: bibtex2html produced nothing for this section, so
        # only the heading is emitted
        pass

# change \"[ bib ]\" into \"[&nbsp;bib&nbsp;]\"
biblinkRE = compile(r'\[ (<a href="[^"]+">bib</a>) ]')
pubs_html = biblinkRE.sub(r'[&nbsp;\1&nbsp;]', pubs_html)
# remove explicit line breaks
deletebrRE = compile(r'<br />')
pubs_html = deletebrRE.sub('', pubs_html)

# write the assembled page into the HTML output file
with open(htmout, 'w') as pubs_html_file:
    pubs_html_file.write(pubs_html)
 
#########
 
# now we're going to generate a LaTeX version of my pubs, also sorted
 
# RE selects bibitems from bbl (key stored in group(1), entry in group(2))
# NOTE: nothing in this script uses it; the definition is kept so the
# module-level name stays available
bibitemRE = compile(r'\\bibitem\[\]\{([^\}]+)\}(.*)', DOTALL)

# \putbib gets the database path without its ".bib" suffix; only strip the
# suffix when it is really there
bibbase = bibfile[:-4] if bibfile.endswith('.bib') else bibfile

# texout will contain the LaTeX version of my pubs
with open(texout, 'w') as refs_tex:
    # backslash escaped explicitly: "\s" is not a valid string escape
    refs_tex.write("\\section{Publications}\n")

    # one bibunit per category: (file with the sorted keys, section title)
    for keyfile, title in [
            ('ref.txt', 'Articles in Refereed Scientific Journals'),
            ('proc.txt', 'Articles in Conference Proceedings'),
            ('nonref.txt', 'Other')]:
        with open(keyfile, 'r') as auxfile:
            # one key per line; dropping empty strings discards the piece
            # after the final newline without losing a real key when the
            # file has no trailing newline
            sorted_keys = [k for k in auxfile.read().split('\n') if k]

        # the leading "%" line is a LaTeX comment separating the units
        refs_tex.write('%s\n\\renewcommand\\refname{%s}\n\\begin{bibunit}[unsrt]\n' % ("%", title))
        # \nocite in sorted order + the unsrt style keeps our ordering
        for key in sorted_keys:
            refs_tex.write('\\nocite{' + str(key) + '}\n')
        refs_tex.write('\\putbib[%s]\n\\end{bibunit}\n' % bibbase)
 
# clean up temp files
# (the bibtex2html output and the three key lists written earlier)
for leftover in ("auxfile.html", "nonref.txt", "proc.txt", "ref.txt"):
    try:
        remove(leftover)
    except OSError:
        # best effort: a file that cannot be removed is simply left alone
        pass

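the tex file can then be included in another document and processed with bibtex. since it uses the bibunit environment and \putbib, that document needs to load the bibunits package.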

Leave a Reply