look up bibliographical information from an arxiv id

This Python script takes one or more arXiv ids as command-line arguments and prints, for each of them, a BibTeX entry carrying the bibliographic information.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
#!/usr/bin/env python
"""Look up bibliographic information for arXiv ids and print BibTeX entries.

Usage: arxiv2bib.py ID [ID ...]

Each argument may be a bare id (``1234.5678``, ``hep-th/9901001``), carry an
``arXiv:`` prefix, or be an ``arxiv.org/abs/`` URL.  A trailing version
suffix such as ``v2`` is dropped.  Runs on Python 2 and Python 3.
"""

import re
import sys
from xml.dom import minidom

try:
    from urllib.request import urlopen  # Python 3
except ImportError:
    from urllib import urlopen  # Python 2

API_URL = 'http://export.arxiv.org/api/query?id_list='

# Decorations removed from the front of an argument, in the order in which
# they can occur ("arxiv:" or a URL scheme first, then host, then path).
_PREFIXES = ("arxiv:", "http://", "https://", "www.", "arxiv.org/abs/")

# Version marker at the very end of an id, e.g. the "v2" in "1234.5678v2".
_VERSION_SUFFIX = re.compile(r"v\d+$")


def normalize_arxiv_id(arg):
    """Return the bare arXiv id contained in the command-line argument *arg*.

    Known prefixes are removed as whole prefixes (case-insensitively), not
    as character sets, so ids such as ``hep-th/9901001`` keep their leading
    letters.  Only a trailing ``v<digits>`` is treated as a version, so ids
    that merely contain a ``v`` (``solv-int/9901001``) stay intact.
    """
    xid = arg.strip()
    for prefix in _PREFIXES:
        if xid.lower().startswith(prefix):
            xid = xid[len(prefix):]
    return _VERSION_SUFFIX.sub("", xid).strip()


def _node_text(parent, tag):
    """Return the whitespace-normalized text of the first *tag* under *parent*.

    Returns an empty string when the element is missing or empty instead of
    raising, so a sparse feed entry does not abort the whole run.
    """
    nodes = parent.getElementsByTagName(tag)
    if not nodes:
        return ""
    raw = "".join(
        child.data
        for child in nodes[0].childNodes
        if child.nodeType in (child.TEXT_NODE, child.CDATA_SECTION_NODE)
    )
    # Collapse line breaks and runs of blanks inside the element text so
    # they do not end up inside a BibTeX field.
    return " ".join(raw.split())


def bibtex_entry(entry, xid):
    """Build the BibTeX ``@MISC`` record for one feed ``<entry>`` element.

    *entry* is a ``xml.dom.minidom`` element, *xid* the normalized arXiv id.
    The citation key is the first author's surname followed by the last two
    digits of the year taken from the ``<updated>`` element.  Returns the
    record as a single newline-joined string.
    """
    year = _node_text(entry, "updated")[:4]
    title = _node_text(entry, "title")

    authors = []
    first_surname = None
    for author in entry.getElementsByTagName("author"):
        parts = _node_text(author, "name").split()
        if not parts:
            continue
        surname = parts[-1]
        given = " ".join(parts[:-1])
        # A single-word name (e.g. a collaboration) has no given name;
        # emit it without a dangling ", ".
        authors.append(surname + ", " + given if given else surname)
        if first_surname is None:
            first_surname = surname

    # The key is computed per entry, so nothing can leak in from an entry
    # processed earlier when this one has no authors.
    key = (first_surname or "unknown") + year[-2:]

    return "\n".join([
        "@MISC{" + key + ",",
        "author = {" + " and ".join(authors) + "},",
        "title = {" + title + "},",
        "year = {" + year + "},",
        "eprint = {" + xid + "},",
        "URL = {http://www.arxiv.org/abs/" + xid + "},",
        "}",
    ])


def _fetch_feed(xid):
    """Download and parse the arXiv API Atom feed for *xid*."""
    usock = urlopen(API_URL + xid)
    try:
        return minidom.parse(usock)
    finally:
        # Close the connection even when the response is not valid XML.
        usock.close()


def main(argv=None):
    """Print a BibTeX entry for every arXiv id in *argv*.

    *argv* defaults to ``sys.argv[1:]``.  Returns the process exit status:
    0 on success, 1 if at least one id yielded no entry.
    """
    args = sys.argv[1:] if argv is None else argv
    failures = 0
    for arg in args:
        xid = normalize_arxiv_id(arg)
        xmldoc = _fetch_feed(xid)

        # The raw feed is echoed before the entry, as this script has
        # always done; single-argument print() behaves the same on
        # Python 2 and Python 3.
        print(xmldoc.toxml())
        print("")

        entries = xmldoc.getElementsByTagName("entry")
        if not entries:
            sys.stderr.write("no entry found for arxiv id '" + xid + "'\n")
            failures += 1
            continue

        print(bibtex_entry(entries[0], xid))
    return 1 if failures else 0


if __name__ == "__main__":
    sys.exit(main())

2 Responses to “look up bibliographical information from an arxiv id”

  1. Dan Stahlke says:

    Thank you for this script, it has saved me tons of time! I have made two small changes: first, to accept a greater variety of inputs (you can give a URL to the PDF), and second, to handle funny characters in authors’ names. If you would like a copy of these changes, send me an email.

  2. […] a python script that returns the corresponding bib-entry from an arxiv ID, you can find it on http://www.thamnos.de/misc/look-up-bibliographical-information-from-an-arxiv-id/ . If you save it e.g. as arxiv2bib.py, you can call it as arxiv2bib.py 1234.5678 or as arxiv2bib.py […]

Leave a Reply