This Python script takes one or more DOIs as command-line arguments and prints BibTeX entries built from the metadata that Crossref provides for them. You have to register with Crossref and enter the API key they give you in the script (the `crossref_api_key` variable near the top; the 5th line in the original listing).
#!/usr/bin/env python
"""Print BibTeX @ARTICLE entries for the DOIs given on the command line.

Usage: pass one or more DOIs as arguments.  Each may be bare
("10.1000/xyz") or carry a "doi:" / "http://dx.doi.org/" style prefix.

The metadata is fetched from the Crossref OpenURL service in UNIXREF
format; put the key Crossref gave you into ``crossref_api_key`` below.
"""

import sys
from contextlib import closing
from xml.dom import minidom
from xml.parsers.expat import ExpatError

try:  # Python 3
    from urllib.parse import quote
    from urllib.request import urlopen
except ImportError:  # Python 2
    from urllib import quote, urlopen

debug = False
crossref_api_key = 'your_crossref_api_key'

# Prefixes a user may paste in front of the actual DOI.
_DOI_PREFIXES = ("doi:", "http://", "https://", "dx.doi.org/", "doi.org/")


def normalize_doi(arg):
    """Return the bare DOI contained in the command-line argument *arg*.

    Surrounding whitespace and any leading "doi:", "http(s)://",
    "dx.doi.org/" or "doi.org/" prefixes are removed.  Only whole prefixes
    are removed: stripping by character set (as str.strip(chars) does)
    would also eat legitimate leading/trailing DOI characters such as a
    final "t" or "d".
    """
    doi = arg.strip()
    removed = True
    while removed:
        removed = False
        lowered = doi.lower()
        for prefix in _DOI_PREFIXES:
            if lowered.startswith(prefix):
                doi = doi[len(prefix):].strip()
                removed = True
                break
    return doi


def _first(parent, tag):
    """Return the first descendant element of *parent* named *tag*, or None.

    *parent* may itself be None, so lookups can be chained safely.
    """
    if parent is None:
        return None
    found = parent.getElementsByTagName(tag)
    return found[0] if found else None


def _text(node):
    """Return all character data below *node*, or "" when *node* is None.

    Text is collected recursively so that inline markup inside an element
    (e.g. <i> within a <title>) neither truncates the value nor crashes.
    """
    if node is None:
        return ""
    parts = []
    for child in node.childNodes:
        if child.nodeType in (child.TEXT_NODE, child.CDATA_SECTION_NODE):
            parts.append(child.data)
        elif child.nodeType == child.ELEMENT_NODE:
            parts.append(_text(child))
    return "".join(parts)


def fetch_unixref(doi):
    """Download the UNIXREF record for *doi* and return it as a minidom document."""
    url = ('http://www.crossref.org/openurl/?id=doi:' + quote(doi, safe='/')
           + '&noredirect=true&pid=' + quote(crossref_api_key, safe='@')
           + '&format=unixref')
    # closing() guarantees the socket is released even if parsing fails.
    with closing(urlopen(url)) as usock:
        return minidom.parse(usock)


def extract_record(xmldoc):
    """Extract the bibliographic fields of a journal article from *xmldoc*.

    Returns a dict with the keys ``journal``, ``year``, ``volume``,
    ``issue``, ``title``, ``authors`` (list of "Surname, Given" strings),
    ``first_author_surname``, ``first_page`` and ``last_page``.  Fields
    absent from the record are empty strings (or an empty list).

    Raises ValueError when the document holds no <journal> record.
    """
    journal = xmldoc
    for tag in ("doi_records", "doi_record", "crossref", "journal"):
        journal = _first(journal, tag)
    if journal is None:
        raise ValueError("no <journal> record in Crossref response")

    journal_issue = _first(journal, "journal_issue")
    article = _first(journal, "journal_article")

    year = _text(_first(_first(journal_issue, "publication_date"), "year"))
    if not year:
        # No issue-level date: fall back to the article's own date.
        year = _text(_first(_first(article, "publication_date"), "year"))

    authors = []
    first_author_surname = ""
    contributors = _first(article, "contributors")
    if contributors is not None:
        for person in contributors.getElementsByTagName("person_name"):
            surname = _text(_first(person, "surname"))
            given_name = _text(_first(person, "given_name"))
            if given_name:
                authors.append(surname + ", " + given_name)
            else:
                authors.append(surname)
            # getAttribute returns "" when the attribute is missing.
            if person.getAttribute("sequence") == 'first':
                first_author_surname = surname

    pages = _first(article, "pages")
    if pages is not None:
        first_page = _text(_first(pages, "first_page"))
        last_page = _text(_first(pages, "last_page"))
    else:
        # physical review: no page range, use the publisher's item number
        first_page = _text(
            _first(_first(article, "publisher_item"), "item_number"))
        last_page = ""

    return {
        "journal": _text(
            _first(_first(journal, "journal_metadata"), "full_title")),
        "year": year,
        "volume": _text(
            _first(_first(journal_issue, "journal_volume"), "volume")),
        "issue": _text(_first(journal_issue, "issue")),
        "title": _text(_first(_first(article, "titles"), "title")),
        "authors": authors,
        "first_author_surname": first_author_surname,
        "first_page": first_page,
        "last_page": last_page,
    }


def format_bibtex(record, doi):
    """Return the BibTeX @ARTICLE entry for *record* (see extract_record).

    The citation key is the first author's surname followed by the last
    two digits of the year.  Empty volume/issue/page fields are omitted.
    """
    lines = [
        "@ARTICLE{" + record["first_author_surname"]
        + record["year"][-2:] + ",",
        "author = {" + " and ".join(record["authors"]) + "},",
        "title = {" + record["title"] + "},",
        "journal = {" + record["journal"] + "},",
    ]
    if record["volume"]:
        lines.append("volume = {" + record["volume"] + "},")
    if record["issue"]:
        lines.append("number = {" + record["issue"] + "},")
    lines.append("year = {" + record["year"] + "},")
    if record["first_page"]:
        if record["last_page"]:
            lines.append("pages = {" + record["first_page"] + "-"
                         + record["last_page"] + "},")
        else:
            lines.append("pages = {" + record["first_page"] + "},")
    lines.append("doi = {" + doi + "},")
    lines.append("}")
    return "\n".join(lines)


def main(argv=None):
    """Print one BibTeX entry per DOI in *argv* (default: sys.argv[1:]).

    A DOI that cannot be fetched or parsed is reported on stderr and
    skipped, so the remaining DOIs are still processed.  Returns the
    process exit status: 0 on success, 1 if any DOI failed.
    """
    args = sys.argv[1:] if argv is None else argv
    status = 0
    for arg in args:
        doi = normalize_doi(arg)
        try:
            if not doi:
                raise ValueError("empty DOI")
            xmldoc = fetch_unixref(doi)
            if debug:
                print(xmldoc.toxml())
                print("")
            record = extract_record(xmldoc)
        except (IOError, ExpatError, ValueError) as err:
            sys.stderr.write("%s: %s\n" % (arg, err))
            status = 1
            continue
        print(format_bibtex(record, doi))
    return status


if __name__ == "__main__":
    sys.exit(main())
