-
Notifications
You must be signed in to change notification settings - Fork 0
/
biblioplast.py
77 lines (68 loc) · 2.67 KB
/
biblioplast.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
#!/usr/bin/python
# Reads a list of PMIDs (one per line) and outputs CSV or JSON.
# May need to run: pip install requests xmltodict unidecode
# (json ships with the Python standard library and needs no install).
# JSON output is the complete MedlineCitation record; CSV output is not.
# CSV columns: PMID, Title, Abstract
# Usage: echo "12345678" | biblioplast.py -j --or--
# cat ListOfPMIDs.txt | biblioplast.py > pmids_with_title_abs.csv
import argparse
import fileinput
import requests
import xmltodict
import json
from unidecode import unidecode
try:
    _TEXT_TYPES = (str, unicode)  # noqa: F821 - Python 2 has a separate unicode type
except NameError:
    _TEXT_TYPES = (str,)  # Python 3: every text string is already str


def abstractIt(abstract):
    """Flatten a parsed ``AbstractText`` value into a single plain string.

    Args:
        abstract: The value found under ``AbstractText`` after XML parsing.
            Handled shapes are a plain string, a dict carrying ``'@Label'``
            and/or ``'#text'`` keys, or a list of either (nested lists are
            flattened recursively).

    Returns:
        The string itself for string input; ``"<label>: <text> "`` for a
        dict (the label prefix is omitted when there is no label); the
        concatenation of the flattened items for a list; and the literal
        string ``'None'`` for any other value (including ``None``).
    """
    if isinstance(abstract, _TEXT_TYPES):
        return abstract
    if isinstance(abstract, list):
        return ''.join(abstractIt(item) for item in abstract)
    if isinstance(abstract, dict):
        # A section may lack a label or be empty; degrade gracefully instead
        # of raising KeyError/TypeError and losing the whole record.
        label = abstract.get('@Label')
        text = abstract.get('#text') or ''
        prefix = label + ": " if label else ''
        return prefix + text + " "
    return 'None'
def setupArgs():
    """Build the command-line parser for this script.

    Returns:
        An ``argparse.ArgumentParser`` with two boolean switches:
        ``-j/--json`` and ``--headers``. Both default to ``False``.
    """
    cli = argparse.ArgumentParser(
        description='Get PMID record, print CSV/JSON')
    # (option strings, help text) for every on/off switch, in display order.
    switches = (
        (('-j', '--json'), 'Return JSON, default CSV'),
        (('--headers',), '[NOT IMPLEMENTED] Print CSV headers'),
    )
    for option_strings, help_text in switches:
        cli.add_argument(*option_strings, action='store_true', help=help_text)
    return cli
def getCSV(pmid):
    """Fetch one PubMed record and format it as a single CSV row.

    Args:
        pmid: PubMed identifier as a string; it is concatenated into the
            request URL.

    Returns:
        A string of the form ``pmid, "title", "abstract"``. On a non-200
        response or any other failure the title column carries an error
        message and the abstract column is ``"None"``. A record that has no
        abstract also gets ``"None"`` in the abstract column. This function
        does not raise; every failure is reported in the returned row so a
        batch run keeps going.
    """
    def quoted(value):
        # Double embedded quotes so a '"' inside the text cannot terminate
        # the CSV field early.
        return '"{0}"'.format(value.replace('"', '""'))

    try:
        url = 'http://www.ncbi.nlm.nih.gov/pubmed/' + pmid + '?report=xml'
        # A timeout keeps one stalled request from hanging the whole batch.
        r = requests.get(url, timeout=30)
        if r.status_code != 200:
            return '{0}, \"Error retriving: {1}\", \"None\"'.format(pmid, r.status_code)
        # The record is looked up under a top-level 'pre' element, presumably
        # with its XML entity-escaped inside it; restore the angle brackets
        # before parsing.
        xml_text = r.text.replace('&lt;', '<').replace('&gt;', '>')
        doc = xmltodict.parse(xml_text)['pre']
        article = doc['PubmedArticle']['MedlineCitation']['Article']
        title = article['ArticleTitle']
        # Not every record has an abstract; keep the title instead of
        # reporting the whole record as an error.
        abstract_node = (article.get('Abstract') or {}).get('AbstractText')
        if abstract_node is None:
            abstract = 'None'
        else:
            abstract = abstractIt(abstract_node)
        return '{0}, {1}, {2}'.format(
            pmid, quoted(unidecode(title)), quoted(unidecode(abstract)))
    except Exception as error:
        # Per-record boundary: report the failure in-band and carry on.
        message = 'Unexpected Error: {0}'.format(error)
        return '{0}, {1}, "None"'.format(pmid, quoted(message))
def getJSON(pmid):
    """Fetch one PubMed record and return its MedlineCitation as JSON text.

    Args:
        pmid: PubMed identifier as a string; it is concatenated into the
            request URL.

    Returns:
        The ``MedlineCitation`` part of the record serialized with
        ``json.dumps(..., indent=4)``. On a non-200 response or any other
        failure, a JSON object with ``"pmid"`` and ``"error"`` keys is
        returned instead. This function does not raise; every failure is
        reported in the returned text so a batch run keeps going.
    """
    try:
        url = 'http://www.ncbi.nlm.nih.gov/pubmed/' + pmid + '?report=xml'
        # A timeout keeps one stalled request from hanging the whole batch.
        r = requests.get(url, timeout=30)
        if r.status_code != 200:
            return json.dumps(
                {'pmid': pmid, 'error': 'HTTP status {0}'.format(r.status_code)})
        # The record is looked up under a top-level 'pre' element, presumably
        # with its XML entity-escaped inside it; restore the angle brackets
        # before parsing.
        xml_text = r.text.replace('&lt;', '<').replace('&gt;', '>')
        doc = xmltodict.parse(xml_text)['pre']
        return json.dumps(doc['PubmedArticle']['MedlineCitation'], indent=4)
    except Exception as error:
        # Build the error object with json.dumps: literal braces inside a
        # str.format template are parsed as replacement fields, and the
        # output should be valid JSON like the success path.
        return json.dumps({'pmid': pmid, 'error': '{0}'.format(error)})
def main():
    """Read PMIDs, one per line, and print a CSV row or JSON record for each.

    Flags are parsed with the parser from ``setupArgs()``. Any remaining
    non-flag arguments are treated as input file names; with none given,
    PMIDs are read from standard input. Blank lines are skipped. The parsed
    ``headers`` flag is not acted on here.
    """
    parser = setupArgs()
    # fileinput.input() defaults to sys.argv[1:], which would try to open
    # flags such as "-j" as files. Split flags from file names explicitly.
    args, input_files = parser.parse_known_args()
    stray_flags = [arg for arg in input_files
                   if arg.startswith('-') and arg != '-']
    if stray_flags:
        # Keep argparse's usual rejection of mistyped options.
        parser.error('unrecognized arguments: ' + ' '.join(stray_flags))

    for line in fileinput.input(files=input_files or ['-']):
        pmid = line.strip()
        if not pmid:
            continue  # a blank line would otherwise trigger a bogus request
        if args.json:
            print(getJSON(pmid))
            # Visual separator between consecutive JSON records.
            print("\n\n\n\n")
        else:
            print(getCSV(pmid))


if __name__ == "__main__":
    main()