Plugin for fetching data from Isi-WebOfScience
Mario Castro
mariocastro73 at gmail.com
Sat Mar 8 05:26:28 EST 2008
Hi all!
After a few days I've created a Python plugin for getting information from
ISI Web of Science.
First of all, PYTHON IS AMAZING! Simple, powerful...I'm in love with python
:-)
Attached is my plugin (to be placed in $HOME/.referencer/plugins) for
Referencer version 1.1.1.
It can be improved in many ways. For instance, if the function
getNumberOfRecords returns a value equal to zero, a warning window could be
opened with information about that. Similarly, if it returns a number greater
than 1, it would be highly interesting to show a window with all the
possibilities and pick one with the mouse, but I don't know how to create a
new window.
Until next version, enjoy it!
#!/usr/bin/env python
# Get info from isi-web of science from title/author/year fields (any or all
# of them)
# Mario Castro, 2008
import os
import referencer
from referencer import _
import sys, urllib2, urllib
from xml.dom import minidom
# Plugin metadata as [key, translated text] pairs.
# NOTE(review): presumably read by Referencer when it loads the plugin -- the
# consumer is not visible in this file.
referencer_plugin_info = [
    ["longname", _("Get info from ISI Web of Science")],
    ["action", _("Get info from ISI Web")],
    ["tooltip", _("Get info from ISI Web of Science")],
]

# Capabilities advertised by this plugin; "document_action" pairs with the
# do_action(documents) entry point defined in this file.
referencer_plugin_capabilities = ["document_action"]
def get_fields (doc, field, separator):
    """Join the text of every <field> element found in doc.

    doc is a DOM node offering getElementsByTagName (e.g. a parsed minidom
    document). For each matching element the data of its first child node is
    taken and encoded as UTF-8; the pieces are then joined with separator.

    Elements without child nodes are skipped. Previously only the first
    element was checked, so an empty element further down the list raised
    IndexError.

    Returns "" when no matching element carries any content.
    """
    encoded = [node.childNodes[0].data.encode("utf-8")
               for node in doc.getElementsByTagName(field)
               if node.childNodes]
    if not encoded:
        return ""
    # Bring a text separator to the same (byte string) type as the encoded
    # pieces so the join never mixes text and byte strings.
    if not isinstance(separator, type(encoded[0])):
        separator = separator.encode("utf-8")
    return separator.join(encoded)
def get_last_field (doc, field):
    """Return the UTF-8 encoded text of the last <field> element in doc.

    doc is a DOM node offering getElementsByTagName. The elements are scanned
    from the end and the first one that has child nodes supplies the result
    (the data of its first child node).

    Elements without child nodes are skipped. Previously only the first
    element was checked, so an empty element later in the list raised
    IndexError.

    Returns "" when no matching element carries any content.
    """
    for node in reversed(doc.getElementsByTagName(field)):
        if node.childNodes:
            return node.childNodes[0].data.encode("utf-8")
    return ""
def get_field (doc, field):
    """Return the UTF-8 encoded text of the first <field> element in doc.

    doc is a DOM node offering getElementsByTagName. The result is the data
    of the first child node of the first matching element.

    Returns "" when there is no such element or when it has no child nodes.
    """
    nodes = doc.getElementsByTagName(field)
    if not nodes or not nodes[0].childNodes:
        return ""
    return nodes[0].childNodes[0].data.encode("utf-8")
def get_attribute_from_field (doc, field, attr):
    """Return attribute attr of the first <field> element in doc.

    doc is a DOM node offering getElementsByTagName. The value comes straight
    from getAttribute and, unlike the text helpers in this file, is not
    UTF-8 encoded here.

    Returns "" when the element is absent, matching the other lookup helpers
    in this file. Previously a missing element raised IndexError.
    """
    nodes = doc.getElementsByTagName(field)
    if not nodes:
        return ""
    return nodes[0].getAttribute(attr)
def getNumberOfRecords (document):
    """Ask ISI Web of Science how many records match this document.

    Builds a "search" request from the document's title, year and author
    fields, downloads the XML answer through referencer.download and returns
    the text of its <recordsFound> element as extracted by get_field (a
    string, "" when the element is missing).
    """
    title = document.get_field("title")
    year = document.get_field("year")
    author = document.get_field("author")

    # urlencode with an empty key yields "=<quoted value>", so prefixing the
    # result with "TI", "PY" or "AU" produces e.g. "TI=%28...%29".
    ti = urllib.urlencode([('', '(' + title + ')')])
    ye = urllib.urlencode([('', '(' + year + ')')])
    au = urllib.urlencode([('', '(' + author + ')')])

    url0 = 'http://estipub.isiknowledge.com/esti/cgi?databaseID=WOS&rspType=xml&method=search&firstRec=1&numRecs=1&query=TI' + ti + '&PY' + ye + '&AU' + au
    data0 = referencer.download(
        _("Obtaining data from ISI-WebOfScience"),
        _("Fetching number of ocurrences for %s/%s/%s") % (author, title, year),
        url0)

    xmldoc0 = minidom.parseString(data0)
    return get_field(xmldoc0, "recordsFound")
def getAndSetFields(document):
    """Fetch the ISI Web of Science record for document and copy its fields.

    Builds a "searchRetrieve" request from the document's title, year and
    author, downloads the XML answer through referencer.download and updates
    the document as follows:

    - year, volume, journal: set only when the document has none and ISI
      supplies one
    - title, author, doi, abstract, keywords: overwritten whenever ISI
      supplies a non-empty value
    - pages: replaced only when the ISI value is longer than the current one
      and is not just "-"
    """
    title = document.get_field("title")
    year = document.get_field("year")
    author = document.get_field("author")
    page_orig = document.get_field("pages")
    journal_orig = document.get_field("journal")
    volume = document.get_field("volume")

    # urlencode with an empty key yields "=<quoted value>", so prefixing the
    # result with "TI", "PY" or "AU" produces e.g. "TI=%28...%29".
    ti = urllib.urlencode([('', '(' + title + ')')])
    ye = urllib.urlencode([('', '(' + year + ')')])
    au = urllib.urlencode([('', '(' + author + ')')])

    # NOTE(review): the SID parameter looks like a hard-coded session id;
    # confirm whether it can expire.
    url = 'http://estipub.isiknowledge.com/esti/cgi?databaseID=WOS&SID=Q1mNFhCECOk6c8aELLh&rspType=xml&method=searchRetrieve&firstRec=1&numRecs=1&query=TI' + ti + '&PY' + ye + '&AU' + au
    data = referencer.download(
        _("Obtaining data from ISI-WebOfScience"),
        _("Fetching data for %s/%s/%s") % (author, title, year),
        url)
    xmldoc = minidom.parseString(data)

    # Join only the non-empty parts so that a missing <primaryauthor> does
    # not leave a dangling ' and ' at the start of the author list.
    primary_author = get_field(xmldoc, "primaryauthor")
    more_authors = get_fields(xmldoc, "author", ' and ')
    authors = ' and '.join(
        [name for name in (primary_author, more_authors) if name])

    abstract = get_field(xmldoc, "p")
    keywords = get_fields(xmldoc, "keyword", ', ')
    journal = get_field(xmldoc, "source_title")
    doi = get_last_field(xmldoc, "article_no")
    pages = get_field(xmldoc, "bib_pages")
    title_isi = get_field(xmldoc, "item_title")
    year_isi = get_attribute_from_field(xmldoc, "bib_issue", "year")
    volume_isi = get_attribute_from_field(xmldoc, "bib_issue", "vol")

    if not year and year_isi:
        document.set_field("year", year_isi)
    if not volume and volume_isi:
        document.set_field("volume", volume_isi)
    # Test the ISI title, not the existing one: the old check could replace
    # the document's title with an empty string.
    if title_isi:
        document.set_field("title", title_isi)
    if authors:
        document.set_field("author", authors)
    if doi:
        document.set_field("doi", doi)
    if not journal_orig and journal:
        document.set_field("journal", journal)
    if len(page_orig) < len(pages) and pages != '-':
        document.set_field("pages", pages)
    if abstract:
        document.set_field("abstract", abstract)
    if keywords:
        document.set_field("keywords", keywords)
def do_action (documents):
    """Update every document that has exactly one match in ISI Web of Science.

    For each document the number of matching records is looked up with
    getNumberOfRecords; getAndSetFields is called only when that number is
    the string '1'. Documents with any other result are left untouched.

    Always returns True.
    """
    for document in documents:
        if getNumberOfRecords(document) == '1':
            getAndSetFields(document)
    return True
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://icculus.org/pipermail/referencer/attachments/20080308/6d88f323/attachment.htm>
More information about the referencer
mailing list