#!/usr/bin/env python
# Get info from isi-web of science from title/author/year fields (any or all of them)
# Mario Castro, 2008
# Yoav Avitzour, 2008
import os
import referencer
from referencer import _
import sys, urllib2, urllib
import gobject
import gtk
from xml.dom import minidom
# Descriptive metadata for this plugin: authorship, version, and the
# user-visible name/action/tooltip strings.
# NOTE(review): presumably the host application looks this dictionary up by
# its exact name -- confirm against the Referencer plugin loader.
referencer_plugin_info = {
"author": "Mario Castro, Yoav Avitzour",
"version": "0.0.3",
# As written, the "ui" value is a string holding a single newline.
"ui":
"""
""",
# The user-visible strings below go through _() imported from referencer
# (presumably a gettext-style translation hook -- confirm).
"longname": _("ISI Web of Science resolver (requires subscription)"),
"action": _("Get metadata from ISI Web of Science"),
"tooltip": _("ISI Web of Science resolver (requires subscription)")}
# List of action descriptors exposed by this plugin; it holds one action.
referencer_plugin_actions = [{
"name":"_plugin_isi",
"label":_("ISI Info"),
"tooltip":_("Retrieve metadata for the selected documents from ISI Web of Science"),
"icon":"_stock:gtk-edit",
# Names the do_action function defined in this file.
"callback":"do_action",
# NOTE(review): no function called sensitivity_genkey is defined in the
# visible part of this file -- confirm it exists or is supplied by the host.
"sensitivity":"sensitivity_genkey",
"accelerator":"i"
}]
class isiRec:
    """One bibliographic record obtained from ISI Web of Science.

    Every field (authors, abstract, keywords, journal, doi, pages, title,
    year, volume) is stored as a string and starts out as ''.  When a
    document is passed to the constructor the record is filled in right away
    by querying ISI with that document's title/author/year.
    """

    def __init__(self, document = None, firstrec = None):
        """Create an empty record, or fetch one when *document* is given.

        document -- object exposing get_field(name); None skips the download.
        firstrec -- index of the search hit to retrieve; None selects hit 1.
        """
        self.authors = ''
        self.abstract = ''
        self.keywords = ''
        self.journal = ''
        self.doi = ''
        self.pages = ''
        self.title = ''
        self.year = ''
        self.volume = ''
        if document is not None:
            self.get_record_from_document(document, firstrec)

    def set_fields_from_data(self, data):
        """Populate every field of the record from the XML text *data*."""
        xmldoc = minidom.parseString(data)

        primary = get_field(xmldoc, "primaryauthor")
        coauthors = get_fields(xmldoc, "author", ' and ')
        # Join only the non-empty parts so that a missing primary author does
        # not leave a dangling ' and ' at the start of the author list.
        self.authors = ' and '.join(
            [part for part in (primary, coauthors) if len(part) > 0])

        self.abstract = get_field(xmldoc, "p")
        self.keywords = get_fields(xmldoc, "keyword", ', ')
        self.journal = get_field(xmldoc, "source_title")

        # The first four characters of the last article_no entry are dropped
        # (presumably a "DOI " prefix -- verify against a real response).
        # Slicing an empty string is harmless, so no length check is needed.
        self.doi = get_last_field(xmldoc, "article_no")[4:]

        self.pages = get_field(xmldoc, "bib_pages")
        if self.pages == '-':
            # No page range: fall back to the first article_no entry, again
            # without its four-character prefix.
            self.pages = get_field(xmldoc, "article_no")[4:]

        self.title = get_field(xmldoc, "item_title")
        self.year = get_attribute_from_field(xmldoc, "bib_issue", "year")
        self.volume = get_attribute_from_field(xmldoc, "bib_issue", "vol")

    def get_data(self, document, firstrec=None, numrecs=None):
        """Download and return the raw search response for *document*.

        firstrec -- index of the first hit to request (default 1).
        numrecs  -- number of hits to request (default 1).
        """
        title = document.get_field("title")
        year = document.get_field("year")
        author = document.get_field("author")
        if firstrec is None:
            firstrec = 1
        if numrecs is None:
            numrecs = 1
        # NOTE(review): the SID value is hard-coded in the URL; presumably it
        # is a session identifier that may expire -- confirm.
        url = 'http://estipub.isiknowledge.com/esti/cgi?databaseID=WOS&SID=Q1mNFhCECOk6c8aELLh&rspType=xml&method=searchRetrieve'
        url += \
            '&firstRec=' + str(firstrec) + \
            '&numRecs=' + str(numrecs) + \
            '&query=' + getQuery(document)
        data = referencer.download(
            _("Obtaining data from ISI-WebOfScience"),
            _("Fetching data for %s/%s/%s") % (author, title, year),
            url)
        return data

    def get_record_from_document(self, document, firstrec=None, numrecs=None):
        """Query ISI for *document* and load the response into this record."""
        data = self.get_data(document, firstrec, numrecs)
        self.set_fields_from_data(data)

    def set_document_from_record(self, document):
        """Copy every non-empty field of the record into *document*.

        Empty fields are skipped so that existing document values are not
        overwritten with blanks.  Title and journal are title-cased and the
        author list goes through capitalize_authors.  Returns *document*.
        """
        if self.year:
            document.set_field("year", self.year)
        if self.volume:
            document.set_field("volume", self.volume)
        if self.title:
            document.set_field("title", self.title.title())
        if self.authors:
            document.set_field("author", capitalize_authors(self.authors))
        if self.doi:
            document.set_field("doi", self.doi)
        if self.journal:
            document.set_field("journal", self.journal.title())
        if self.pages:
            document.set_field("pages", self.pages)
        if self.abstract:
            document.set_field("abstract", self.abstract)
        if self.keywords:
            document.set_field("keywords", self.keywords)
        return document
class recordChooser(gtk.Dialog):
    """Modal dialog that lets the user pick one of several ISI records.

    Each record is a top-level row with a radio toggle; its fields are listed
    as child rows.  After the dialog has run, current_record identifies the
    chosen top-level row (0 until the user toggles another one, then the tree
    path string delivered by the "toggled" signal).
    """

    def __init__(self, records = None, parent = None):
        """Build the dialog and, when *records* is given, fill the tree.

        records -- sequence of record objects exposing the attributes named
                   in self.fields; None leaves the tree empty.
        parent  -- parent window handed to gtk.Dialog.
        """
        gtk.Dialog.__init__(self, "ISI record chooser dialog",
                            parent,
                            gtk.DIALOG_MODAL |
                            gtk.DIALOG_DESTROY_WITH_PARENT,
                            (gtk.STOCK_CANCEL, gtk.RESPONSE_REJECT,
                             gtk.STOCK_OK, gtk.RESPONSE_OK))
        self.records = records

        # Model columns: 0 = toggle visible, 1 = label text, 2 = value text,
        # 3 = toggle active.
        self.recTree = gtk.TreeStore(bool, str, str, bool)
        self.treeview = gtk.TreeView(self.recTree)
        self.tvcolumn0 = gtk.TreeViewColumn()
        self.tvcolumn1 = gtk.TreeViewColumn()
        self.tvcolumn2 = gtk.TreeViewColumn()

        scwindow = gtk.ScrolledWindow()
        scwindow.set_policy(gtk.POLICY_AUTOMATIC, gtk.POLICY_AUTOMATIC)
        scwindow.add_with_viewport(self.treeview)
        self.vbox.pack_start(scwindow, True, True, 0)

        self.treeview.append_column(self.tvcolumn0)
        self.treeview.append_column(self.tvcolumn1)
        self.treeview.append_column(self.tvcolumn2)

        self.tcell = gtk.CellRendererText()
        self.bcell = gtk.CellRendererToggle()
        self.bcell.set_radio(True)
        self.bcell.connect("toggled", self.toggled_cb, (self.recTree, 3))
        self.bcell.set_property('activatable', True)

        self.tvcolumn0.pack_start(self.bcell, True)
        self.tvcolumn0.add_attribute(self.bcell, 'visible', 0)
        self.tvcolumn0.add_attribute(self.bcell, 'active', 3)
        self.tvcolumn1.pack_start(self.tcell, True)
        self.tvcolumn1.add_attribute(self.tcell, 'text', 1)
        self.tvcolumn2.pack_start(self.tcell, True)
        self.tvcolumn2.add_attribute(self.tcell, 'text', 2)

        # Record attributes shown as child rows, in display order.
        self.fields = ['authors', 'title', 'journal', 'volume', 'pages', 'year']
        if records is not None:
            self.add_records()

        self.current_record = 0
        self.treeview.expand_all()
        self.resize(400, 300)
        self.vbox.show_all()

    def add_records(self):
        """Insert one top-level row per record plus a child row per field."""
        for outerrow, rec in enumerate(self.records):
            outeriter = self.recTree.insert(None, outerrow)
            # Only the first record starts out selected.
            self.recTree.set(outeriter,
                             0, True,
                             1, 'Record ' + str(outerrow + 1),
                             3, outerrow == 0)
            for innerrow, field in enumerate(self.fields):
                # getattr replaces the former eval() of a built-up expression.
                self.recTree.set(self.recTree.insert(outeriter, innerrow),
                                 0, False,
                                 1, field,
                                 2, getattr(rec, field),
                                 3, False)

    def toggled_cb(self, cell, path, user_data):
        """Handle a radio toggle: select the row at *path*, deselect the rest.

        user_data -- (model, column) pair supplied when the signal was
                     connected; column is the 'active' flag column.
        """
        model, column = user_data
        # Iterating the model visits the top-level (record) rows.
        for row in model:
            row[column] = False
        model[path][column] = True
        self.current_record = path
def capitalize_authors(authors):
    """Normalize the letter case of an ' and '-separated author list.

    For every author made of at least two words, the first word is
    capitalized and the second is upper-cased (e.g. 'castro m' becomes
    'Castro M'); any further words and single-word names are left untouched.
    Runs of whitespace collapse to single spaces.

    Authors are delimited by the literal word 'and' rather than by assuming
    exactly three words per entry, so names with more or fewer than two words
    no longer shift the pattern onto the wrong words (which could upper-case
    the 'and' separator itself).
    """
    words = authors.split()
    start = 0  # index of the first word of the author currently scanned
    for end in range(len(words) + 1):
        # An author ends at each 'and' and at the end of the list.
        if end == len(words) or words[end] == 'and':
            if end - start >= 2:
                words[start] = words[start].capitalize()
                words[start + 1] = words[start + 1].upper()
            start = end + 1
    return ' '.join(words)
def get_fields (doc, field, separator):
    """Return the text of every <field> element of *doc*, joined together.

    doc       -- DOM document (or node) supporting getElementsByTagName.
    field     -- tag name to look for.
    separator -- text placed between consecutive values; expected to be
                 plain ASCII or unicode text, as the result is built as text
                 and encoded once.

    Elements without child nodes are skipped instead of raising IndexError.
    Returns "" when nothing is found, otherwise the UTF-8 encoded result.
    """
    texts = [node.childNodes[0].data
             for node in doc.getElementsByTagName(field)
             if len(node.childNodes) > 0]
    if len(texts) == 0:
        return ""
    # Join first and encode once at the end instead of encoding per item.
    return separator.join(texts).encode("utf-8")
def get_last_field (doc, field):
    """Return the UTF-8 encoded text of the last <field> element of *doc*.

    Returns "" when there is no such element or when the last one has no
    child nodes.  The emptiness check is made on the element that is actually
    read (the last one), so an empty trailing element no longer raises
    IndexError.
    """
    matches = doc.getElementsByTagName(field)
    if len(matches) == 0:
        return ""
    last = matches[-1]
    if len(last.childNodes) == 0:
        return ""
    return last.childNodes[0].data.encode("utf-8")
def get_field (doc, field):
    """Return the UTF-8 encoded text of the first <field> element of *doc*.

    Returns "" when no element has that tag name or when the first match has
    no child nodes.
    """
    matches = doc.getElementsByTagName(field)
    if len(matches) > 0 and len(matches[0].childNodes) > 0:
        first_child = matches[0].childNodes[0]
        return first_child.data.encode("utf-8")
    return ""
def get_attribute_from_field (doc, field, attr):
    """Return attribute *attr* of the first <field> element of *doc*.

    Returns "" when no element has that tag name, matching the other lookup
    helpers in this file, instead of raising IndexError.  The value is
    returned exactly as getAttribute gives it (no encoding is applied).
    """
    matches = doc.getElementsByTagName(field)
    if len(matches) == 0:
        return ""
    return matches[0].getAttribute(attr)
def getNumberOfRecords (document):
    """Return how many ISI records match *document*'s title/author/year.

    Runs a search request and reads the recordsFound element of the XML
    response.  Returns 0 when the response carries no recordsFound value,
    rather than failing with ValueError on int("").
    """
    title = document.get_field("title")
    year = document.get_field ("year")
    author = document.get_field ("author")
    url0 = 'http://estipub.isiknowledge.com/esti/cgi?databaseID=WOS&rspType=xml&method=search&firstRec=1&numRecs=1'
    url0 += '&query=' + getQuery(document)
    data0 = referencer.download(
        _("Obtaining data from ISI-WebOfScience"),
        _("Fetching number of ocurrences for %s/%s/%s") % (author,title,year),
        url0)
    xmldoc0 = minidom.parseString(data0)
    recordsFound = get_field(xmldoc0, "recordsFound")
    if len(recordsFound) == 0:
        # No count in the response: report "nothing found".
        return 0
    return int(recordsFound)
def getQuery(document):
    """Build the query string for *document* from its title, year and author.

    Each non-empty field contributes '<TAG>' followed by the url-encoded
    '(value)' pair and a trailing '&', using the tags TI (title), PY (year)
    and AU (author) in that order.  Title and author have non-ASCII
    characters removed first; the year is used as-is.  Returns '' when all
    three fields are empty.
    """
    # (query tag, document field name, strip non-ASCII characters first?)
    criteria = (
        ('TI', "title", True),
        ('PY', "year", False),
        ('AU', "author", True),
    )
    clauses = []
    for tag, name, ascii_only in criteria:
        value = document.get_field(name)
        if ascii_only:
            value = remove_non_ascii_chars(value)
        if len(value) > 0:
            encoded = urllib.urlencode([('', '(' + value + ')')])
            clauses.append(tag + encoded + '&')
    return ''.join(clauses)
def do_action (library,documents):
    """Plugin callback: update each document with metadata from ISI.

    For every document the number of matching ISI records is looked up:
      * exactly one match -- its data is copied into the document;
      * several matches   -- the user picks one in a chooser dialog, and
                             cancelling leaves the document untouched;
      * no match          -- the document is left untouched.

    library   -- not used by this function.
    documents -- iterable of documents to update.
    Always returns True.
    """
    for document in documents:
        nrecs = getNumberOfRecords(document)
        if nrecs == 1:
            isiRec(document).set_document_from_record(document)
        elif nrecs > 1:
            rec = choose_record(document, nrecs)
            # choose_record returns False when the dialog is cancelled.
            if rec is not False:
                rec.set_document_from_record(document)
    return True
def remove_non_ascii_chars(si):
    """Return *si* with every character whose ordinal exceeds 127 removed.

    Filters in a single pass instead of calling replace() once per offending
    character, which rescanned the whole string each time.
    """
    # si[:0] is an empty string of the same type as si, so the result keeps
    # the input's string type even when nothing survives the filter.
    return si[:0].join([ch for ch in si if ord(ch) <= 127])
def choose_record(document,nrecs):
    """Let the user choose among the *nrecs* ISI records matching *document*.

    Fetches hits 1..nrecs one at a time (one isiRec, and thus one download,
    per hit), shows them in a recordChooser dialog and returns the selected
    record, or False when the dialog is closed with anything but OK.
    """
    records = [isiRec(document, index + 1) for index in range(nrecs)]
    recChoose = recordChooser(records)
    try:
        response = recChoose.run()
        if response == int(gtk.RESPONSE_OK):
            # current_record is 0 or a tree path string such as "1".
            return records[int(recChoose.current_record)]
        return False
    finally:
        # Destroy the dialog even if running it raised.
        recChoose.destroy()