Plugin for fetching data from Isi-WebOfScience

Mario Castro mariocastro73 at gmail.com
Sat Mar 8 05:26:28 EST 2008


Hi all!

After a few days I've created a Python plugin for getting information from
ISI Web of Science.

First of all, PYTHON IS AMAZING! Simple, powerful...I'm in love with python
:-)

I attach my plugin (to be placed in $HOME/.referencer/plugins) for
Referencer version 1.1.1.

It can be improved in many ways. For instance, if the function
getNumberOfRecords returns a value equal to zero, a warning window could be
opened with information about that. Similarly, if it returns a number greater
than 1, it would be very useful to show a window with all the possibilities
and pick one with the mouse, but I don't know how to create a new window.

Until next version, enjoy it!


#!/usr/bin/env python

# Get info from isi-web of science from title/author/year fields (any or all
# of them)
# Mario Castro, 2008


import os
import referencer
from referencer import _
import sys, urllib2, urllib

from xml.dom import minidom

# Plugin metadata as [key, value] pairs; the user-visible values are passed
# through the gettext-style _() imported from referencer.
# NOTE(review): presumably read by Referencer's plugin loader -- confirm.
referencer_plugin_info = [
    ["longname", _("Get info from ISI Web of Science")],
    ["action", _("Get info from ISI Web")],
    ["tooltip", _("Get info from ISI Web of Science")],
]
# Capability names advertised by this plugin; do_action() below is the
# handler that goes with "document_action" (assumed from the name).
referencer_plugin_capabilities = ["document_action"]


def get_fields (doc, field, separator):
    """Join the text of every <field> element under doc with separator.

    doc is any DOM node offering getElementsByTagName (this plugin passes
    a parsed minidom document).  For each matching element only its first
    child node is read, and that node's data is UTF-8 encoded.

    Elements without child nodes are skipped, so an empty element in the
    middle of the list no longer raises IndexError.

    Returns "" when no matching element carries any text.
    """
    texts = [node.childNodes[0].data.encode("utf-8")
             for node in doc.getElementsByTagName(field)
             if node.childNodes]
    if not texts:
        return ""
    return separator.join(texts)

def get_last_field (doc, field):
    """Return the UTF-8 encoded text of the last <field> element under doc.

    doc is any DOM node offering getElementsByTagName.  Only the first
    child node of the chosen element is read.

    Matching elements are examined from the end; elements without child
    nodes are skipped rather than raising IndexError.

    Returns "" when no matching element carries any text.
    """
    for node in reversed(doc.getElementsByTagName(field)):
        if node.childNodes:
            return node.childNodes[0].data.encode("utf-8")
    return ""

def get_field (doc, field):
    """Return the UTF-8 encoded text of the first <field> element under doc.

    Only the first child node of that element is read.  Returns "" when
    there is no such element or when the first one has no child nodes.
    """
    matches = doc.getElementsByTagName(field)
    # Guard clause: nothing matched, or the first match is empty.
    if not len(matches) or not len(matches[0].childNodes):
        return ""
    first_child = matches[0].childNodes[0]
    return first_child.data.encode("utf-8")


def get_attribute_from_field (doc, field, attr):
    """Return attribute attr of the first <field> element under doc.

    Returns "" when no element matches, in line with the other lookup
    helpers in this file, instead of raising IndexError.  For an element
    that exists, the result is whatever its getAttribute(attr) returns.
    """
    matches = doc.getElementsByTagName(field)
    if not matches:
        return ""
    return matches[0].getAttribute(attr)

def getNumberOfRecords (document):
    """Ask ISI Web of Science how many records match document.

    The document's "title", "year" and "author" fields are read through
    document.get_field() and sent as the TI, PY and AU parts of a search
    query.  The response is parsed as XML.

    Returns the text of the first <recordsFound> element as produced by
    get_field() (a string such as '1'), or "" if the element is absent.

    NOTE(review): a failed download or a non-XML response will presumably
    surface as an exception from minidom.parseString -- confirm what
    referencer.download returns on failure.
    """
    title = document.get_field("title")
    year = document.get_field("year")
    author = document.get_field("author")

    # urlencode with an empty key yields "=<quoted value>", which is glued
    # straight onto the TI / PY / AU prefixes below.
    ti = urllib.urlencode([('', '(' + title + ')')])
    ye = urllib.urlencode([('', '(' + year + ')')])
    au = urllib.urlencode([('', '(' + author + ')')])

    # Adjacent literals are concatenated before "+", so this is one URL.
    url0 = ('http://estipub.isiknowledge.com/esti/cgi?databaseID=WOS'
            '&rspType=xml&method=search&firstRec=1&numRecs=1&query=TI'
            + ti + '&PY' + ye + '&AU' + au)
    data0 = referencer.download(
        _("Obtaining data from ISI-WebOfScience"),
        _("Fetching number of ocurrences for %s/%s/%s") % (author, title, year),
        url0)
    xmldoc0 = minidom.parseString(data0)
    return get_field(xmldoc0, "recordsFound")

def getAndSetFields(document):
    """Fetch the first matching ISI record and copy its data into document.

    The search uses the document's current "title", "year" and "author"
    fields.  From the XML response the following fields may be written
    with document.set_field():

    - "year", "volume", "journal": only when the document's value is empty
      and ISI supplied one.
    - "pages": only when the ISI value is longer than the current one and
      is not the placeholder '-'.
    - "title", "author", "doi", "abstract", "keywords": whenever ISI
      supplied a non-empty value (existing values are overwritten).

    Returns None.

    NOTE(review): the <bib_issue> lookup relies on
    get_attribute_from_field(); check how that helper behaves when the
    response has no such element.
    """
    title = document.get_field("title")
    year = document.get_field("year")
    author = document.get_field("author")

    page_orig = document.get_field("pages")
    journal_orig = document.get_field("journal")
    volume = document.get_field("volume")

    # urlencode with an empty key yields "=<quoted value>", which is glued
    # straight onto the TI / PY / AU prefixes below.
    ti = urllib.urlencode([('', '(' + title + ')')])
    ye = urllib.urlencode([('', '(' + year + ')')])
    au = urllib.urlencode([('', '(' + author + ')')])

    # NOTE(review): the SID value is hard-coded; it looks like a session
    # identifier and may stop being accepted by the server -- confirm.
    url = ('http://estipub.isiknowledge.com/esti/cgi?databaseID=WOS'
           '&SID=Q1mNFhCECOk6c8aELLh&rspType=xml&method=searchRetrieve'
           '&firstRec=1&numRecs=1&query=TI'
           + ti + '&PY' + ye + '&AU' + au)
    data = referencer.download(
        _("Obtaining data from ISI-WebOfScience"),
        _("Fetching data for %s/%s/%s") % (author, title, year),
        url)
    xmldoc = minidom.parseString(data)

    primary_author = get_field(xmldoc, "primaryauthor")
    more_authors = get_fields(xmldoc, "author", ' and ')
    # Join only the non-empty parts so a missing <primaryauthor> does not
    # leave a dangling leading " and ".
    authors = ' and '.join(
        [name for name in (primary_author, more_authors) if name])

    abstract = get_field(xmldoc, "p")
    keywords = get_fields(xmldoc, "keyword", ', ')
    journal = get_field(xmldoc, "source_title")
    doi = get_last_field(xmldoc, "article_no")
    pages = get_field(xmldoc, "bib_pages")
    title_isi = get_field(xmldoc, "item_title")
    year_isi = get_attribute_from_field(xmldoc, "bib_issue", "year")
    volume_isi = get_attribute_from_field(xmldoc, "bib_issue", "vol")

    if len(year) == 0 and len(year_isi) > 0:
        document.set_field("year", year_isi)
    if len(volume) == 0 and len(volume_isi) > 0:
        document.set_field("volume", volume_isi)
    # Guard on the ISI title, not the document's old one: otherwise an
    # empty ISI title would blank an existing title, and a document with
    # no title would never receive one.
    if len(title_isi) > 0:
        document.set_field("title", title_isi)
    if len(authors) > 0:
        document.set_field("author", authors)
    if len(doi) > 0:
        document.set_field("doi", doi)
    if len(journal_orig) == 0 and len(journal) > 0:
        document.set_field("journal", journal)
    if len(page_orig) < len(pages) and pages != '-':
        document.set_field("pages", pages)
    if len(abstract) > 0:
        document.set_field("abstract", abstract)
    if len(keywords) > 0:
        document.set_field("keywords", keywords)

def do_action (documents):
    """Update every document that has exactly one ISI match.

    For each item in documents the record count is queried with
    getNumberOfRecords(); only when it equals the string '1' are the
    fields filled in through getAndSetFields().  Documents with zero or
    several matches are left untouched.

    Always returns True.
    """
    for document in documents:
        if getNumberOfRecords(document) == '1':
            getAndSetFields(document)

    return True
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://icculus.org/pipermail/referencer/attachments/20080308/6d88f323/attachment.htm>


More information about the referencer mailing list