{ "translatorID": "6773a9af-5375-3224-d148-d32793884dec", "label": "InfoTrac", "creator": "Simon Kornblith", "target": "^https?://[^/]+/itw/infomark/", "minVersion": "1.0.0b3.r1", "maxVersion": "", "priority": 100, "inRepository": true, "translatorType": 4, "browserSupport": "g", "lastUpdated": "2013-06-06 00:19:59" } function detectWeb(doc, url) { // ensure that there is an InfoTrac logo if(!doc.evaluate('//img[substring(@alt, 1, 8) = "InfoTrac"]', doc, null, XPathResult.ANY_TYPE, null).iterateNext()) return false; if(doc.title.substring(0, 8) == "Article ") { if (ZU.xpathText(doc, '//td//img[contains(@src, "ncnp_logo.gif")]/@title')) return "newspaperArticle"; var genre = doc.evaluate('//comment()[substring(., 1, 6) = " Genre"]', doc, null, XPathResult.ANY_TYPE, null).iterateNext(); if(genre) { var value = Zotero.Utilities.trimInternal(genre.nodeValue.substr(7)); if(value == "article") { return "journalArticle"; } else if(value == "book") { return "book"; } else if(value == "dissertation") { return "thesis"; } else if(value == "bookitem") { return "bookSection"; } } return "magazineArticle"; } else if(doc.title.substring(0, 10) == "Citations ") { return "multiple"; } } function scrape(doc, url){ var newItem = new Zotero.Item(); var xpath = '/html/body//comment()'; var elmts = doc.evaluate(xpath, doc, null, XPathResult.ANY_TYPE, null); var citation = ZU.xpath(doc, '//p/table/tbody//td/table/tbody[not(./script)]'); newItem.title = ZU.xpathText(citation, './/font/b'); newItem.itemType = "newspaperArticle"; var author = ZU.xpathText(citation, './/td/i'); if (author) newItem.creators.push(ZU.cleanAuthor(author, "author`")); var date = ZU.xpathText(citation, './/td/text()'); if (date) date = date.match(/[A-Z][a-z]+\s\d+,\s\d{4}/); if (date) newItem.date = date[0]; var pdfurl = ZU.xpathText(doc, '//blockquote/a[contains(@href, "!pdf")][1]/@href'); if (pdfurl){ newItem.attachments.push({url: pdfurl, title: "Infotrac Full Text PDF", mimeType: "application/pdf"}) } newItem.attachments.push({document: doc, title: "Infotrac Snapshot", mimeType: "text/html"}); while(elmt = elmts.iterateNext()) { var colon = elmt.nodeValue.indexOf(":"); var field = elmt.nodeValue.substring(1, colon).toLowerCase(); var value = elmt.nodeValue.substring(colon+1, elmt.nodeValue.length-1); if(field == "journal") { newItem.publicationTitle = value; } } if (newItem.publicationTitle.search(/\(.+\)/)){ newItem.place = newItem.publicationTitle.match(/\((.+)\)/)[1]; newItem.publicationTitle = newItem.publicationTitle.replace(/\(.+\).*/, ""); } newItem.complete(); } function extractCitation(url, elmts, title, doc) { var newItem = new Zotero.Item(); newItem.url = url; if(title) { newItem.title = Zotero.Utilities.superCleanString(title); } newItem.title = ZU.xpathText(citation, './/font/b'); newItem.itemType = "newspaperArticle"; var date = ZU.xpathText(citation, './/td/text()'); if (date) date = date.match(/[A-Z][a-z]+\s\d+,\s\d{4}/); if (date) newItem.date = date[0]; while(elmt = elmts.iterateNext()) { var colon = elmt.nodeValue.indexOf(":"); var field = elmt.nodeValue.substring(1, colon).toLowerCase(); var value = elmt.nodeValue.substring(colon+1, elmt.nodeValue.length-1); if(field == "title") { newItem.title = Zotero.Utilities.superCleanString(value); } else if(field == "journal") { newItem.publicationTitle = value; } else if(field == "pi") { parts = value.split(" "); var date = ""; var field = null; for(j in parts) { firstChar = parts[j].substring(0, 1); if(firstChar == "v") { newItem.itemType = "journalArticle"; field = "volume"; } else if(firstChar == "i") { field = "issue"; } else if(firstChar == "p") { field = "pages"; var pagesRegexp = /p(\w+)\((\w+)\)/; // weird looking page range var match = pagesRegexp.exec(parts[j]); if(match) { // yup, it's weird var finalPage = parseInt(match[1])+parseInt(match[2]) parts[j] = "p"+match[1]+"-"+finalPage.toString(); } else if(!newItem.itemType) { // no, it's normal // check to see if it's numeric, bc newspaper pages aren't var justPageNumber = parts[j].substr(1); if(parseInt(justPageNumber).toString() != justPageNumber) { newItem.itemType = "newspaperArticle"; } } } else if(!field) { // date parts at the beginning, before // anything else date += " "+parts[j]; } if(field) { isDate = false; if(parts[j] != "pNA") { // make sure it's not an invalid // page number // chop of letter newItem[field] = parts[j].substring(1); } else if(!newItem.itemType) { // only newspapers are missing // page numbers on infotrac newItem.itemType = "newspaperArticle"; } } } // Set type if(!newItem.itemType) { newItem.itemType = "magazineArticle"; } if(date != "") { newItem.date = date.substring(1); } } else if(field == "author") { var author = Zotero.Utilities.cleanAuthor(value, "author", true); // ensure author is not already there var add = true; for each(var existingAuthor in newItem.creators) { if(existingAuthor.firstName == author.firstName && existingAuthor.lastName == author.lastName) { add = false; break; } } if(add) newItem.creators.push(author); } else if(field == "issue") { newItem.issue = value; } else if(field == "volume") { newItem.volume = value; } else if(field == "issn") { newItem.ISSN = value; } else if(field == "gjd") { var m = value.match(/\(([0-9]{4}[^\)]*)\)(?:, pp\. ([0-9\-]+))?/); if(m) { newItem.date = m[1]; newItem.pages = m[2]; } } else if(field == "BookTitle") { newItem.publicationTitle = value; } else if(field == "genre") { value = value.toLowerCase(); if(value == "article") { newItem.itemType = "journalArticle"; } else if(value == "book") { newItem.itemType = "book"; } else if(value == "dissertation") { newItem.itemType = "thesis"; } else if(value == "bookitem") { newItem.itemType = "bookSection"; } } } if(doc) { newItem.attachments.push({document:doc, title:"InfoTrac Snapshot"}); } else { newItem.attachments.push({url:url, title:"InfoTrac Snapshot", mimeType:"text/html"}); } newItem.complete(); } function doWeb(doc, url) { var ncnp; if (ZU.xpathText(doc, '//td//img[contains(@src, "ncnp_logo.gif")]/@title')) ncnp = true; /*the only Infotrac Site that's still up & I'm aware of is 19th Century Newspapers. But there may well be others, so I'm leaving a lot of legacy code in just in case */ var uri = doc.location.href; if(doc.title.substring(0, 8) == "Article ") { // article if(ncnp) scrape(doc, url); else{ var xpath = '/html/body//comment()'; var elmts = doc.evaluate(xpath, doc, null, XPathResult.ANY_TYPE, null); extractCitation(uri, elmts); } } else { // search results var items = new Object(); var uris = new Array(); var elmts = new Array(); var host = doc.location.href.match(/^https?:\/\/[^\/]+/)[0]; var baseurl = doc.location.href.match(/(.+)\/purl=/); var institution = url.match(/\?sw_aep=.+/)[0]; var tableRows = doc.evaluate('/html/body//table/tbody/tr/td[b or strong]', doc, null, XPathResult.ANY_TYPE, null); var tableRow; var javaScriptRe = /'([^']*)' *, *'([^']*)'/ var i = 0; // Go through table rows if(ncnp){ while(tableRow = tableRows.iterateNext()) { var title = ZU.trimInternal(ZU.xpathText(tableRow, './strong')); var link = ZU.xpathText(tableRow, './a[1]/@href'); link = link.match(/\(\'(\/.+)\',\'/)[1]; link = baseurl[1] + link + institution; //Z.debug(link) items[link] = title; } Zotero.selectItems(items, function (items) { if (!items) { return true; } for (var i in items) { uris.push(i); } Zotero.Utilities.processDocuments(uris, scrape) }); } else{ while(tableRow = tableRows.iterateNext()) { var link = doc.evaluate('./a', tableRow, null, XPathResult.ANY_TYPE, null).iterateNext(); var m = javaScriptRe.exec(link.href); if(m) { uris[i] = host+"/itw/infomark/192/215/90714844w6"+m[1]+"?sw_aep=olr_wad"+m[2]; } var article = doc.evaluate('./b/text()|./strong/text', link, null, XPathResult.ANY_TYPE, null).iterateNext(); items[i] = article.nodeValue; // Chop off final period if(items[i].substr(items[i].length-1) == ".") { items[i] = items[i].substr(0, items[i].length-1); } elmts[i] = doc.evaluate(".//comment()", tableRow, null, XPathResult.ANY_TYPE, null); citation[i] = ZU.xpath(tableRow, '//') i++; } items = Zotero.selectItems(items); if(!items) { return true; } for(var i in items) { extractCitation(uris[i], elmts[i], items[i]); } } } }