{ "translatorID": "57a00950-f0d1-4b41-b6ba-44ff0fc30289", "label": "Google Scholar", "creator": "Simon Kornblith, Frank Bennett, Aurimas Vinckevicius", "target": "^https?://scholar\\.google\\.(?:com|cat|(?:com?\\.)?[a-z]{2})/(?:scholar(?:_case)?\\?|citations\\?)", "minVersion": "3.0", "maxVersion": "", "priority": 100, "inRepository": true, "translatorType": 4, "browserSupport": "gcsibv", "lastUpdated": "2014-07-25 21:01:15" } /* * Test pages * * Searches of Google Scholar with the following terms should yield a folder * icon that works. Check that unlinked ([CITATION]) items that provide * no BibTeX data (there is currently one under "Marbury v. Madison", * and "clifford" seems to be a good source of garbage) are * dropped from the listings: * * marbury v madison * kelo * smith * view of the cathedral * clifford * * "How cited" pages should NOT yield a page or folder icon. The * Urls to these currently look like this: * * http://scholar.google.co.jp/scholar_case?about=1101424605047973909&q=kelo&hl=en&as_sdt=2002 * * Case pages should present a document icon that works: * * http://scholar.google.co.jp/scholar_case?case=18273389148555376997&hl=en&as_sdt=2002&kqfp=13204897074208725174&kql=186&kqpfp=16170611681001262513#kq */ var bogusItemID = 1; var __old_CF, __result_counter=0; var detectWeb = function (doc, url) { // Icon shows only for search results and law cases if (url.indexOf('/scholar_case?') != -1 && url.indexOf('about=') == -1) { return "case"; } else if(url.indexOf('/citations?') != -1) { //individual saved citation var link = ZU.xpathText(doc, '//a[@class="gsc_title_link"]/@href'); if(!link) return; if(link.indexOf('/patents?') != -1) { return 'patent'; } else if(link.indexOf('/scholar_case?') != -1) { return 'case'; } else { //Can't distinguish book from journalArticle //Both have "Journal" fields return 'journalArticle'; } } else if( getViableResults(doc).length ) { return "multiple"; } } /********************************* * Cookie manipulation functions * *********************************/ //sets Google Scholar Preference cookie function setGSPCookie(doc, cf) { var m = doc.cookie.match(/\bGSP=[^;]+/); var cookie = m ? m[0] : ''; if(!cookie) return; Z.debug('Changing cookie: ' + cookie); if(cookie.search(/\bCF=/) != -1) { cookie = cookie.replace(/\s*\bCF=\d*(:?)/,cf ? 'CF=' + cf + '$1' : ''); } else { cookie += ':CF=' + cf; } // Make sure we capture "0-" in // http://0-scholar.google.co.za.innopac.up.ac.za/... var domain = doc.location.href .match(/https?:\/\/[^\/]*?([^.\/]*scholar\.google\.[^:\/]+)/i)[1]; cookie += '; domain=.' + domain + '; expires=Sun, 17 Jan 2038 19:14:09 UTC'; //this is what google scholar uses doc.cookie = cookie; Z.debug('Cookie set to: ' + cookie); } //set cookie using Googles Scholar preferences page function setCookieThroughPrefs(doc, callback) { url = doc.location.href.replace(/hl\=[^&]*&?/, "") .replace("scholar?", "scholar_settings?"); ZU.doGet(url, function(scisigDoc) { var scisig = //ig); if(!form) { Z.debug('No forms found on page.'); Z.debug(scisigDoc); } else { Z.debug(form.join('\n\n')); } } url = url.replace("scholar_settings?", "scholar_setprefs?") + "&scis=yes&scisf=4&submit=&scisig="+scisig[1]; //set prefernces Z.debug('Submitting settings to Google Scholar: ' + url); ZU.doGet(url, function(response) { callback(doc); }); }); } function prepareCookie(doc, callback) { // Google Scholar always sets GSP if(doc.cookie.search(/\bGSP=/) != -1) { //check if we need to change cookie var m = doc.cookie.match(/\bGSP=[^;]*?\bCF=(\d+)/); __old_CF = undefined; if(!m || m[1] != '4') { __old_CF = (m && m[1]) || ''; setGSPCookie(doc, '4'); } callback(doc); } else { Z.debug("Attempting to set cookie through GS Settings page"); //some proxies do not pass cookies through, so we need to set this by //going to the preferences page setCookieThroughPrefs(doc, callback); } } function restoreCookie(doc) { if(__old_CF != undefined) { setGSPCookie(doc, __old_CF); } } function decrementCounter(doc) { /**Possible race condition!! But there should never be any * lock-ups or detremental effects as long as only one * instance of the translator can be run at a time and * we do not change __old_cookie after setting it initially */ __result_counter--; if(__result_counter<1) restoreCookie(doc); } /***************************** * Other accessory functions * *****************************/ //determine item type from a result node function determineType(result) { var titleHref = ZU.xpathText(result, './/h3[@class="gs_rt"]/a[1]/@href'); if(titleHref) { if(titleHref.indexOf('/scholar_case?') != -1) { return 'case'; } else if(titleHref.indexOf('/patents?') != -1) { return 'patent'; } else if(titleHref.indexOf('/books?') != -1) { return 'book'; } else if(titleHref.indexOf('/citations?') == -1){ //not a saved citation return 'article'; } } /**if there is no link (i.e. [CITATION]), or we're looking at saved citations * we can determine this by the second line. * Patents have the word Patent here * Cases seem to always start with a number * Books just have year after last dash * Articles are assumed to be everything else * * This is probably not going to work with google scholar in other languages */ var subTitle = ZU.xpathText(result, './/div[@class="gs_a"]'); if(!subTitle) return 'article'; subTitle = subTitle.trim(); if(subTitle.search(/\bpatent\s+\d/i) != -1) { return 'patent'; } if(subTitle.search(/^\d/) != -1) { return 'case'; } if(subTitle.search(/-\s*\d+$/) != -1) { return 'book'; } return 'article'; } function getAttachment(url, title) { //try to determine mimeType from title var m = title.match(/^\s*\[([^\]]+)\]/); if(!m) return; m = m[1].toUpperCase(); var mimeType = getAttachment.mimeTypes[m]; if(!mimeType) return; return {title: title, url: url, mimeType: mimeType}; } getAttachment.mimeTypes = { 'PDF': 'application/pdf', 'DOC': 'application/msword', 'HTML': 'text/html' }; /********************* * Scraper functions * *********************/ function getViableResults(doc) { return ZU.xpath(doc, '//div[@class="gs_r"]\ [.//div[@class="gs_fl"]/a[contains(@href,"q=info:")\ or contains(@href,"q=related:")\ or contains(@onclick, "gs_ocit(event")]]'); } function scrapeArticleResults(doc, articles) { for(var i=0, n=articles.length; i 0) { return true; } return false; }; ItemFactory.prototype.getDate = function () { var i, m; // Citelet parsing, step (1) if (!this.hyphenSplit) { this.hyphenSplit = this.citelet.split(/\s+-\s+/); this.trailingInfo = this.hyphenSplit.slice(-1); } if (!this.v.date && this.v.date !== false) { this.v.date = false; for (i = this.hyphenSplit.length - 1; i > -1; i += -1) { m = this.hyphenSplit[i].match(/(?:(.*)\s+)*([0-9]{4})$/); if (m) { this.v.date = m[2]; if (m[1]) { this.hyphenSplit[i] = m[1]; } else { this.hyphenSplit[i] = ""; } this.hyphenSplit = this.hyphenSplit.slice(0, i + 1); break; } } } return this.v.date; }; ItemFactory.prototype.getCourt = function () { var s, m; // Citelet parsing, step (2) s = this.hyphenSplit.pop().replace(/,\s*$/, "").replace(/\u2026\s*$/, "Court"); m = s.match(/(?:([a-zA-Z]+):\s*)*(.*)/); if (m) { this.v.court = m[2].replace(/_/g, " "); if (m[1]) { this.v.extra = "{:jurisdiction: " + m[1] + "}"; } } return this.v.court; }; ItemFactory.prototype.getVolRepPag = function () { var i, m; // Citelet parsing, step (3) if (this.hyphenSplit.length) { this.commaSplit = this.hyphenSplit.slice(-1)[0].split(/\s*,\s+/); var gotOne = false; for (i = this.commaSplit.length - 1; i > -1; i += -1) { m = this.commaSplit[i].match(/^([0-9]+)\s+(.*)\s+(.*)/); if (m) { var volRepPag = {}; volRepPag.volume = m[1]; volRepPag.reporter = m[2]; volRepPag.pages = m[3].replace(/\s*$/, ""); this.commaSplit.pop(); if (!volRepPag.pages.match(/[0-9]$/) && (i > 0 || gotOne)) { continue; } gotOne = true; this.vv.volRepPag.push(volRepPag); } else { break; } } } }; ItemFactory.prototype.getTitle = function () { // Citelet parsing, step (4) [optional] if (this.commaSplit) { this.v.title = this.commaSplit.join(", "); } }; ItemFactory.prototype.getDocketNumber = function (doc) { var docNumFrag = doc.evaluate( '//center[preceding-sibling::center//h3[@id="gsl_case_name"]]\ | //div[@class="gsc_value" and preceding-sibling::div[text()="Docket id"]]', doc, null, XPathResult.ANY_TYPE, null).iterateNext(); if (docNumFrag) { this.v.docketNumber = docNumFrag.textContent .replace(/^\s*[Nn][Oo](?:.|\s+)\s*/, "") .replace(/\.\s*$/, ""); } }; ItemFactory.prototype.getAttachments = function (doctype) { var i, ilen, attachments; attachments = []; for (i = 0, ilen = this.attachmentLinks.length; i < ilen; i += 1) { attachments.push({title:"Google Scholar Linked " + doctype, type:"text/html", url:this.attachmentLinks[i]}); } return attachments; }; ItemFactory.prototype.pushAttachments = function (doctype) { this.item.attachments = this.getAttachments(doctype); }; /* ItemFactory.prototype.getBibtexData = function (callback) { if (!this.bibtexData) { if (this.bibtexData !== false) { Zotero.Utilities.doGet(this.bibtexLink, function(bibtexData) { if (!bibtexData.match(/title={{}}/)) { this.bibtexData = bibtexData; } else { this.bibtexData = false; } callback(this.bibtexData); }); return; } } callback(this.bibtexData); }; */ ItemFactory.prototype.saveItem = function () { var i, ilen, key; if (this.v.title) { this.repairTitle(); if (this.vv.volRepPag.length) { var completed_items = []; for (i = 0, ilen = this.vv.volRepPag.length; i < ilen; i += 1) { this.item = new Zotero.Item("case"); for (key in this.vv.volRepPag[i]) { if (this.vv.volRepPag[i][key]) { this.item[key] = this.vv.volRepPag[i][key]; } } this.saveItemCommonVars(); if (i === (this.vv.volRepPag.length - 1)) { this.pushAttachments("Judgement"); } this.item.itemID = "" + bogusItemID; bogusItemID += 1; completed_items.push(this.item); } for (i = 0, ilen = completed_items.length; i < ilen; i += 1) { for (j = 0, jlen = completed_items.length; j < jlen; j += 1) { if (i === j) { continue; } completed_items[i].seeAlso.push(completed_items[j].itemID); } completed_items[i].complete(); } } else { this.item = new Zotero.Item("case"); this.saveItemCommonVars(); this.pushAttachments("Judgement"); this.item.complete(); } } }; ItemFactory.prototype.saveItemCommonVars = function () { for (key in this.v) { if (this.v[key]) { this.item[key] = this.v[key]; } } }; /** BEGIN TEST CASES **/ var testCases = [ { "type": "web", "url": "http://scholar.google.com/scholar?q=marbury&hl=en&btnG=Search&as_sdt=1%2C22&as_sdtp=on", "items": "multiple" }, { "type": "web", "url": "http://scholar.google.com/scholar?hl=en&q=kelo&btnG=Search&as_sdt=0%2C22&as_ylo=&as_vis=0", "items": "multiple" }, { "type": "web", "url": "http://scholar.google.com/scholar?hl=en&q=smith&btnG=Search&as_sdt=0%2C22&as_ylo=&as_vis=0", "items": "multiple" }, { "type": "web", "url": "http://scholar.google.com/scholar?hl=en&q=view+of+the+cathedral&btnG=Search&as_sdt=0%2C22&as_ylo=&as_vis=0", "items": "multiple" }, { "type": "web", "url": "http://scholar.google.com/scholar?hl=en&q=clifford&btnG=Search&as_sdt=0%2C22&as_ylo=&as_vis=0", "items": "multiple" }, { "type": "web", "url": "http://scholar.google.com/scholar_case?case=9834052745083343188&q=marbury+v+madison&hl=en&as_sdt=2,5", "items": [ { "itemType": "case", "creators": [], "notes": [], "tags": [], "seeAlso": [], "attachments": [ { "title": "Google Scholar Linked Judgement", "type": "text/html", "url": false } ], "volume": "5", "reporter": "US", "pages": "137", "title": "Marbury v. Madison", "court": "Supreme Court", "date": "1803", "itemID": "1", "libraryCatalog": "Google Scholar" } ] } ] /** END TEST CASES **/