summary refs log tree commit diff
path: root/zotero-import/CiteSeer.js
blob: 9fca79c8b1d46f98655c666537ee3da1fd1fc03f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
{
	"translatorID":"fa396dd4-7d04-4f99-95e1-93d6f355441d",
	"translatorType":4,
	"label":"CiteSeer",
	"creator":"Simon Kornblith",
	"target":"^http://(?:citeseer\\.ist\\.psu\\.edu/|citeseer\\.csail\\.mit\\.edu/|citeseer\\.ifi\\.unizh\\.ch/|citeseer\\.comp\\.nus\\.edu\\.sg/)",
	"minVersion":"1.0.0b3.r1",
	"maxVersion":"",
	"priority":100,
	"inRepository":true,
	"lastUpdated":"2008-02-06 21:00:00"
}

/**
 * Decides whether the current page is something this translator can handle.
 *
 * @param doc  DOM document of the page being inspected
 * @param url  address of the page
 * @return "multiple" when the URL path begins with /cs or /cis (treated as a
 *         result listing), "journalArticle" when the page contains the <pre>
 *         element under span.m that scrape() reads its BibTeX from, and
 *         nothing (undefined) otherwise
 */
function detectWeb(doc, url) {
	// Listing pages are recognised purely by URL; the DOM is not consulted.
	if (/http:\/\/[^\/]+\/ci?s/.test(url)) {
		return "multiple";
	}

	// Build a resolver only when the document actually declares a namespace.
	var ns = doc.documentElement.namespaceURI;
	var resolver = null;
	if (ns) {
		resolver = function(prefix) {
			return prefix == 'x' ? ns : null;
		};
	}

	var preNode = doc.evaluate('/html/body/span[@class="m"]/pre', doc, resolver,
	                           XPathResult.ANY_TYPE, null).iterateNext();
	if (preNode) {
		return "journalArticle";
	}
	// Falls through returning undefined: page not recognised.
}

/**
 * Scrapes one CiteSeer document page.
 *
 * Picks at most one full-text link (PDF preferred, then PS, then PS.gz) to
 * use as an attachment, then hands the text of the page's <pre> block to the
 * import translator 9cb70025-a888-4a29-a210-93ec52da40d4 (expected to be the
 * BibTeX importer) and completes every item it produces.
 *
 * @param doc  DOM document of the page to scrape
 * @throws the string "No BibTeX found!" when the page has no <pre> text node
 */
function scrape(doc) {
	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = namespace ? function(prefix) {
		if (prefix == 'x') return namespace; else return null;
	} : null;
	
	// Link labels we know how to attach, most preferred first; mimeTypes is
	// index-aligned with acceptableTypes.
	var acceptableTypes = ["PDF", "PS", "PS.gz"];
	var mimeTypes = ["application/pdf", "application/postscript", "application/gzip"];
	
	// figure out what attachment to add: a single pass keeps the link whose
	// label ranks best; on a tie the first link in document order wins
	var results = doc.evaluate('/html/body/span[@class="m"]/table[@class="h"]/tbody/tr/td[4]/center/font/a',
	                       doc, nsResolver, XPathResult.ANY_TYPE, null);
	var bestLink = null;
	var bestIndex = -1;
	var link;
	while ((link = results.iterateNext())) {
		var index = acceptableTypes.indexOf(link.textContent.toString());
		if (index != -1 && (bestLink === null || index < bestIndex)) {
			bestLink = link;
			bestIndex = index;
		}
	}
	
	var attachments = [];
	if (bestLink) {
		attachments.push({url:bestLink.href, mimeType:mimeTypes[bestIndex],
		                  title:"CiteSeer Full Text "+acceptableTypes[bestIndex]});
	}
	
	var bibtex = doc.evaluate('/html/body/span[@class="m"]/pre/text()', doc, nsResolver,
		                XPathResult.ANY_TYPE, null).iterateNext();
	if(!bibtex) {
		// Kept as a thrown string so anything catching it sees the same value as before.
		throw "No BibTeX found!";
	}
	
	var translator = Zotero.loadTranslator("import");
	translator.setTranslator("9cb70025-a888-4a29-a210-93ec52da40d4");
	translator.setString(bibtex.nodeValue.toString());
	translator.setHandler("itemDone", function(obj, item) {
		// The imported url is expected to lack a scheme; add http:// only
		// when none is present so an already absolute URL is not turned
		// into "http://http://...".
		if(item.url && !/^[a-z][a-z0-9+.\-]*:\/\//i.test(item.url)) {
			item.url = "http://"+item.url;
		}
		// Give each item its own array so items from the same import do not
		// share one attachment list.
		item.attachments = attachments.slice();
		
		item.complete();
	});
	translator.translate();
}

/**
 * Entry point for saving from the current page.
 *
 * When the document's address looks like a result listing (path starting
 * with /cs or /cis), collects the links to same-host ".html" pages, lets the
 * user choose among them and scrapes each chosen page; otherwise scrapes the
 * current document directly.
 *
 * @param doc  DOM document of the current page
 * @param url  address of the page (unused; the address is read from
 *             doc.location.href)
 * @return true when the user selects nothing, otherwise nothing (undefined)
 */
function doWeb(doc, url) {
	var searchRe = /http:\/\/([^\/]+)\/ci?s/;
	var m = searchRe.exec(doc.location.href);
	if(!m) {
		// not a listing: treat the page itself as the document
		scrape(doc);
		return;
	}
	
	// The host is spliced into a regular expression, so neutralise any
	// metacharacters in it (notably the dots) to make it match literally.
	var host = m[1].replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
	var items = Zotero.Utilities.getItemArray(doc, doc, "^http://"+host+"/[^/]+\\.html");
	items = Zotero.selectItems(items);
	
	if(!items) {
		return true;
	}
	
	// the URLs are the keys of the selected-items object
	var urls = [];
	for(var i in items) {
		urls.push(i);
	}
	
	Zotero.Utilities.processDocuments(urls, scrape, function() { Zotero.done(); });
	Zotero.wait();
}