summaryrefslogtreecommitdiff
path: root/Library Catalog (SIRSI eLibrary).js
blob: 3448a9c672cec0b16a238dd0320c057a0decb85e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
{
	"translatorID": "23ba3be6-412d-4dde-9cc1-c4df0cc09378",
	"label": "Library Catalog (SIRSI eLibrary)",
	"creator": "Mang Sun",
	"target": "/uhtbin/(?:cgisirsi|quick_keyword)",
	"minVersion": "3.0",
	"maxVersion": "",
	"priority": 250,
	"inRepository": true,
	"translatorType": 4,
	"browserSupport": "gcsibv",
	"lastUpdated": "2014-08-26 04:08:13"
}

/* Based on the SIRSI translator by Simon Kornblith and Michael Berkowitz,
   and the modifications for Rutgers (IRIS) by Chad Mills.
   Includes code for Spanish version, e.g. PUCP: http://biblioteca.pucp.edu.pe/ (no permalink)
   and UChile www.catalogo.uchile.cl
 */

function detectWeb(doc, url) {
	if (doc.evaluate('//div[@class="columns_container"]/div[contains(@class, "left_column")]/div[@class="content_container"]/div[@class="content"]/form[@id="hitlist"]', doc, null, XPathResult.ANY_TYPE, null).iterateNext()) {
		return "multiple";
	} else if (doc.evaluate('//div[@class="columns_container"]/div[contains(@class, "left_column")]/form[@name="item_view"]/div[@class="content_container item_details"]/div[@class="content"]/h3[.="Item Details" or .="Detalles del ítem" or .="Detalle"] ', doc, null, XPathResult.ANY_TYPE, null).iterateNext()) {
		return "book";
	}
}

function scrape(doc, url) {

	var xpath = '//ul[contains(@class, "detail_page")]/li[@id="detail_marc_record"]/dl/dt[@class="viewmarctags"]';

	var elmts = doc.evaluate(xpath, doc, null, XPathResult.ANY_TYPE, null);

	var elmt = elmts.iterateNext();

	if (!elmt) {
		return false;
	}

	var newItem = new Zotero.Item("book");
	newItem.extra = "";
	newItem.series = "";
	var seriesItemCount = 0;


	while (elmt) {
		try {

			//By Rice. Select all non space text nodes.
			var node = doc.evaluate('./text()[normalize-space()]', elmt, null, XPathResult.ANY_TYPE, null).iterateNext();

			if (node) {

				if (doc.evaluate('following-sibling::dd[position()=1]/a/text()', elmt, null, XPathResult.ANY_TYPE, null).iterateNext()) {

					//By Rice. Some meta data must be retrieved from the text node of anchor tags.
					var value = Zotero.Utilities.superCleanString(doc.evaluate('following-sibling::dd[position()=1]/a/text()[normalize-space()]', elmt, null, XPathResult.ANY_TYPE, null).iterateNext().nodeValue);
				} else {
					//while other metadata can be retrieved directly from the text node of  DD tags 	
					var value = Zotero.Utilities.superCleanString(doc.evaluate('following-sibling::dd[position()=1]/text()[normalize-space()]', elmt, null, XPathResult.ANY_TYPE, null).iterateNext().nodeValue);
				}


				//acquire label and create super clean text by removing colon, space and etc.
				casedField = Zotero.Utilities.superCleanString(node.nodeValue);
				field = casedField.toLowerCase();

				//Z.debug(field)
				if (field == "publisher") {
					newItem.publisher = value;
				} else if (field == "pub date") {
					var re = /[0-9]+/;
					var m = re.exec(value);
					newItem.date = m[0];
				} else if (field == "isbn") {
					var re = /^[0-9](?:[0-9X]+)/;
					var m = re.exec(value);
					newItem.ISBN = m[0];
				} else if (field == "title" || field =="titulo" || field == "título") {
					var titleParts = value.split(" / ");
					re = /\[(.+)\]/i;
					if (re.test(titleParts[0])) {
						var ar = re.exec(titleParts[0]);
						var itype = ar[1].toLowerCase();
						if (itype == "phonodisc" || itype == "sound recording") {
							newItem.itemType = "audioRecording";
						} else if (itype == "videorecording") {
							newItem.itemType = "videoRecording";
						} else if (itype == "electronic resource") {
							//newItem.itemType = "webPage";
							//Rice treats eletronic resource as book
							newItem.itemType = "book";
						}
					}
					newItem.title = Zotero.Utilities.capitalizeTitle(titleParts[0]);

				} else if (field == "series"|| field =="serie"||field == "series title") { //push onto item, delimit with semicolon when needed
					if (seriesItemCount != 0) {
						newItem.series += "; " + value;
					} else if (seriesItemCount == 0) {
						newItem.series = value;
					}
					seriesItemCount++; //bump counter
				} else if (field == "dissertation note" || field == "nota de tesis") {
					newItem.itemType = "thesis";
					var thesisParts = value.split("--");
					var uniDate = thesisParts[1].split(", ");
					newItem.university = uniDate[0];
					newItem.date = uniDate[1];
				} else if (field == "edition"|| field =="edicion" || field =="edición") {
					newItem.edition = value;
				} else if (field == "physical description" || field =="descripcion" || field == "descripción física") {
					var physParts = value.split(" : ");
					var physParts = physParts[0].split(" ; ");
					//determine pages, split on " p."
					var physPages = value.split(/ p.*/);
					//break off anything in the beginning before the numbers
					var pageParts = physPages[0].split(" ");
					newItem.numPages = pageParts[pageParts.length - 1];
				} else if (field == "publication info" || field =="pie de imprenta" || field == "datos publicación") {
					var pubParts = value.split(" : ");
					newItem.place = pubParts[0];
					//drop off first part of array and recombine
					pubParts.shift();
					var i;
					var publisherInfo;
					for (i in pubParts) {
						if (i == 0) {
							publisherInfo = pubParts[i] + " : ";
						} else {
							publisherInfo = publisherInfo + pubParts[i] + " : ";
						}
					} //END for
					//drop off last colon
					publisherInfo = publisherInfo.substring(0, (publisherInfo.length - 3));
					//break apart publication parts into Publisher and Date
					var publisherParts = publisherInfo.split(",");
					newItem.publisher = publisherParts[0];
					//check that first character isn't a 'c', if so drop it
					if (publisherParts[1].substring(1, 2) == "c") {
						newItem.date = publisherParts[1].substring(2);
					} else {
						newItem.date = publisherParts[1];
					}
				} else if (field == "personal author" || field=="autor personal")  {
					newItem.creators.push(Zotero.Utilities.cleanAuthor(value, "author", true));
				} else if (field == "performer") {
					newItem.creators.push(Zotero.Utilities.cleanAuthor(value, "performer", true));
				} else if (field == "author") {
					newItem.creators.push(Zotero.Utilities.cleanAuthor(value, "author", true));
				} else if (field == "added author" || field == "otros autores") {
					newItem.creators.push(Zotero.Utilities.cleanAuthor(value, "contributor", true));
				} else if (field == "conference author" || field == "corporate author") {



					//The following line is included by Rice to handle corporate or conference author
					newItem.creators.push(Zotero.Utilities.cleanAuthor(value, "author", true));
				} else if (field == "subject" || field == "corporate subject" || field == "geographic term" || field=="tema" || field=="materia") {
					var subjects = value.split("--");
					newItem.tags = newItem.tags.concat(subjects);
				} else if (field == "personal subject") {
					var subjects = value.split(", ");
					newItem.tags = newItem.tags.push(value[0] + ", " + value[1]);
				} else if (value && field != "http") {
					newItem.extra += casedField + ": " + value + "\n";
				}
			}
		} catch (e) {}
		elmt = elmts.iterateNext();

	} //END if node
	if (newItem.extra) {
		newItem.extra = newItem.extra.substr(0, newItem.extra.length - 1);
	}

	var callNumber = doc.evaluate('//tr/td[1][@class="holdingslist"]/strong/text()|//tr/td[1][@class="holdingslist"]/b/text()', doc, null, XPathResult.ANY_TYPE, null).iterateNext();

	if (callNumber && callNumber.nodeValue) {
		newItem.callNumber = callNumber.nodeValue;
	}

	newItem.libraryCatalog = "Library Catalog";

	newItem.complete();
	return true;
}


function doWeb(doc, url) {

	var sirsiNew = true; //toggle between SIRSI -2003 and SIRSI 2003+
	//Adapted to catch the hitlist page of Rice Catalog
	var xpath = '/html/body/div[@class="columns_container"]/div[contains(@class, "left_column")]/div[@class="content_container"]/div[@class="content"]/form[@id="hitlist"]/ul[@class="hit_list"]/li/ul[starts-with(@class, "hit_list_row")]/li[@class="hit_list_item_info"]/dl';
	if (doc.evaluate(xpath, doc, null, XPathResult.ANY_TYPE, null).iterateNext()) {
		Zotero.debug("SIRSI doWeb: searchsum");
		sirsiNew = true;
	} else if (doc.evaluate('//form[@name="hitlist"]/table/tbody/tr', doc, null, XPathResult.ANY_TYPE, null).iterateNext()) {
		Zotero.debug("SIRSI doWeb: hitlist");
		sirsiNew = false;
	} else if (doc.evaluate('//tr[th[@class="viewmarctags"]][td[@class="viewmarctags"]]', doc, null, XPathResult.ANY_TYPE, null).iterateNext()) {
		Zotero.debug("SIRSI doWeb: viewmarctags");
		sirsiNew = true;
	} else if (doc.evaluate('//input[@name="VOPTIONS"]', doc, null, XPathResult.ANY_TYPE, null).iterateNext()) {
		Zotero.debug("SIRSI doWeb: VOPTIONS");
		sirsiNew = false;
	} else {
		var elmts = doc.evaluate('/html/body/form//text()', doc, null, XPathResult.ANY_TYPE, null);
		//var elmts = doc.evaluate(' ', doc, null, XPathResult.ANY_TYPE, null);
		while (elmt = elmts.iterateNext()) {
			if (Zotero.Utilities.superCleanString(elmt.nodeValue) == "Viewing record") {
				Zotero.debug("SIRSI doWeb: Viewing record");
				sirsiNew = false;
			}
		} //END while elmts
	} //END FUNCTION doWeb
	// Zotero.debug(xpath);
	if (sirsiNew) { //executes Simon's SIRSI 2003+ scraper code
		if (!scrape(doc)) {

			var checkboxes = new Array();
			var urls = new Array();
			var availableItems = new Array();
			//pull items
			var tableRows = doc.evaluate('//ul[@class="hit_list"]/li/ul[contains(@class, "hit_list_row")][//input[@value="Details" or @value="Detalles"]]', doc, null, XPathResult.ANY_TYPE, null);
			Z.debug(ZU.xpath(doc, '//ul[@class="hit_list"]/li/ul[contains(@class, "hit_list_row")][//input[@value="Details" or @value="Detalles"]]').length)

			// Go through table rows
			while (tableRow = tableRows.iterateNext()) {
				Z.debug("here")
				var input = doc.evaluate('.//input[@value="Details" or @value="Detalles"]', tableRow, null, XPathResult.ANY_TYPE, null).iterateNext();

				//var text = doc.evaluate('.//strong', tableRow, null, XPathResult.ANY_TYPE, null).iterateNext().textContent;
				var text = doc.evaluate('.//dd[@class="title"]/a', tableRow, null, XPathResult.ANY_TYPE, null).iterateNext().textContent;
				if (text) {
					availableItems[input.name] = text.trim();
				}
			} //END while
			var items = Zotero.selectItems(availableItems);
			if (!items) {
				return true;
			}

			var hostRe = new RegExp("^http(?:s)?://[^/]+");
			var m = hostRe.exec(doc.location.href);
			Zotero.debug("href: " + doc.location.href);
			var hitlist = doc.forms.namedItem("hitlist");
			var baseUrl = m[0] + hitlist.getAttribute("action") + "?first_hit=" + hitlist.elements.namedItem("first_hit").value + "&last_hit=" + hitlist.elements.namedItem("last_hit").value;
			var alexandria = new Array();
			for (var i in items) {
				alexandria.push(baseUrl + "&" + i + "=Details");
			}
			Zotero.Utilities.processDocuments(alexandria, function (doc) {
				scrape(doc)
			}, function () {
				Zotero.done()
			}, null);
			Zotero.wait();
		} //END if not scrape(doc)
	} else { //executes Simon's SIRSI -2003 translator code
		Zotero.debug("Running SIRSI -2003 code");

		var uri = doc.location.href;
		var recNumbers = new Array();
		var xpath = '//form[@name="hitlist"]/table/tbody/tr';
		var elmts = doc.evaluate(xpath, doc, null, XPathResult.ANY_TYPE, null);
		var elmt = elmts.iterateNext();
		if (elmt) { // Search results page
			var uriRegexp = /^https?:\/\/[^\/]+/;
			var m = uriRegexp.exec(uri);
			var postAction = doc.forms.namedItem("hitlist").getAttribute("action");
			var newUri = m[0] + postAction.substr(0, postAction.length - 1) + "40";
			var titleRe = /<br>\s*(.*[^\s])\s*<br>/i;
			var items = new Array();
			do {
				var checkbox = doc.evaluate('.//input[@type="checkbox"]', elmt, null, XPathResult.ANY_TYPE, null).iterateNext();
				// Collect title
				var title = doc.evaluate("./td[2]", elmt, null, XPathResult.ANY_TYPE, null).iterateNext().textContent;
				if (checkbox && title) {
					items[checkbox.name] = Zotero.Utilities.trimInternal(title);
				}
			} while (elmt = elmts.iterateNext());
			items = Zotero.selectItems(items);
			if (!items) {
				return true;
			}
			for (var i in items) {
				recNumbers.push(i);
			}
		} else { // Normal page
			// this regex will fail about 1/100,000,000 tries
			var uriRegexp = /^((.*?)\/([0-9]+?))\//;
			var m = uriRegexp.exec(uri);
			var newUri = m[1] + "/40"
			var elmts = doc.evaluate('/html/body/form', doc, null, XPathResult.ANY_TYPE, null);
			while (elmt = elmts.iterateNext()) {
				var initialText = doc.evaluate('.//text()[1]', elmt, null, XPathResult.ANY_TYPE, null).iterateNext();
				if (initialText && initialText.nodeValue && Zotero.Utilities.superCleanString(initialText.nodeValue) == "Viewing record") {
					recNumbers.push(doc.evaluate('./b[1]/text()[1]', elmt, null, XPathResult.ANY_TYPE, null).iterateNext().nodeValue);
					break;
				}
			}
		}
		var translator = Zotero.loadTranslator("import");
		translator.setTranslator("a6ee60df-1ddc-4aae-bb25-45e0537be973");
		translator.getTranslatorObject(function(marc) {
			Zotero.Utilities.loadDocument(newUri + '?marks=' + recNumbers.join(",") + '&shadow=NO&format=FLAT+ASCII&sort=TITLE&vopt_elst=ALL&library=ALL&display_rule=ASCENDING&duedate_code=l&holdcount_code=t&DOWNLOAD_x=22&DOWNLOAD_y=12&address=&form_type=', function (doc) {
				var pre = doc.getElementsByTagName("pre");
				var text = pre[0].textContent;
				var documents = text.split("*** DOCUMENT BOUNDARY ***");
				for (var j = 1; j < documents.length; j++) {
					var uri = newUri + "?marks=" + recNumbers[j] + "&shadow=NO&format=FLAT+ASCII&sort=TITLE&vopt_elst=ALL&library=ALL&display_rule=ASCENDING&duedate_code=l&holdcount_code=t&DOWNLOAD_x=22&DOWNLOAD_y=12&address=&form_type=";
					var lines = documents[j].split("\n");
					var record = new marc.record();
					var tag, content;
					var ind = "";
					for (var i = 0; i < lines.length; i++) {
						var line = lines[i];
						if (line[0] == "." && line.substr(4, 2) == ". ") {
							if (tag) {
								content = content.replace(/\|([a-z])/g, marc.subfieldDelimiter + "$1");
								record.addField(tag, ind, content);
							}
						} else {
							content += " " + line.substr(6);
							continue;
						}
						tag = line.substr(1, 3);
						if (tag[0] != "0" || tag[1] != "0") {
							ind = line.substr(6, 2);
							content = line.substr(8);
						} else {
							content = line.substr(7);
							if (tag == "000") {
								tag = undefined;
								record.leader = "00000" + content;
								Zotero.debug("the leader is: " + record.leader);
							}
						}
					} //end FOR
					var newItem = new Zotero.Item();
					record.translate(newItem);
	
					newItem.libraryCatalog = "Library Catalog";
	
					newItem.complete();
				} //end FOR
			});
		});
	} //END while
} //END scrape function
/** BEGIN TEST CASES **/
var testCases = [
	{
		"type": "web",
		"url": "https://library.usc.edu/uhtbin/cgisirsi/x/0/0/5?searchdata1=2420992{CKEY}",
		"items": [
			{
				"itemType": "book",
				"creators": [
					{
						"firstName": "Pierre",
						"lastName": "Bourdieu",
						"creatorType": "author"
					}
				],
				"notes": [],
				"tags": [
					"Real estate business",
					"Social aspects",
					"France",
					"Val-d'Oise",
					"Housing policy",
					"France",
					"Economics",
					"Sociological aspects"
				],
				"seeAlso": [],
				"attachments": [],
				"extra": "Uniform title: Structures sociales de l'économie. English\nGeneral note: Translated from the French\nLanguage: Translated from the French\nBibliography note: Includes bibliographical references (p. [233]-251) and index\nLCCN: 2005620708\nControl Number: ocm61244051",
				"title": "The social structures of the economy",
				"place": "Cambridge, UK ; Malden, MA",
				"publisher": "Polity",
				"date": "2005",
				"numPages": "263",
				"ISBN": "0745625401",
				"libraryCatalog": "Library Catalog"
			}
		]
	}
]
/** END TEST CASES **/