summaryrefslogtreecommitdiff
path: root/The Hindu (old).js
blob: fa0f6ed0e529501ab5736b6d1fc0f7ec2a88e380 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
{
	"translatorID": "9499c586-d672-42d6-9ec4-ee9594dcc571",
	"label": "The Hindu (old)",
	"creator": "Prashant Iyengar and Michael Berkowitz",
	"target": "^https?://(www\\.)?hindu\\.com",
	"minVersion": "1.0.0b4.r5",
	"maxVersion": "",
	"priority": 100,
	"inRepository": true,
	"translatorType": 4,
	"browserSupport": "gcsibv",
	"lastUpdated": "2014-04-04 09:55:32"
}

/**
 * Decide what kind of page the translator is looking at.
 *
 * Runs the XPath //h2[@class="r"]/a[@class="l"] against the document; if it
 * yields at least one node the page is reported as a list of results,
 * otherwise as a single article.
 *
 * @param doc  Document to inspect (must provide evaluate()).
 * @param url  Page URL; not consulted by this function.
 * @return "multiple" when a matching link exists, else "newspaperArticle".
 */
function detectWeb(doc, url) {
	var firstResultLink = doc.evaluate('//h2[@class="r"]/a[@class="l"]', doc, null, XPathResult.ANY_TYPE, null).iterateNext();
	return firstResultLink ? "multiple" : "newspaperArticle";
}

/**
 * Copy the value of one recognised META tag onto an item.
 *
 * The tag text is expected to contain NAME="..." followed by CONTENT="...".
 * Only four names are handled; each stores the first whitespace-delimited
 * word of CONTENT in the corresponding item field:
 *   PAGEHEAD   -> item.section
 *   ZONE       -> item.place
 *   EXPORTTIME -> item.date
 *   PAGENUMBER -> item.pages
 * Any other name, or text that does not match the pattern, leaves the item
 * untouched.
 *
 * @param str   Source text of a single META tag.
 * @param item  Object that receives the field (mutated in place).
 */
function regexMeta(str, item) {
	var re = /NAME\=\"([\w\W]*?)\"\s+CONTENT\=\"([\w\W]*?)\"/;
	var stuff = str.match(re);
	if (!stuff) {
		return;
	}

	var fieldByMetaName = {
		"PAGEHEAD": "section",
		"ZONE": "place",
		"EXPORTTIME": "date",
		"PAGENUMBER": "pages"
	};
	// Own-property check so names such as "constructor" are not picked up
	// from Object.prototype.
	if (!Object.prototype.hasOwnProperty.call(fieldByMetaName, stuff[1])) {
		return;
	}

	// Trim first: with leading whitespace, split(/\s+/) returns "" as its
	// first element and the field would be stored empty.
	item[fieldByMetaName[stuff[1]]] = stuff[2].trim().split(/\s+/)[0];
}

/**
 * Translator entry point: save the current article, or the articles the
 * user picks from a results page.
 *
 * On a "multiple" page (as reported by detectWeb) the result links are
 * offered through Zotero.selectItems and every chosen URL is fetched;
 * otherwise only `url` is fetched. Each fetched page is turned into a
 * newspaperArticle item by scrapeHinduArticle(). Zotero.done() is called
 * once, after the last requested page has been processed.
 *
 * @param doc  Document of the page being translated.
 * @param url  URL of the page being translated.
 */
function doWeb(doc, url) {
	var arts = [];
	if (detectWeb(doc, url) == "multiple") {
		var xpath = '//h2[@class="r"]/a[@class="l"]';
		var links = doc.evaluate(xpath, doc, null, XPathResult.ANY_TYPE, null);
		var link;
		var items = {};
		while ((link = links.iterateNext())) {
			items[link.href] = link.textContent;
		}
		items = Zotero.selectItems(items);
		// for...in over a falsy result simply runs zero times, which leaves
		// arts empty when nothing was selected.
		for (var href in items) {
			arts.push(href);
		}
	} else {
		arts = [url];
	}

	if (arts.length === 0) {
		return;
	}

	var pending = arts.length;
	Zotero.wait();
	// forEach gives every request its own `art` binding, so the callback
	// records the URL it was actually issued for rather than the last one.
	arts.forEach(function (art) {
		Zotero.debug(art);
		Zotero.Utilities.HTTP.doGet(art, function (text) {
			scrapeHinduArticle(text, art);
			pending--;
			if (pending === 0) {
				Zotero.done();
			}
		});
	});
}

/**
 * Build and complete one newspaperArticle item from raw page HTML.
 *
 * @param text  HTML source of the article page.
 * @param art   URL the HTML was fetched from; stored as the item URL and
 *              as the snapshot attachment URL.
 */
function scrapeHinduArticle(text, art) {
	var newItem = new Zotero.Item("newspaperArticle");
	newItem.publicationTitle = "The Hindu";
	newItem.url = art;

	// Title: the part of <TITLE> that follows a ":"; if the title holds no
	// colon, fall back to the whole <TITLE> text instead of throwing.
	var titleMatch = text.match(/\<TITLE\>[\w\W]*\:([\w\W]*?)<\/TITLE/) ||
		text.match(/\<TITLE\>([\w\W]*?)<\/TITLE/);
	if (titleMatch) {
		newItem.title = Zotero.Utilities.unescapeHTML(Zotero.Utilities.capitalizeTitle(titleMatch[1]));
	}

	// Author: text between the "justify>" that follows the storyhead font
	// tag and the next <p>.
	var authMatch = text.match(/\<font class\=storyhead[\w\W]*?justify\>([\w\W]*?)\<p\>/);
	if (authMatch) {
		var cleanauth = Zotero.Utilities.cleanTags(authMatch[1]);
		newItem.creators.push(Zotero.Utilities.cleanAuthor(cleanauth, "author"));
	}

	newItem.attachments = [{"title":"The Hindu Snapshot", mimeType:"text/html", url:art}];

	// match() returns null when the page has no META NAME tags.
	var metaTags = text.match(/<META NAME[\w\W]*?\>/g) || [];
	for (var i = 0; i < metaTags.length; i++) {
		regexMeta(metaTags[i], newItem);
	}
	newItem.complete();
}

/** BEGIN TEST CASES **/
var testCases = [
	{
		"type": "web",
		"url": "http://www.hindu.com/lr/2004/01/04/stories/2004010400030100.htm",
		"items": [
			{
				"itemType": "newspaperArticle",
				"creators": [
					{
						"firstName": "To be torn between two languages, discovers H. MASUD TAJ, is to drown soul-deep in the",
						"lastName": "present",
						"creatorType": "author"
					}
				],
				"notes": [],
				"tags": [],
				"seeAlso": [],
				"attachments": [
					{
						"title": "The Hindu Snapshot",
						"mimeType": "text/html"
					}
				],
				"publicationTitle": "The Hindu",
				"url": "http://www.hindu.com/lr/2004/01/04/stories/2004010400030100.htm",
				"title": "Falling at the speed of light",
				"date": "01-01-2004",
				"pages": "01",
				"place": "CHEN",
				"section": "LITERARY",
				"libraryCatalog": "The Hindu (old)"
			}
		]
	}
]
/** END TEST CASES **/