summaryrefslogtreecommitdiff
path: root/The Australian.js
blob: 0ea2b35b7e9543a176d3b20d70abb339c162ecb7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
{
	"translatorID": "393afc28-212d-47dd-be87-ec51bc7a58a4",
	"label": "The Australian",
	"creator": "Michael Berkowitz",
	"target": "^https?://(searchresults|www.theaustralian)\\.news\\.com\\.au/",
	"minVersion": "1.0.0b3.r1",
	"maxVersion": "",
	"priority": 100,
	"inRepository": true,
	"translatorType": 4,
	"browserSupport": "g",
	"lastUpdated": "2014-04-04 10:17:34"
}

/**
 * Classifies the current page for Zotero from its URL alone.
 *
 * @param {Document} doc - The page document (not consulted here).
 * @param {string} url - The address of the page being inspected.
 * @returns {string|undefined} "multiple" for the search servlet address or any
 *     URL containing "siteSearch"; "newspaperArticle" for any other URL
 *     containing "story"; undefined otherwise.
 */
function detectWeb(doc, url) {
	var looksLikeSearch = url == "http://searchresults.news.com.au/servlet/Search"
		|| url.indexOf("siteSearch") > -1;
	if (looksLikeSearch) {
		return "multiple";
	}
	// Search pages are ruled out above, so "story" is only tested for the rest.
	return url.indexOf("story") > -1 ? "newspaperArticle" : undefined;
}

/**
 * Downloads one article page and saves it as a "newspaperArticle" item.
 *
 * The page source is mined with regular expressions for the title,
 * description (abstract), keywords (tags), active navigation entry (section),
 * timestamp (date) and byline (creators). Each field is optional: when its
 * pattern does not match, the field is simply left unset instead of aborting
 * the whole item.
 *
 * @param {string} url - Address of the article to fetch; also stored on the item.
 */
function scrape(url) {
	// Returns capture group 1 of `pattern` within `haystack`, or null when
	// the pattern does not match at all.
	function firstCapture(haystack, pattern) {
		var hit = haystack.match(pattern);
		return hit ? hit[1] : null;
	}

	Zotero.Utilities.HTTP.doGet(url, function(text) {
		var newItem = new Zotero.Item("newspaperArticle");
		newItem.url = url;
		newItem.publicationTitle = "The Australian";

		//title: keep only the part before the first " | " separator
		var rawTitle = firstCapture(text, /<title>(.*)<\/title>/);
		if (rawTitle !== null) {
			newItem.title = Zotero.Utilities.capitalizeTitle(rawTitle.split(" | ")[0]);
		}

		//abstract: the first word is reduced to an initial capital only
		var description = firstCapture(text, /meta name=\"description\"\s+content=\"(.*)\"/);
		if (description) {
			var words = Zotero.Utilities.unescapeHTML(description).split(" ");
			// charAt() yields "" for an empty first word, where [0] would
			// yield undefined and leak the text "undefined" into the abstract.
			words[0] = words[0].charAt(0) + words[0].substr(1).toLowerCase();
			newItem.abstractNote = words.join(" ");
		}

		//tags
		var keywords = firstCapture(text, /meta name=\"keywords\"\s+content=\"(.*)\"/);
		if (keywords) {
			var tags = keywords.split(/,\s+/);
			for (var i = 0 ; i < tags.length ; i++) {
				// Skip blanks left by doubled separators.
				if (tags[i]) {
					newItem.tags.push(Zotero.Utilities.unescapeHTML(tags[i]));
				}
			}
		}

		//section
		var section = firstCapture(text, /active\"><a[^>]*>(.*)<\/a>/);
		if (section !== null) {
			newItem.section = section;
		}

		//timestamp
		var timestamp = firstCapture(text, /<em class=\"timestamp\">(.*)<\/em>/);
		if (timestamp !== null) {
			newItem.date = timestamp;
		}

		//byline: drop everything from the first comma on, then split the
		//remainder on " and " to get one entry per author
		var byline = firstCapture(text, /<div\s+class=\"module-subheader\"><p>(.*)/);
		if (byline) {
			var authors = byline.split(",")[0].split(" and ");
			for (var j = 0 ; j < authors.length ; j++) {
				newItem.creators.push(Zotero.Utilities.cleanAuthor(authors[j], "author"));
			}
		}

		newItem.complete();
		Zotero.debug(newItem);

		Zotero.done();
	}, function() {}); // third argument kept as an empty callback, as before
}

/**
 * Entry point for saving: collects the article URLs to import from the
 * current page and hands each one to scrape().
 *
 * For pages that detectWeb() classifies as "multiple", candidate links are
 * gathered from the page's h4 elements and offered to the user through
 * Zotero.selectItems(); otherwise the current page itself is the only target.
 *
 * @param {Document} doc - The page document.
 * @param {string} url - The address of the page.
 */
function doWeb(doc, url) {
	var newItems = new Object();
	// Ask detectWeb() so both functions agree on what counts as a result
	// list (the search servlet address as well as "siteSearch" URLs).
	if (detectWeb(doc, url) == "multiple") {
		var candidates = Zotero.Utilities.getItemArray(doc, doc.getElementsByTagName("h4"), /^https?:\/\//);
		newItems = Zotero.selectItems(candidates);
		if (!newItems) {
			// Nothing came back from the picker (e.g. it was dismissed).
			return;
		}
	} else {
		newItems[url] = doc.title.split(" | ")[0];
	}

	var URLS = [];
	for (var link in newItems) {
		if (Object.prototype.hasOwnProperty.call(newItems, link)) {
			URLS.push(link);
		}
	}

	Zotero.debug(URLS);
	// scrape() issues its own GET per URL, so no extra request is made here.
	for (var i = 0 ; i < URLS.length ; i++) {
		scrape(URLS[i]);
	}
}