summaryrefslogtreecommitdiff
path: root/National Post.js
blob: 33c7ec4a24fd76c6d3d78146755d2903d5984de3 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
{
	"translatorID":"1c5b122c-7e58-4cd5-932b-93f5ca0b7e1a",
	"translatorType":4,
	"label":"National Post",
	"creator":"Adam Crymble",
	"target":"http://www.(national|financial)post.com/",
	"minVersion":"1.0.0b4.r5",
	"maxVersion":"",
	"priority":100,
	"inRepository":true,
	"lastUpdated":"2008-08-11 20:40:00"
}

/**
 * Classifies the current page for Zotero.
 *
 * Checks are made in a fixed order: the document title for "Search Results",
 * then the document's own address for "story", then for "blog". Each string
 * is passed to String.prototype.match, so it is treated as a pattern that may
 * occur anywhere in the title or address.
 *
 * @param doc  Document to inspect; only doc.title and doc.location.href are read.
 * @param url  Not used; the address is taken from doc.location.href instead.
 * @return "multiple", "newspaperArticle" or "blogPost"; undefined when none
 *         of the checks match.
 */
function detectWeb(doc, url) {
	// Search result listings are recognised by title alone.
	if (doc.title.match("Search Results")) {
		return "multiple";
	}

	var address = doc.location.href;

	// "story" is tested before "blog", so an address containing both is
	// reported as a newspaper article.
	if (address.match("story")) {
		return "newspaperArticle";
	}
	if (address.match("blog")) {
		return "blogPost";
	}
}

/**
 * Copies one scraped value onto an item under a different property name.
 *
 * Nothing is written when dataTags[field] is falsy (missing, empty string,
 * etc.), so an existing value on the item is left untouched in that case.
 *
 * @param newItem      Object that receives the value.
 * @param dataTags     Lookup of scraped values.
 * @param field        Key to read from dataTags.
 * @param zoteroField  Property name to set on newItem.
 */
function associateData (newItem, dataTags, field, zoteroField) {
	var value = dataTags[field];
	if (!value) {
		return;
	}
	newItem[zoteroField] = value;
}

/**
 * Builds one Zotero item from a story or blog page and saves it.
 *
 * The page kind is taken from detectWeb(doc, doc.location.href):
 *   - "newspaperArticle": abstract, date and author are read from the <meta>
 *     tags named "Description", "PubDate" and "Author". Without an "Author"
 *     tag, the text before the first comma of the first <strong> element is
 *     used as the author.
 *   - "blogPost": the date is the text of div.entryviewfooter between
 *     "Posted:" and "by"; the author is the text of
 *     span.MoreRecentPostsAuthor with "by " removed.
 *   - anything else: a debug message is logged and no item is saved.
 *
 * Page elements that are absent are skipped rather than dereferenced, so a
 * page with an unexpected layout still yields an item with url and title.
 *
 * @param doc  Loaded document to scrape.
 */
function scrape(doc) {
	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = namespace ? function(prefix) {
		return prefix == "x" ? namespace : null;
	} : null;

	// Text content of the first node matching xpath, or null when nothing
	// matches, so callers can skip a missing element instead of failing on
	// a null node.
	var firstText = function(xpath) {
		var node = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
		return node ? node.textContent : null;
	};

	var pageAddress = doc.location.href;
	var mediaType = detectWeb(doc, pageAddress);
	var newItem;

	if (mediaType == "newspaperArticle") {
		newItem = new Zotero.Item("newspaperArticle");

		//metadata
		var dataTags = {};
		var metaTags = doc.getElementsByTagName("meta");
		for (var i = 0; i < metaTags.length; i++) {
			var metaName = metaTags[i].getAttribute("name");
			var metaContent = metaTags[i].getAttribute("content");
			// Tags such as <meta charset> or <meta http-equiv> lack a name
			// and/or content. Skip them so no null reaches cleanTags
			// (presumably it rejects non-string input; confirm against the
			// Zotero version in use) and no "null" key is stored.
			if (!metaName || metaContent == null) {
				continue;
			}
			dataTags[metaName] = Zotero.Utilities.cleanTags(metaContent);
		}

		associateData (newItem, dataTags, "Description", "abstractNote");
		associateData (newItem, dataTags, "PubDate", "date");

		//author
		if (dataTags["Author"]) {
			newItem.creators.push(Zotero.Utilities.cleanAuthor(dataTags["Author"], "author"));
		} else {
			// Fallback byline: only the part before the first comma is kept.
			var byline = firstText('//strong');
			if (byline) {
				newItem.creators.push(Zotero.Utilities.cleanAuthor(byline.split(",")[0], "author"));
			}
		}

	} else if (mediaType == "blogPost") {
		newItem = new Zotero.Item("blogPost");

		var footer = firstText('//div[@class="entryviewfooter"]');
		if (footer) {
			// Drop the "Posted:" label, keep what precedes "by", then trim.
			var posted = footer.replace("Posted:", '').split("by");
			newItem.date = posted[0].replace(/^\s*|\s*$/g, '');
		}

		var blogAuthor = firstText('//span[@class="MoreRecentPostsAuthor"]');
		if (blogAuthor) {
			newItem.creators.push(Zotero.Utilities.cleanAuthor(blogAuthor.replace("by ", ''), "author"));
		}

	} else {
		// Neither a story nor a blog page: there is no item to fill in, so
		// report it and stop instead of failing on an undefined item below.
		Zotero.debug("National Post: unsupported page type, nothing saved for " + pageAddress);
		return;
	}

	Zotero.debug(pageAddress);
	newItem.url = pageAddress;

	// NOTE (from the original author): this has been seen to ACTUALLY return
	// an address of the form
	// http://www.nationalpost.com/components/npemail.aspx?id=591742&ref=http://www.nationalpost.com/story.html
	// rather than the story address itself.

	var title1 = doc.title;
	Zotero.debug(title1);

	newItem.title = title1;
	// "publicationTitle" is the Zotero field name; the previous "publication"
	// property is not a Zotero item field.
	newItem.publicationTitle = "The National Post";
	newItem.ISSN = "1486-8008";

	newItem.complete();
}


/**
 * Translator entry point: decides which pages to scrape and hands them to
 * scrape() through Zotero.Utilities.processDocuments.
 *
 * On a page that detectWeb reports as "multiple", every link matching
 * //h3[@class="alt"]/a whose href contains "nationalpost" is offered to the
 * user through Zotero.selectItems, and only the chosen addresses are
 * processed. On any other page the given url itself is processed.
 *
 * If the selection comes back empty (for example because the dialog was
 * cancelled) the function returns without starting any processing and
 * without calling Zotero.wait().
 *
 * @param doc  Document the translator was invoked on.
 * @param url  Address of that document.
 */
function doWeb(doc, url) {
	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = namespace ? function(prefix) {
		return prefix == 'x' ? namespace : null;
	} : null;

	var articles = [];

	if (detectWeb(doc, url) == "multiple") {
		var items = {};
		var titles = doc.evaluate('//h3[@class="alt"]/a', doc, nsResolver, XPathResult.ANY_TYPE, null);
		var next_title;
		while ((next_title = titles.iterateNext())) {
			// Only links whose address contains "nationalpost" are offered.
			if (next_title.href.match("nationalpost")) {
				items[next_title.href] = next_title.textContent;
				Zotero.debug(next_title.href);
				Zotero.debug(next_title.textContent);
			}
		}

		items = Zotero.selectItems(items);
		// A falsy result means there is no selection to work with.
		if (!items) {
			return;
		}
		for (var i in items) {
			articles.push(i);
		}
		// Nothing chosen: avoid loading zero documents and then waiting.
		if (articles.length === 0) {
			return;
		}
	} else {
		articles = [url];
	}

	Zotero.Utilities.processDocuments(articles, scrape, function() {Zotero.done();});
	Zotero.wait();
}