summaryrefslogtreecommitdiff
path: root/BusinessWeek.js
blob: b7d5bbddc69b790321476d05efad93493d9b7e6c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
{
	"translatorID": "fb342bae-7727-483b-a871-c64c663c2fae",
	"label": "BusinessWeek",
	"creator": "Michael Berkowitz",
	"target": "^https?://(www\\.|search\\.)?businessweek\\.com",
	"minVersion": "3.0",
	"maxVersion": "",
	"priority": 100,
	"inRepository": true,
	"translatorType": 4,
	"browserSupport": "gcsibv",
	"lastUpdated": "2014-02-14 00:21:11"
}

/**
 * Classify the current page.
 *
 * @param {Document} doc - Page being inspected; queried through doc.evaluate.
 * @param {String} url - Page address (not consulted here).
 * @returns {String|undefined} "multiple" when a search-results container is
 *     present, "magazineArticle" when an h1 with class "headline" is present,
 *     otherwise undefined.
 */
function detectWeb(doc, url) {
	// True when the XPath expression selects at least one node in doc.
	var hasNode = function (expression) {
		var first = doc.evaluate(expression, doc, null, XPathResult.ANY_TYPE, null).iterateNext();
		return !!first;
	};

	// The search-page check runs first, so it wins if both would match.
	if (hasNode('//body[contains(@class, "searchResults")]|//div[contains(@class, "search_result")]')) {
		return "multiple";
	}
	if (hasNode('//h1[@class="headline"]')) {
		return "magazineArticle";
	}
}

/**
 * Translator entry point.
 *
 * On a page that detectWeb reports as "multiple", collects the result links,
 * lets the user choose among them via Zotero.selectItems, and hands the chosen
 * URLs to Zotero.Utilities.processDocuments with scrape as the per-page
 * handler. On any other page, scrapes the current document directly.
 *
 * @param {Document} doc - Page being translated.
 * @param {String} url - Address of that page.
 */
function doWeb(doc, url) {
	if (detectWeb(doc, url) != "multiple") {
		scrape(doc, url);
		return;
	}

	// Two result-list layouts are supported; the first one that yields any
	// link text is used.
	var candidateXPaths = [
		'//h3[@class="story"]/a',
		'//div[@class="results"]//a[./h4]'
	];
	var linkXPath = null;
	for (var c = 0; c < candidateXPaths.length; c++) {
		if (ZU.xpathText(doc, candidateXPaths[c])) {
			linkXPath = candidateXPaths[c];
			break;
		}
	}

	// Neither layout matched: there is nothing to offer, and iterating an
	// undefined result set would throw a TypeError.
	if (!linkXPath) {
		Zotero.debug("BusinessWeek: no result links found on search page");
		return;
	}

	var results = doc.evaluate(linkXPath, doc, null, XPathResult.ANY_TYPE, null);
	var items = {};
	var result;
	while ((result = results.iterateNext())) {
		// Keyed by link target; the value is the whitespace-normalized link text.
		items[result.href] = Zotero.Utilities.trimInternal(result.textContent);
	}

	Zotero.selectItems(items, function (selected) {
		if (!selected) {
			return true;
		}
		var articles = [];
		for (var href in selected) {
			articles.push(href);
		}
		Zotero.Utilities.processDocuments(articles, scrape, function () {
			Zotero.done();
		});
	});
}

/**
 * Build a single magazineArticle item from an article page and complete it.
 *
 * The title comes from the h1 with class "headline"; everything else is read
 * from the page's <meta> elements (description, keywords, author, channel,
 * pub_date). Missing meta tags are skipped rather than causing a failure.
 *
 * @param {Document} doc - Article page.
 * @param {String} url - Address stored on the item.
 */
function scrape(doc, url) {
	// Index every <meta> element's content by its name.
	var metaTags = {};
	var metas = doc.evaluate('//meta', doc, null, XPathResult.ANY_TYPE, null);
	var meta;
	while ((meta = metas.iterateNext())) {
		metaTags[meta.name] = meta.content;
	}
	Zotero.debug(metaTags);

	var item = new Zotero.Item("magazineArticle");
	item.title = ZU.xpathText(doc, '//h1[@class="headline"]');
	if (metaTags['description']) item.abstractNote = metaTags['description'];
	if (metaTags['keywords']) item.tags = metaTags['keywords'].split(/\s*,\s*/);

	// some articles don't have author tags - prevent this from failing
	if (metaTags['author']) {
		// Separators are commas (optionally followed by "and") or a
		// whitespace-delimited "and". Requiring the whitespace keeps names
		// that merely contain "and" (e.g. "Sandra") in one piece.
		var authors = metaTags['author'].split(/\s*,\s*(?:and\s+)?|\s+and\s+/);
		for (var i = 0; i < authors.length; i++) {
			if (!authors[i]) continue;
			item.creators.push(Zotero.Utilities.cleanAuthor(authors[i], "author"));
		}
	}

	// Only append the channel when the page actually declares one.
	item.publicationTitle = metaTags['channel']
		? "BusinessWeek: " + metaTags['channel']
		: "BusinessWeek";
	item.url = url;

	if (metaTags['pub_date']) {
		// Turn a compact YYYYMMDD prefix into MM/DD/YYYY, then drop any
		// "T..." time suffix.
		item.date = metaTags['pub_date'].replace(/(\d{4})(\d{2})(\d{2})/, "$2/$3/$1").replace(/T.+/, "");
	}
	item.complete();
}
/** BEGIN TEST CASES **/
var testCases = [
	{
		"type": "web",
		"url": "http://www.businessweek.com/management/ten-things-only-bad-managers-say-09232011.html?campaign_id=rss_topStories",
		"items": [
			{
				"itemType": "magazineArticle",
				"creators": [
					{
						"firstName": "Liz",
						"lastName": "Ryan",
						"creatorType": "author"
					}
				],
				"notes": [],
				"tags": [
					"leadership",
					"Management",
					"bad bosses",
					"leaders",
					"Liz Ryan",
					"bad managers",
					"lousy managers"
				],
				"seeAlso": [],
				"attachments": [],
				"title": "Ten Things Only Bad Managers Say",
				"abstractNote": "Nope, it's not just you. These jerks are out there",
				"publicationTitle": "BusinessWeek: management",
				"url": "http://www.businessweek.com/management/ten-things-only-bad-managers-say-09232011.html?campaign_id=rss_topStories",
				"date": "2011-09-23",
				"libraryCatalog": "BusinessWeek",
				"accessDate": "CURRENT_TIMESTAMP"
			}
		]
	},
	{
		"type": "web",
		"url": "http://www.businessweek.com/search?q=linux&resultsPerPage=20&sort=date",
		"items": "multiple"
	}
]
/** END TEST CASES **/