TV by the Numbers.js


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139

{
	"translatorID": "180a62bf-efdd-4d38-8d85-8971af04dd85",
	"label": "TV by the Numbers",
	"creator": "odie5533",
	"target": "^https?://tvbythenumbers\\.com",
	"minVersion": "1.0",
	"maxVersion": "",
	"priority": 100,
	"inRepository": true,
	"translatorType": 4,
	"browserSupport": "g",
	"lastUpdated": "2014-04-04 10:18:02"
}

/*
	TV by the Numbers - translator for Zotero
	Copyright (C) 2010 odie5533

	This program is free software: you can redistribute it and/or modify
	it under the terms of the GNU General Public License as published by
	the Free Software Foundation, either version 3 of the License, or
	(at your option) any later version.

	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

/*
	This translator supports saving a snapshot of a single post and saving
	the citation of many posts at once without visiting each post. Thus, it does
	not save a snapshot when multiple citations are to be saved.
*/


/*
	Site-specific configuration read by detectWeb(), xpre() callers and scrape().
	Each name is declared with var: a bare assignment creates an implicit
	global and throws a ReferenceError when the code runs in strict mode.
*/

/* Used as the item's publicationTitle and as the prefix of the snapshot title. */
var PUB_TITLE = "TV by the Numbers";
/* detectWeb() only accepts documents for which this expression yields text. */
var XPATH_TITLE = "//title";
/* Falsy, so scrape() leaves the pages field unset. */
var XPATH_PAGES = null;
/*
	The date and author expressions both read the <p class="posted_on"> text,
	which is expected to look like "... on <date> by <authors>".
	NOTE(review): both expressions start with "//", which XPath resolves from
	the document root whatever the context node is, so on a page with several
	posts every post reads the first matching paragraph in the document.
	Confirm against the site markup whether ".//" was intended.
*/
var XPATH_DATE = "substring-after(substring-before(string(//p[@class='posted_on']),' by '), 'on ')";
/* Only the first capture group of each RE_* pattern is used. */
var RE_DATE = /(.*)/;
var XPATH_AUTHORS = "substring-after(string(//p[@class='posted_on']),' by ')";
var RE_AUTHORS = /(.*)/;

/**
 * Classifies the current page by how many post containers it holds.
 *
 * @param doc  Document to inspect.
 * @param url  Address of the document (not consulted here).
 * @returns "webpage" for exactly one post, "multiple" for more than one,
 *          and undefined when the page has no title text or no posts.
 */
function detectWeb(doc, url) {
	/* Pages without any title text are treated as noise and ignored. */
	var hasTitle = Boolean(xpath_string(doc, doc, XPATH_TITLE));
	if (hasTitle) {
		var countResult = doc.evaluate("count(//div[@class='post-alt blog'])",
			doc, null, XPathResult.NUMBER_TYPE, null);
		var postTotal = countResult.numberValue;
		if (postTotal > 1)
			return "multiple";
		if (postTotal == 1)
			return "webpage";
	}
	return undefined;
}

/**
 * Evaluates an XPath expression as a string and trims the result.
 *
 * @param doc    Document whose evaluate() method is used.
 * @param node   Context node handed to evaluate().
 * @param xpath  Expression to evaluate.
 * @returns The trimmed string, or null when evaluation gives no result
 *          object or an empty string value.
 */
function xpath_string(doc, node, xpath) {
	var evaluated = doc.evaluate(xpath, node, null, XPathResult.STRING_TYPE, null);
	var text = evaluated ? evaluated.stringValue : null;
	return text ? Zotero.Utilities.trim(text) : null;
}

/**
 * Evaluates an XPath string expression and extracts the first capture
 * group of a regular expression applied to the result.
 *
 * @param doc    Document whose evaluate() method is used.
 * @param node   Context node for the expression.
 * @param xpath  Expression to evaluate via xpath_string().
 * @param reg    RegExp whose first capture group is wanted.
 * @returns The captured text, or null when the expression yields nothing
 *          or the pattern does not match.
 */
function xpre(doc, node, xpath, reg) {
	var text = xpath_string(doc, node, xpath);
	/* RegExp.exec() stringifies its argument, so passing null through would
	   match against the literal text "null" and report it as real data. */
	if (text === null || text === undefined)
		return null;
	var match = reg.exec(text);
	/* exec() returns null on a failed match; indexing that would throw. */
	if (!match)
		return null;
	return match[1];
}

/**
 * Splits a byline into individual author names.
 *
 * @param author_text  Byline text, or a falsy value when none was found.
 * @returns Array of names: split on " and " when present, otherwise on ";",
 *          otherwise the whole text as a single name. Empty for falsy input.
 */
function splitAuthorNames(author_text) {
	if (!author_text)
		return [];
	if (author_text.indexOf(" and ") != -1)
		return author_text.split(" and ");
	if (author_text.indexOf(";") != -1)
		return author_text.split(";");
	return [author_text];
}

/**
 * Builds a "webpage" item from one post container.
 *
 * @param doc   Document the post belongs to.
 * @param post  The post's container element.
 * @param url   URL of the page being saved; a snapshot is attached only
 *              when it equals the post's own URL.
 * @returns The populated Zotero.Item, or null when the post has no link
 *          to take a URL and title from.
 */
function buildPostItem(doc, post, url) {
	/* The first link inside the post supplies both the URL and the title. */
	var link = post.getElementsByTagName("a")[0];
	if (!link)
		return null;

	var newItem = new Zotero.Item("webpage");
	newItem.publicationTitle = PUB_TITLE;
	newItem.url = link.href;

	var title = Zotero.Utilities.unescapeHTML(
		Zotero.Utilities.cleanTags(link.textContent));
	/* A curly single quote next to whitespace becomes two straight
	   apostrophes; any curly single quote left over becomes one. */
	title = title.replace(/(\s+)(?:‘|’)|(?:‘|’)(\s+)/g, "$1''$2").replace(/‘|’/g, "'");
	newItem.title = title;

	if (XPATH_DATE) {
		var date = xpre(doc, post, XPATH_DATE, RE_DATE);
		/* A stringified null can surface as the text 'null'; never store it. */
		if (date && date != 'null')
			newItem.date = date;
	}
	if (XPATH_PAGES)
		newItem.pages = xpath_string(doc, post, XPATH_PAGES);

	var authors = splitAuthorNames(xpre(doc, post, XPATH_AUTHORS, RE_AUTHORS));
	for (var n = 0; n < authors.length; n++) {
		/* Same guard as for the date: skip a stringified null. */
		if (authors[n] != 'null')
			newItem.creators.push(
				Zotero.Utilities.cleanAuthor(authors[n], "author"));
	}

	/* Attach a snapshot only when the page being saved is the post itself. */
	if (url == newItem.url)
		newItem.attachments.push({title:PUB_TITLE+" Snapshot",
			mimeType:"text/html", url:doc.location.href, snapshot:true});

	newItem.toString = function() { return this.title; };
	return newItem;
}

/**
 * Collects every post on the page and saves it as a "webpage" item.
 *
 * With a single post the item is completed immediately. With several, the
 * user is offered a url -> title list through Zotero.selectItems() and only
 * the chosen items are completed.
 *
 * @param doc  Document to scan for post containers.
 * @param url  URL of the page being saved.
 */
function scrape(doc, url) {
	/* Keyed by post URL. A plain object, since the keys are strings. */
	var items = {};
	var posts = doc.evaluate("//div[@class='post-alt blog']", doc, null,
		XPathResult.ANY_TYPE, null);
	var post_count = 0;
	var post;

	while ((post = posts.iterateNext())) {
		var newItem = buildPostItem(doc, post, url);
		/* Posts without a link carry nothing that can be cited. */
		if (!newItem)
			continue;
		items[newItem.url] = newItem;
		post_count++;
	}

	if (post_count > 1) {
		var sel_items = {};
		for (var item_url in items)
			sel_items[item_url] = items[item_url].title;

		Zotero.selectItems(sel_items, function (selected) {
			/* A falsy result means the selection was cancelled. */
			if (!selected)
				return;
			for (var chosen_url in selected)
				if (items[chosen_url])
					items[chosen_url].complete();
		});
	} else if (post_count == 1) {
		for (var only_url in items)
			items[only_url].complete();
	}
}

/**
 * Save hook of the translator: hands the document and its URL straight to
 * scrape() and returns nothing itself.
 *
 * @param doc  Document to save from.
 * @param url  URL of that document.
 */
function doWeb(doc, url) {
	scrape(doc, url);
}