{ "translatorID": "d9b57cd5-5a9c-4946-8616-3bdf8edfcbb5", "label": "mEDRA", "creator": "Aurimas Vinckevicius", "target": "^https?://www\\.medra\\.org/servlet/view\\?", "minVersion": "3.0", "maxVersion": "", "priority": 105, "inRepository": true, "translatorType": 12, "browserSupport": "g", "lastUpdated": "2014-05-26 03:50:55" } function scrapeMasterTable(doc) { var meta = {}; var td = doc.getElementById('contenuto'); if(!td) return false; scrapeTable(td.firstElementChild, meta); if(!Object.keys(meta).length) return false; // DOI not found page return meta; } function scrapeTable(node, meta) { if(!node) return; var section; do { var tagName = node.tagName.split(':').pop(); // drop XHTML prefix if(tagName == 'BR') continue; if(tagName == 'SPAN') { var sectionHeading = ZU.trimInternal(node.textContent).toLowerCase(); switch(sectionHeading) { case 'doi resolution data:': case 'serial article data:': case 'content item data:': case 'metadata:': // Metadata describes the actual item being referenced section = 'top'; break; case 'serial publication data:': case 'journal issue data:': case 'monographic publication data:': // Metadata describes the container section = 'container'; break; default: Zotero.debug('Unknown section: ' + sectionHeading); section = null; } continue; } if(tagName == 'TABLE') { if(node.getElementsByTagName('span').length) { //there are subtables, dig deeper var tr = node.firstElementChild.firstElementChild; while(tr) { if(tr.firstElementChild) { scrapeTable(tr.firstElementChild.firstElementChild, meta); } tr = tr.nextElementSibling; } } else { scrapeMeta(node.firstElementChild.firstElementChild, section, meta); } } } while(node = node.nextElementSibling); } var map = { 'all': { 'doi': 'DOI', 'url': 'url', 'publisher': 'publisher', 'country of publication': 'place', 'issn': 'ISSN', 'product form': 'itemType', 'journal issue number': 'issue', 'language of text': 'language', 'first page': 'firstPage', // Combine with lastPage later 'last page': 'lastPage', 'copyright year': 'cDate', // Copyright date. To be combined with copyright holder later 'copyright owner': 'cOwner', // Copyright holder 'descriptive text': 'abstractNote' }, 'container': { 'full title': 'publicationTitle' }, 'top': { 'full title': 'title' } }; var creatorMap = { author: 'author' // Haven't seen any others yet }; function scrapeMeta(tr, section, meta) { if(!tr) return; do { var label = tr.firstElementChild; var value = ZU.trimInternal(label.nextElementSibling.textContent); label = ZU.trimInternal(label.textContent).toLowerCase(); if(!label || !value) continue; var zLabel = map.all[label] || map[section][label]; if(zLabel) { meta[zLabel] = value; continue; } // Some special cases if(label.indexOf('by ') == 0) { // Authors. Role indicated in first set of parentheses var role = label.match(/\(([^(]+?)\)/); if(role && (role = creatorMap[role[1].trim()])) { if(!meta.creators) meta.creators = []; meta.creators.push({ // We will split this up properly later. Authors may be // already incorrectly split across a number of "authors" if // the supplied metadata used HTML numeric character escapes. // mEDRA seems to think that a semicolon indicates another author lastName: value, creatorType: role }); } else { Zotero.debug("Unknown creator role: " + label); } } else if(label.indexOf('journal issue date') == 0 || label.indexOf('publication date') == 0) { // These all seem to be the same. We "turn" them into ISO dates meta.date = value.replace(/\s*\/\s*/g, '-'); } else if(label.indexOf('other product identifier') == 0) { // Some of these are ISBNs var isbn = ZU.cleanISBN(value); if(isbn) meta.ISBN = isbn; } else { Zotero.debug('Unknwon label: ' + label); } } while(tr = tr.nextElementSibling); } function detectWeb(doc, url) { var meta = scrapeMasterTable(doc); if(!meta) return; return mapItemType(meta); } var itemTypeMap = { DH: 'journalArticle', JB: 'journalArticle', JD: 'journalArticle', BA: 'bookSection' // Haven't seen a DOI for the whole book yet }; function mapItemType(meta) { var value = meta.itemType; delete meta.itemType; // So we don't bother with it later if(value) { var type = value.match(/\(\s*([A-Z]{2})\s*\)/); if(type) { if(!itemTypeMap[type[1]]) { Zotero.debug("Unknown item type: " + value); } else { return itemTypeMap[type[1]]; } } } Z.debug('Using default item type: journalArticle'); return 'journalArticle'; } function doWeb(doc, url) { var meta = scrapeMasterTable(doc); if(!meta) return; var type = mapItemType(meta); var item = new Zotero.Item(type); for(var label in meta) { var value = meta[label]; switch(label) { case 'language': // We only want to the 3 letter code, which is in parentheses var lang = value.match(/\(\s*(\w{3})\s*\)/); if(lang) { value = lang[1].trim(); } break; case 'place': // Don't need the 2 letter code value = value.replace(/\s*\(.*/, ''); break; case 'cDate': case 'cOwner': // Combine whatever we have about the copyright value = '©' + (meta.cDate ? meta.cDate + ' ' : '') + (meta.cOwner || ''); delete meta.cOwner; // Don't bother with this later delete meta.cDate; label = 'rights'; break; case 'firstPage': if(meta.lastPage) { value += '–' + meta.lastPage; } label = 'pages'; break; case 'lastPage': // We deal with this when we encounter firstPage. // Not sure what to do if we just had lastPage. continue; break; case 'creators': // Looks like if mENDRA receives HTML special chars, // it will split the name into multiples on the semicolon // When this happens, the names will end with what look like // HTML numeric character escapes, but without the semicolon // We use this to combine consecutive names and fix the escapes var name; for(var i=0; i