JavaScript 语言:支持豆瓣的 Zotero translator
{
"translatorID":"fc353b26-8911-4c34-9196-f6f567c93901",
"translatorType":4,
"label":"Douban",
"creator":"Ace Strong<acestrong@gmail.com>",
"target":"^https?://www.douban.com/subject",
"minVersion":"1.0.0",
"maxVersion":"",
"priority":100,
"inRepository":true,
"lastUpdated":"2009-2-19 20:45:00"
}
/**
 * Tells Zotero which kind of item the current page offers.
 *
 * Only the URL is inspected; the document is not read.
 *
 * @param {Document} doc - the loaded page (unused)
 * @param {string} url - address of the loaded page
 * @returns {string} "multiple" when the URL contains "subject_search",
 *   otherwise "book"
 */
function detectWeb(doc, url) {
  // Every URL that is not a search listing is treated as a single book page.
  if (/subject_search/.test(url)) {
    return "multiple";
  }
  return "book";
}
/**
 * Builds a Zotero "book" item from a single book page and saves it.
 *
 * Reads the title from the page's h1, the author/translator rows and the
 * detail block from the element with id "info", attaches a snapshot of the
 * page, and calls complete() on the item. Missing page parts are skipped
 * instead of raising a TypeError, so whatever could be read is still saved.
 *
 * @param {Document} doc - the book page to scrape
 */
function scrape(doc) {
  var nsResolver = null;
  var itemType = "book";
  var newItem = new Zotero.Item(itemType);
  Zotero.debug(itemType);

  // Attachment: snapshot of the web page
  newItem.attachments.push({document: doc, title: "web snapshot"});

  Zotero.debug("Title:");
  // Title lives at /html/body/div/h1
  var titleTag = doc.evaluate('//html/body/div/h1', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
  if (titleTag) {
    var title = Zotero.Utilities.trimInternal(titleTag.textContent);
    Zotero.debug(title);
    newItem.title = title;
  }

  // Everything else lives under //*[@id="info"]
  var info = doc.evaluate('//*[@id="info"]', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
  if (!info) {
    Zotero.debug("Douban: no #info block found; saving without bibliographic details");
    newItem.complete();
    return;
  }

  // Creators (authors and translators): a row is a <span> that wraps a
  // nested label <span> plus an <a> holding the person's name.
  var dataRows = info.getElementsByTagName("span");
  var i;
  for (i = 0; i < dataRows.length; i++) {
    var dataRow = dataRows[i];
    var spanTags = dataRow.getElementsByTagName("span");
    if (spanTags.length === 0) {
      continue; // a plain span, not a labelled row
    }
    var nameTag = dataRow.getElementsByTagName("a")[0];
    if (!nameTag) {
      continue; // labelled row without a linked name: nothing to record
    }
    var authorType = Zotero.Utilities.trimInternal(spanTags[0].textContent);
    var name = Zotero.Utilities.trimInternal(nameTag.textContent);
    if (authorType === "作者") {
      newItem.creators.push(Zotero.Utilities.cleanAuthor(name, "author", true));
    } else if (authorType === "译者") {
      newItem.creators.push(Zotero.Utilities.cleanAuthor(name, "translator", true));
    }
  }

  // Detail block: ISBN, pages, price, publisher, binding, publication year
  var obmo = info.getElementsByTagName("div")[0];
  if (obmo) {
    var content = obmo.textContent;
    dataRows = obmo.getElementsByTagName("span");
    Zotero.debug(dataRows.length);
    // Strip the field labels (the span texts) so only the values remain
    for (i = 0; i < dataRows.length; i++) {
      content = content.replace(dataRows[i].textContent, "");
    }
    // Trim both ends, then turn each whitespace run into a "," separator
    content = content.replace(/(^\s*)|(\s*$)/g, "");
    content = content.replace(/\s+/g, ',');
    var infoContents = content.split(",");

    // Values are picked by position, in the order listed above
    newItem.ISBN = infoContents[0];
    newItem.pages = infoContents[1];
    newItem.publisher = infoContents[3];
    newItem.date = infoContents[5];
  }

  newItem.complete();
}
/**
 * Translator entry point: collects the page URLs to save and hands each one
 * to scrape().
 *
 * On a search listing (detectWeb() reports "multiple") the result tables
 * under the element with id "in_tablem" are read, the user picks entries via
 * Zotero.selectItems(), and the chosen links are processed. On any other
 * page only the current URL is processed.
 *
 * @param {Document} doc - the loaded page
 * @param {string} url - address of the loaded page
 * @returns {boolean|undefined} true when there is nothing to process (no
 *   usable results, or the selection came back empty); otherwise nothing
 */
function doWeb(doc, url) {
  var nsResolver = null;
  var urls;

  if (detectWeb(doc, url) === "multiple") {
    Zotero.debug("Enter multiple~");
    // Search page: map of result link -> title. A plain object is used
    // because the keys are URLs, not array indices.
    var items = {};
    var found = false;
    var tablemTag = doc.evaluate('//*[@id="in_tablem"]', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
    var tableTags = tablemTag ? tablemTag.getElementsByTagName("table") : [];
    Zotero.debug(tableTags.length);
    Zotero.debug("begin to fetch multiple title and link");
    for (var i = 0; i < tableTags.length; i++) {
      var tds = tableTags[i].getElementsByTagName("td");
      if (tds.length < 2) {
        continue; // the title link is read from the second cell
      }
      var anchor = tds[1].getElementsByTagName("a")[0];
      if (!anchor || !anchor.href) {
        continue; // no link to follow for this row
      }
      var title = anchor.textContent;
      var link = anchor.href;
      Zotero.debug(title);
      Zotero.debug(link);
      // trimInternal is the same whitespace cleaner scrape() uses
      items[link] = Zotero.Utilities.trimInternal(title);
      found = true;
    }
    if (!found) {
      Zotero.debug("Douban: no search results found on this page");
      return true;
    }

    // Let the user choose which entries to save
    items = Zotero.selectItems(items);
    if (!items) return true;
    Zotero.debug("go on processing.");

    urls = [];
    // Separate loop variable so the `url` parameter is not overwritten
    for (var selectedUrl in items) {
      urls.push(selectedUrl);
    }
  } else {
    urls = [url];
  }

  Zotero.debug(urls);
  // Scrape every collected URL, then signal completion
  Zotero.Utilities.processDocuments(urls, scrape, function() { Zotero.done(); });
  Zotero.wait();
}
"translatorID":"fc353b26-8911-4c34-9196-f6f567c93901",
"translatorType":4,
"label":"Douban",
"creator":"Ace Strong<acestrong@gmail.com>",
"target":"^https?://www.douban.com/subject",
"minVersion":"1.0.0",
"maxVersion":"",
"priority":100,
"inRepository":true,
"lastUpdated":"2009-2-19 20:45:00"
}
/**
 * Tells Zotero which kind of item the current page offers.
 *
 * Only the URL is inspected; the document is not read.
 *
 * @param {Document} doc - the loaded page (unused)
 * @param {string} url - address of the loaded page
 * @returns {string} "multiple" when the URL contains "subject_search",
 *   otherwise "book"
 */
function detectWeb(doc, url) {
  // Every URL that is not a search listing is treated as a single book page.
  if (/subject_search/.test(url)) {
    return "multiple";
  }
  return "book";
}
/**
 * Builds a Zotero "book" item from a single book page and saves it.
 *
 * Reads the title from the page's h1, the author/translator rows and the
 * detail block from the element with id "info", attaches a snapshot of the
 * page, and calls complete() on the item. Missing page parts are skipped
 * instead of raising a TypeError, so whatever could be read is still saved.
 *
 * @param {Document} doc - the book page to scrape
 */
function scrape(doc) {
  var nsResolver = null;
  var itemType = "book";
  var newItem = new Zotero.Item(itemType);
  Zotero.debug(itemType);

  // Attachment: snapshot of the web page
  newItem.attachments.push({document: doc, title: "web snapshot"});

  Zotero.debug("Title:");
  // Title lives at /html/body/div/h1
  var titleTag = doc.evaluate('//html/body/div/h1', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
  if (titleTag) {
    var title = Zotero.Utilities.trimInternal(titleTag.textContent);
    Zotero.debug(title);
    newItem.title = title;
  }

  // Everything else lives under //*[@id="info"]
  var info = doc.evaluate('//*[@id="info"]', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
  if (!info) {
    Zotero.debug("Douban: no #info block found; saving without bibliographic details");
    newItem.complete();
    return;
  }

  // Creators (authors and translators): a row is a <span> that wraps a
  // nested label <span> plus an <a> holding the person's name.
  var dataRows = info.getElementsByTagName("span");
  var i;
  for (i = 0; i < dataRows.length; i++) {
    var dataRow = dataRows[i];
    var spanTags = dataRow.getElementsByTagName("span");
    if (spanTags.length === 0) {
      continue; // a plain span, not a labelled row
    }
    var nameTag = dataRow.getElementsByTagName("a")[0];
    if (!nameTag) {
      continue; // labelled row without a linked name: nothing to record
    }
    var authorType = Zotero.Utilities.trimInternal(spanTags[0].textContent);
    var name = Zotero.Utilities.trimInternal(nameTag.textContent);
    if (authorType === "作者") {
      newItem.creators.push(Zotero.Utilities.cleanAuthor(name, "author", true));
    } else if (authorType === "译者") {
      newItem.creators.push(Zotero.Utilities.cleanAuthor(name, "translator", true));
    }
  }

  // Detail block: ISBN, pages, price, publisher, binding, publication year
  var obmo = info.getElementsByTagName("div")[0];
  if (obmo) {
    var content = obmo.textContent;
    dataRows = obmo.getElementsByTagName("span");
    Zotero.debug(dataRows.length);
    // Strip the field labels (the span texts) so only the values remain
    for (i = 0; i < dataRows.length; i++) {
      content = content.replace(dataRows[i].textContent, "");
    }
    // Trim both ends, then turn each whitespace run into a "," separator
    content = content.replace(/(^\s*)|(\s*$)/g, "");
    content = content.replace(/\s+/g, ',');
    var infoContents = content.split(",");

    // Values are picked by position, in the order listed above
    newItem.ISBN = infoContents[0];
    newItem.pages = infoContents[1];
    newItem.publisher = infoContents[3];
    newItem.date = infoContents[5];
  }

  newItem.complete();
}
/**
 * Translator entry point: collects the page URLs to save and hands each one
 * to scrape().
 *
 * On a search listing (detectWeb() reports "multiple") the result tables
 * under the element with id "in_tablem" are read, the user picks entries via
 * Zotero.selectItems(), and the chosen links are processed. On any other
 * page only the current URL is processed.
 *
 * @param {Document} doc - the loaded page
 * @param {string} url - address of the loaded page
 * @returns {boolean|undefined} true when there is nothing to process (no
 *   usable results, or the selection came back empty); otherwise nothing
 */
function doWeb(doc, url) {
  var nsResolver = null;
  var urls;

  if (detectWeb(doc, url) === "multiple") {
    Zotero.debug("Enter multiple~");
    // Search page: map of result link -> title. A plain object is used
    // because the keys are URLs, not array indices.
    var items = {};
    var found = false;
    var tablemTag = doc.evaluate('//*[@id="in_tablem"]', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
    var tableTags = tablemTag ? tablemTag.getElementsByTagName("table") : [];
    Zotero.debug(tableTags.length);
    Zotero.debug("begin to fetch multiple title and link");
    for (var i = 0; i < tableTags.length; i++) {
      var tds = tableTags[i].getElementsByTagName("td");
      if (tds.length < 2) {
        continue; // the title link is read from the second cell
      }
      var anchor = tds[1].getElementsByTagName("a")[0];
      if (!anchor || !anchor.href) {
        continue; // no link to follow for this row
      }
      var title = anchor.textContent;
      var link = anchor.href;
      Zotero.debug(title);
      Zotero.debug(link);
      // trimInternal is the same whitespace cleaner scrape() uses
      items[link] = Zotero.Utilities.trimInternal(title);
      found = true;
    }
    if (!found) {
      Zotero.debug("Douban: no search results found on this page");
      return true;
    }

    // Let the user choose which entries to save
    items = Zotero.selectItems(items);
    if (!items) return true;
    Zotero.debug("go on processing.");

    urls = [];
    // Separate loop variable so the `url` parameter is not overwritten
    for (var selectedUrl in items) {
      urls.push(selectedUrl);
    }
  } else {
    urls = [url];
  }

  Zotero.debug(urls);
  // Scrape every collected URL, then signal completion
  Zotero.Utilities.processDocuments(urls, scrape, function() { Zotero.done(); });
  Zotero.wait();
}
4 条评论:
Good job!
It's fantastic!
zotero 2.0b4下,不能抓取....
发芽上3月的更新也不行.
很喜欢这个translator,但是能不能再进一步把音乐和电影电视剧和书籍区别开来啊!
发表评论