2009年2月27日星期五
wps的zotero插件
2009年2月23日星期一
这两天忙了点啥
- SPADE,一个python实现的多智能体开发环境,使用XMPP/Jabber,还专门为JADE开发了一个XMPP的MTP插件,但在Windows下不好用,试了几个小时也没搞定,就没兴趣再整了……
- Kalman滤波器,看了原理,很简单,关键是人家写的好,还找到了一个用python实现的代码,里面给出了我看的那篇文章的链接,看来还蛮火的~:)
- Wireshark,一个开源的功能强悍的网络包抓取和分析工具,试用了一下,蛮好用的,这里还有一个中文的教程,以前我咋就不知道有这样的好东西呢!
- 在云端上找到一个timeEdition软件,可以记录在电脑上干了些什么,并且可以将结果导出放到Google calendar上,真是太合我的心意了~阿军听我介绍后马上又找出一个叫rescue time的时间记录软件,功能好像也蛮强大的~
2009年2月19日星期四
完成支持豆瓣的Zotero的translator
"translatorID":"fc353b26-8911-4c34-9196-f6f567c93901",
"translatorType":4,
"label":"Douban",
"creator":"Ace Strong<acestrong@gmail.com>",
"target":"^https?://www.douban.com/subject",
"minVersion":"1.0.0",
"maxVersion":"",
"priority":100,
"inRepository":true,
"lastUpdated":"2009-2-19 20:45:00"
}
/**
 * Classify the page the translator was invoked on.
 *
 * @param {Document} doc  The loaded page (not inspected by this function).
 * @param {string} url    Address of the page.
 * @returns {string} "multiple" when the URL contains "subject_search",
 *                   otherwise "book".
 */
function detectWeb(doc, url) {
    // A URL containing "subject_search" is a search-result list; anything
    // else is treated as a single book page.
    if (/subject_search/.test(url)) {
        return "multiple";
    }
    return "book";
}
/**
 * Build a Zotero "book" item from one Douban book page and save it.
 *
 * The item always gets a snapshot attachment of the page. Title, creators
 * and the ISBN / pages / publisher / date fields are filled in only when
 * the corresponding page elements are present; a missing element is
 * skipped instead of aborting the whole item.
 *
 * @param {Document} doc  The loaded book page.
 */
function scrape(doc) {
    var nsResolver = null;
    var itemType = "book";
    var newItem = new Zotero.Item(itemType);
    Zotero.debug(itemType);

    // Attachment: snapshot of the web page.
    newItem.attachments.push({document:doc, title:"web snapshot"});

    // Title, located at /html/body/div/h1.
    Zotero.debug("Title:");
    var titleTag = doc.evaluate('//html/body/div/h1', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
    if (titleTag) {
        var title = Zotero.Utilities.trimInternal(titleTag.textContent);
        Zotero.debug(title);
        newItem.title = title;
    }

    // All remaining metadata lives under the element with id "info".
    var info = doc.evaluate('//*[@id="info"]', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
    if (!info) {
        // Nothing else can be extracted; keep what was collected so far.
        newItem.complete();
        return;
    }

    // Authors and translators: a <span> row that itself contains a label
    // <span> and a link <a> holding the person's name.
    var dataRows = info.getElementsByTagName("span");
    var i;
    for (i = 0; i < dataRows.length; i++) {
        var dataRow = dataRows[i];
        var spanTags = dataRow.getElementsByTagName("span");
        var links = dataRow.getElementsByTagName("a");
        // Skip label-only spans and rows that carry no name link.
        if (spanTags.length === 0 || links.length === 0) {
            continue;
        }
        var authorType = Zotero.Utilities.trimInternal(spanTags[0].textContent);
        var name = Zotero.Utilities.trimInternal(links[0].textContent);
        if (authorType == "作者") {            // label text for "author"
            newItem.creators.push(Zotero.Utilities.cleanAuthor(name, "author", true));
        } else if (authorType == "译者") {     // label text for "translator"
            newItem.creators.push(Zotero.Utilities.cleanAuthor(name, "translator", true));
        }
    }

    // ISBN, page count, price, publisher, binding and publication year are
    // plain text inside the first <div> of the info element, each preceded
    // by a label <span>.
    var obmo = info.getElementsByTagName("div")[0];
    if (obmo) {
        var content = obmo.textContent;
        dataRows = obmo.getElementsByTagName("span");
        Zotero.debug(dataRows.length);
        // Strip the label texts so that only the values remain.
        for (i = 0; i < dataRows.length; i++) {
            content = content.replace(dataRows[i].textContent, "");
        }
        // Trim leading and trailing whitespace.
        content = content.replace(/(^\s*)|(\s*$)/g, "");
        // Turn every inner whitespace run into "," and split on it.
        content = content.replace(/\s+/g, ',');
        var infoContents = content.split(",");

        // The values are picked by position, so this relies on the page
        // listing the fields in the order named above.
        newItem.ISBN = infoContents[0];
        newItem.pages = infoContents[1];
        newItem.publisher = infoContents[3];
        newItem.date = infoContents[5];
    }

    newItem.complete();
}
/**
 * Translator entry point: collect the page URL(s) to import and hand each
 * one to scrape().
 *
 * On a search-result page the titles and links found under the element
 * with id "in_tablem" are offered to the user through Zotero.selectItems();
 * on any other page the current URL is processed directly.
 *
 * @param {Document} doc  The loaded page.
 * @param {string} url    Address of the page.
 * @returns {boolean|undefined} true when there is nothing to import (no
 *          result container, or the selection was cancelled); otherwise
 *          nothing, with completion signalled through Zotero.done().
 */
function doWeb(doc, url) {
    var nsResolver = null;
    var urls;

    if (detectWeb(doc, url) == "multiple") {
        Zotero.debug("Enter multiple~");

        // Map of link -> title, in the shape Zotero.selectItems() expects.
        var items = {};

        // Container of the search results: //*[@id="in_tablem"]
        var tablemTag = doc.evaluate('//*[@id="in_tablem"]', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
        if (!tablemTag) {
            return true;
        }

        var tableTags = tablemTag.getElementsByTagName("table");
        Zotero.debug(tableTags.length);
        Zotero.debug("begin to fetch multiple title and link");
        for (var i = 0; i < tableTags.length; i++) {
            // The title link sits in the second cell of each result table.
            var tds = tableTags[i].getElementsByTagName("td");
            if (tds.length < 2) {
                continue;
            }
            var anchors = tds[1].getElementsByTagName("a");
            if (anchors.length === 0) {
                continue;
            }
            var link = anchors[0].href;
            var title = anchors[0].textContent;
            Zotero.debug(title);
            Zotero.debug(link);
            if (link) {
                items[link] = Zotero.Utilities.cleanString(title);
            }
        }

        // Let the user choose which entries to save.
        items = Zotero.selectItems(items);
        if (!items) return true;
        Zotero.debug("go on processing.");

        urls = [];
        // A separate loop variable keeps the `url` parameter intact.
        for (var itemUrl in items) {
            urls.push(itemUrl);
        }
    } else {
        urls = [url];
    }

    Zotero.debug(urls);
    // Parse every collected URL, then tell Zotero the translation is done.
    Zotero.Utilities.processDocuments(urls, scrape, function() { Zotero.done(); });
    Zotero.wait();
}
2009年2月18日星期三
用mingw编译boost,在Code::Block中使用
build.bat mingw然后将bjam.exe拷贝到根目录下(任何目录都可以啦),加入系统PATH中。回到boost的根目录,输入命令:
bjam --toolset=gcc "--prefix=D:\Boost" install然后就是等待了~
2009年2月16日星期一
2009年2月13日星期五
2009年2月12日星期四
相信我,你并不孤单!
> "D:\Python25\pythonw.exe" -u "F:\projects\svr\pso.py"显然,istPSO效果最好,但我还是不太满意,准备做进一步的优化!
training with function: Sphere
training with pso: bPSO
average value = 33895.368806, best value = 16366.455677
training with pso: sPSO
average value = 713.990013, best value = 5.145062
training with pso: tPSO
average value = 31565.762681, best value = 12586.708452
training with pso: stPSO
average value = 620.577996, best value = 5.207246
training with pso: istPSO
average value = 0.015338, best value = 0.004548
best pso is istPSO
training with function: Griewank
training with pso: bPSO
average value = 9.637782, best value = 4.977231
training with pso: sPSO
average value = 0.735644, best value = 0.191848
training with pso: tPSO
average value = 8.538446, best value = 4.857032
training with pso: stPSO
average value = 0.557846, best value = 0.187582
training with pso: istPSO
average value = 0.045185, best value = 0.000121
best pso is istPSO
training with function: Rosenbrock
training with pso: bPSO
average value = 9618082261.674509, best value = 2725173927.465609
training with pso: sPSO
average value = 179243235.044173, best value = 631.653198
training with pso: tPSO
average value = 8046176465.674629, best value = 1196837169.500892
training with pso: stPSO
average value = 2338.451929, best value = 612.315377
training with pso: istPSO
average value = 367129077.608837, best value = 29.142729
best pso is istPSO
training with function: Rastrigin
training with pso: bPSO
average value = 34903.229750, best value = 20759.712907
training with pso: sPSO
average value = 378.882609, best value = 186.164672
training with pso: tPSO
average value = 29756.547518, best value = 13739.131209
training with pso: stPSO
average value = 785.898331, best value = 205.353543
training with pso: istPSO
average value = 91.394043, best value = 1.290095
best pso is istPSO
再来说说今天看到的一个帖子,前面已经给了一个链接,从TopLanguage里看到的,点击进去看了,发现是个豆瓣的帖子,楼主开篇的话让我觉得“并不孤单”!笑来的话再次应验了:相信我,你并不孤单!
明天还要考试,今晚不能太迟睡了,再看会这个帖子,看看楼主是如何攻克拖延症的~晚上的批量测试程序就是在看到这个帖子后才开始做的,看到有这么多人和我有一样的痛苦,不觉中痛苦就减轻了很多,嘿嘿:)
2009年2月9日星期一
2009年2月4日星期三
[转]Python:time.clock() vs. time.time()
Python:time.clock() vs. time.time()
Posted by Ross Wan on 九月 19, 2008
有时候,我们需要知道程序或者当中的一段代码的执行速度,于是就会加入一段计时的代码,如下:
start = time.clock()
... do something
elapsed = (time.clock() - start)
又或者
start = time.time()
... do something
elapsed = (time.time() - start)
那究竟 time.clock() 跟 time.time(),谁比较精确呢?带着疑问,查了 Python 的 time 模块文档,当中 clock() 方法有这样的解释:
clock()
On
Unix, return the current processor time as a floating point number
expressed in seconds. The precision, and in fact the very definition of
the meaning of “processor time”, depends on that of the C function of
the same name, but in any case, this is the function to use for
benchmarking Python or timing algorithms.
On Windows, this
function returns wall-clock seconds elapsed since the first call to
this function, as a floating point number, based on the Win32 function
QueryPerformanceCounter(). The resolution is typically better than one
microsecond.
可见,time.clock() 返回的是处理器时间,而因为 Unix 中 jiffy 的缘故,所以精度不会太高。
总结
究竟是使用 time.clock() 精度高,还是使用 time.time() 精度更高,要视乎所在的平台来决定。总概来讲,在 Unix 系统中,建议使用 time.time(),在 Windows 系统中,建议使用 time.clock()。
这个结论也可以在 Python 的 timeit 模块中(用于简单测量程序代码执行时间的内建模块)得到论证:
if sys.platform == "win32":
    # On Windows, the best timer is time.clock()
    default_timer = time.clock
else:
    # On most other platforms the best timer is time.time()
    default_timer = time.time
使用 timeit 代替 time,这样就可以实现跨平台的精度性:
start = timeit.default_timer()
... do something
elapsed = (timeit.default_timer() - start)
2009年2月2日星期一
多维粒子群(PSO)算法
先从这个网站找到一个简单并且有点不靠谱的一维PSO的python实现。特点是简单,容易看懂,并且用pygame将优化过程保存成图片,方便以后查看。改为多维的实现,能运行后发现调用libsvm进行交叉校验每次都会打印出信息,大大减慢了算法的速度!于是昨晚就调试怎么去掉这个了~
今天早上起来想到那个保存成图片的功能不错,多维算法可以指定其中一维来实现,大不了多搞几个这样的类给PSO,分别保存每一维的优化过程就是了!实现后发现有问题,蓝色的最优点居然会跑到fitness函数外面!幸亏这个保存图片的功能,不然还真发现不了这个bug呢!经过调试发现是因为复制列表时仅复制了引用,这样在种群继续进化时就直接导致最优点的值也变了!找到问题就好办了,用deepcopy复制列表就OK啦~
多维PSO的代码如下:
# Copyright (C) 2009, Ace Strong <acestrong@gmail.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# $Id: pso.py,v 2.0 2009/02/02 10:00:45 ace Exp $
from random import uniform
from copy import deepcopy
class PSO:
    '''Particle swarm optimiser that minimises a fitness function.

    Every particle is a three-element list ``[position, velocity, fitness]``;
    position and velocity are lists holding one number per dimension.
    A lower fitness value is treated as better.
    '''

    def __init__(self, pop_size, particle_size, particle_scope, C1, C2, w,
                 max_w, min_w, max_iter, func, extra_args=None):
        '''
        pop_size: size of population
        particle_size: dimension of each particle
        particle_scope: flat sequence (min0, max0, min1, max1, ...) giving
            the value range of each particle's dimension
        C1: cognitive factor following personal best
        C2: social factor following global best
        w: constant inertial factor (stored only; the schedule used by
            update_velocity is driven by max_w / min_w)
        max_w: upper bound of the inertial factor
        min_w: lower bound of the inertial factor
        max_iter: max iteration of the algorithm
        func: fitness evaluation function, called as
            func(position, extra_args)
        extra_args: passed unchanged as second argument to func
        '''
        # fitness function and the extra argument forwarded to it
        self.func = func
        self.extra_args = extra_args
        self.pop = []
        # converging factor, scales the velocity when particles move
        self.r = 0.729
        self.particle_size = particle_size
        # even entries are the lower bounds, odd entries the upper bounds
        self.min_range = particle_scope[::2]
        self.max_range = particle_scope[1::2]
        for _ in range(pop_size):
            self.pop.append(self.initParticle())
        self.evaluate()
        # global best: the fittest particle of the initial population
        # (copied, so later moves of the swarm cannot change it)
        self.gbest = deepcopy(min(self.pop, key=lambda p: p[2]))
        # personal best, each particle has one
        self.pbest = deepcopy(self.pop)
        # weight following personal best, cognitive factor
        self.C1 = C1
        # weight following global best, social factor
        self.C2 = C2
        # weight following current speed, inertial factor
        self.w = w
        # bounds used by the non-constant inertia schedule
        self.max_w = max_w
        self.min_w = min_w
        # max iteration number
        self.max_iter = max_iter
        # current iteration number
        self.curr_iter = 0

    def initParticle(self):
        '''Return a new particle [position, velocity, fitness].

        The position is drawn uniformly inside the configured range of each
        dimension, every velocity component uniformly from [-1, 1], and the
        fitness starts at 0 until evaluate() is called.
        '''
        position = [uniform(self.min_range[j], self.max_range[j])
                    for j in range(self.particle_size)]
        velocity = [uniform(-1, 1) for _ in range(self.particle_size)]
        return [position, velocity, 0]

    def update_velocity(self):
        '''Recompute every particle's velocity from its personal best and
        the global best, using a linearly descending inertial factor.'''
        # Linear descent from max_w towards min_w. The step count is capped
        # at max_iter so w never drops below min_w when run() is called more
        # than once; float() avoids integer division under Python 2.
        step = min(self.curr_iter, self.max_iter)
        w = self.max_w - step * ((self.max_w - self.min_w) / float(self.max_iter))
        # (A fixed ``w = self.w`` or a nonlinear schedule could be
        # substituted here.)
        for p, best in zip(self.pop, self.pbest):
            for j in range(self.particle_size):
                p[1][j] = w * p[1][j] \
                    + uniform(0, self.C1) * (best[0][j] - p[0][j]) \
                    + uniform(0, self.C2) * (self.gbest[0][j] - p[0][j])

    def evaluate(self):
        '''Store func(position, extra_args) as the fitness of each particle.'''
        for p in self.pop:
            p[2] = self.func(p[0], self.extra_args)

    def move(self):
        '''Advance every particle by r * velocity.

        A particle that leaves the allowed range in any dimension is
        replaced by a freshly initialised one.
        '''
        for i, p in enumerate(self.pop):
            for j in range(self.particle_size):
                p[0][j] += self.r * p[1][j]
                if p[0][j] > self.max_range[j] or p[0][j] < self.min_range[j]:
                    self.pop[i] = self.initParticle()
                    break

    def run(self, update_func=None):
        '''Run max_iter iterations of the swarm.

        update_func: optional callable without arguments, invoked at the
            start of every iteration (e.g. to draw the current state).
        '''
        for _ in range(self.max_iter):
            # single-argument parenthesised form works on Python 2 and 3
            print("current iter = %d" % self.curr_iter)
            if update_func:
                update_func()
            self.update_velocity()
            self.move()
            self.evaluate()
            # refresh the personal bests
            for k, particle in enumerate(self.pop):
                if particle[2] < self.pbest[k][2]:
                    self.pbest[k] = deepcopy(particle)
            # current best of the personal bests
            cpbest = min(self.pbest, key=lambda p: p[2])
            if cpbest[2] < self.gbest[2]:
                # copy so gbest never shares a list with a pbest entry
                self.gbest = deepcopy(cpbest)
            self.curr_iter += 1

    def __str__(self):
        '''Return one line per particle of the current population.'''
        return "".join(str(p) + "\n" for p in self.pop)
import pygame
import time
class PygamePrinter:
    '''Draw the evolution of one dimension of a PSO run with pygame.

    An instance is callable without arguments, so it can be handed to
    PSO.run() as update_func. Each call redraws the fitness curve and the
    swarm; every second call is also saved as "pso-<n>.bmp".
    '''

    def __init__(self, pso, w=400, h=300, dimension=0, extra_args=None):
        '''
        pso: optimiser to visualise; its func, pop, gbest, min_range and
            max_range attributes are read
        w: window width in pixels
        h: window height in pixels
        dimension: index of the particle dimension to draw
        extra_args: second argument passed to pso.func when the curve is
            drawn
        '''
        self.calls = 0
        self.w = w
        self.h = h
        self._init_pygame()
        self.pso = pso
        self.dimension = dimension
        self.extra_args = extra_args

    def _init_pygame(self):
        '''Open the display window and define the drawing colours.'''
        self.screen = pygame.display.set_mode((self.w, self.h), 0, 8)
        self.backcolor = (0, 0, 0)
        self.funccolor = (255, 255, 255)
        self.partcolor = (255, 0, 0)
        self.elitecolor = (0, 0, 255)

    def draw_point(self, color, x, y, size=3):
        '''Draw a filled square of side 2*size centred on screen (x, y).'''
        pygame.draw.rect(self.screen, color,
                         (x - size, y - size, size * 2, size * 2))

    def p2p(self, x, y):
        '''Scale (x, y) by the fixed factors 200 and 300.

        Not used by the other methods of this class; kept unchanged for
        existing callers.
        '''
        return (x + 1) * 200, y * 300

    def draw_func(self):
        '''Plot the fitness function, one sample per pixel column.'''
        lo = self.pso.min_range[self.dimension]
        span = float(self.pso.max_range[self.dimension] - lo)
        for i in range(self.w):
            # pixel column -> coordinate inside [min_range, max_range)
            x = i / (self.w / span) + lo
            # NOTE(review): the function receives the pair (x, dimension)
            # as its position argument - confirm this suits fitness
            # functions with more than one dimension.
            y = self.pso.func((x, self.dimension), self.extra_args)
            # a fitness value in [-1, 1] spans the full window height
            rh = y * (self.h / 2.) + (self.h / 2.)
            self.draw_point(self.funccolor, i, rh, 1)

    def _draw_xy(self, color, x, y):
        '''Draw a point given in function coordinates.

        Uses the same mapping as draw_func: x is measured from the lower
        bound of the drawn dimension, so asymmetric ranges are placed
        correctly as well.
        '''
        lo = self.pso.min_range[self.dimension]
        span = float(self.pso.max_range[self.dimension] - lo)
        self.draw_point(color, (x - lo) * self.w / span,
                        (y + 1) * self.h / 2.)

    def draw_pop(self):
        '''Draw every particle, then the global best on top of them.'''
        for particle in self.pso.pop:
            self._draw_xy(self.partcolor, particle[0][self.dimension],
                          particle[2])
        best = self.pso.gbest
        self._draw_xy(self.elitecolor, best[0][self.dimension], best[2])

    def __call__(self):
        '''Redraw the window and save every second frame as a bitmap.'''
        self.screen.fill(self.backcolor)
        self.screen.lock()
        self.draw_func()
        self.draw_pop()
        self.screen.unlock()
        pygame.display.flip()
        if self.calls % 2 == 0:
            pygame.image.save(self.screen, "pso-%d.bmp" % self.calls)
        self.calls += 1
import math
def testFunc(arg, extra_args):
x = arg[0]
return math.cos(x) * math.exp(math.sin(x)) * math.sin(x) / 1.5
def test():
    '''Run a 15-particle, one-dimensional demo on testFunc over [-4.5, 4.5]
    for 20 iterations, drawing each iteration with PygamePrinter, then
    print the final population and the global best.'''
    p = PSO(15, 1, (-4.5, 4.5), C1=2, C2=2, w=0.5, max_w=0.95, min_w=0.4,
            max_iter=20, func=testFunc)
    printer = PygamePrinter(p)
    p.run(update_func=printer)
    # single-argument parenthesised form works on Python 2 and 3
    print(p)
    print(p.gbest)


if __name__ == "__main__":
    test()