# coding=utf8
#LINUXQQ for crawler data v0.1
import os
import re
import urllib
# Base URL that the scraped, relative lesson links are appended to.
# NOTE(review): the listing pages crawled below live on www.51zxw.net, while
# this points at a different host -- confirm the intended base URL.
videourl = 'http://www.centoscn.com/'
# Root folder; one sub-folder per lesson is created beneath it.
rootdir = 'D:\\video\\'
def progress(blocknum, blocksize, totalsize):
    """Print the download progress as a percentage.

    Has the (blocknum, blocksize, totalsize) signature of a
    ``urlretrieve`` report hook.

    Args:
        blocknum: number of blocks transferred so far.
        blocksize: size of one block in bytes.
        totalsize: total size in bytes; zero or negative when unknown.
    """
    if totalsize <= 0:
        # Size unknown (urlretrieve may pass -1): no percentage can be
        # computed, and dividing would raise or print a negative value.
        return
    # The last block usually overshoots the real size, so cap at 100.
    per = min(100.0, 100.0 * blocknum * blocksize / totalsize)
    print("%.2f%%" % per)
def contact(link, directory, pattern=None):
    """Open one lesson page and download its Flash movie.

    The page at ``videourl + link`` is fetched, the URL captured by
    ``pattern`` has ``html`` replaced with ``swf``, and the result is saved
    as ``study.swf`` inside ``rootdir + directory``.

    Args:
        link: page path appended to ``videourl``.
        directory: sub-folder name created under ``rootdir``; also used as
            the prefix of the status message.
        pattern: regular expression whose group 1 captures the player URL.
            NOTE(review): the pattern in the published listing was lost
            (the line stopped at ``re.compile(``), so the default below is
            a reconstruction -- confirm it against the real page markup.

    Uses the Python 2 ``urllib`` API, like the rest of this script.
    """
    if pattern is None:
        # TODO(review): reconstructed default, see docstring.
        pattern = r'src="([^"]*?html[^"]*)"'

    # Links scraped out of HTML carry the entity form of '&'.
    # NOTE(review): the listing showed replace('&', '&'), a no-op that is
    # presumably '&amp;' after entity decoding -- confirm.
    newlink = link.replace('&amp;', '&')

    newhtml = urllib.urlopen(str(videourl + newlink))
    try:
        page = newhtml.read()
    finally:
        newhtml.close()

    req = re.search(re.compile(pattern, re.S | re.U), page)
    if not req:
        print(directory + 'download failure')
        return

    target = rootdir + directory
    # Plain os.mkdir raises when the folder already exists (re-runs) or
    # when rootdir itself has not been created yet.
    if not os.path.isdir(target):
        os.makedirs(target)

    download = req.group(1).replace('html', 'swf')
    if urllib.urlretrieve(download, target + '\\study.swf', progress):
        print(directory + 'download ok')
    else:
        print(directory + 'download failure')
def crawler(url, pattern=None):
    """Scrape one listing page and hand every lesson found to ``contact``.

    Args:
        url: address of the listing page to fetch.
        pattern: regular expression with two groups -- group 1 is passed to
            ``contact`` as the link, group 2 as the directory name.
            NOTE(review): only the tail `` ]*?>(.*?)`` of the original
            pattern survived in the published listing, so the default below
            is a reconstruction -- confirm it against the real page markup.

    Uses the Python 2 ``urllib`` API, like the rest of this script.
    """
    if pattern is None:
        # TODO(review): reconstructed default, see docstring.
        pattern = r'<a\s[^>]*?href="(.*?)"[^>]*?>(.*?)</a>'

    # Fetch the page that was asked for; the earlier code ignored ``url``
    # and always re-read the same hard-coded listing.
    html = urllib.urlopen(url)
    try:
        page = html.read()
    finally:
        html.close()

    data = re.compile(pattern, re.S | re.U)
    for link, title in data.findall(page):
        contact(link, title)
if __name__ == '__main__':
    # Walk listing pages 1 through 8 of category 359, one crawl per page.
    last_page = 8
    for p in range(1, last_page + 1):
        url = 'http://www.51zxw.net/list.aspx?page=%d&cid=359' % p
        crawler(url)
# 版权声明：本文内容由互联网用户自发贡献，该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务，不拥有所有权，不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容，请发送邮件至 [email protected] 举报，一经查实，本站将立刻删除。