Python批量抓取图片

2015-08-13 0 683
Python批量抓取图片
# -*- coding:utf-8 -*-
# coding=UTF-8

import os,urllib,urllib2,re

# Search-results page that is scraped for image links (image.baidu.com,
# query word "python").  Split across adjacent literals for readability;
# the resulting string is unchanged.
url = (
    u"http://image.baidu.com/search/index"
    u"?tn=baiduimage&ipn=r&ct=201326592&cl=2&lm=-1&st=-1"
    u"&fm=index&fr=&sf=1&fmq=&pv=&ic=0&nc=1&z=&se=1"
    u"&showtab=0&fb=0&width=&height=&face=0&istype=2"
    u"&ie=utf-8&word=python&oq=python&rsp=-1"
)
# Prefix prepended verbatim to every downloaded file name, so it has to end
# with a path separator.
outpath = "t:\\"

def getHtml(url):
    """Fetch ``url`` and return the raw response body.

    The body is also echoed to stdout (debug output of this script).

    Args:
        url: Address to fetch; handed unchanged to ``urllib.urlopen``.

    Returns:
        The response body exactly as produced by the handle's ``read()``.

    Raises:
        IOError: Propagated from ``urllib.urlopen`` / ``read()`` when the
            resource cannot be opened or read.
    """
    webfile = urllib.urlopen(url)
    try:
        outhtml = webfile.read()
    finally:
        # Release the connection even when the read fails part-way; the
        # handle was previously left open.
        webfile.close()
    print(outhtml)
    return outhtml

def getImageList(html):
    """Return every absolute image URL found in ``html``.

    A URL is recognised when it starts with ``http://`` or ``https://``,
    contains no whitespace, comma or double quote, and ends in ``.jpg``,
    ``.jpeg``, ``.png``, ``.gif`` or ``.bmp``.  The result is also echoed to
    stdout (debug output of this script).

    Args:
        html: Text to scan.

    Returns:
        A list of matched URL strings in order of appearance (possibly
        empty).  Duplicates are kept.
    """
    extensions = ('jpg', 'jpeg', 'png', 'gif', 'bmp')
    # Generate one alternative per scheme/extension pair.  Writing the list
    # out by hand had left the https branch with ``.jpeg`` twice and no
    # ``.jpg``, so https JPG links were silently dropped.  The alternation
    # order (all http first, extensions in the order above) is significant
    # and is kept.
    alternatives = [
        r'%s://[^\s,"]*\.%s' % (scheme, ext)
        for scheme in ('http', 'https')
        for ext in extensions
    ]
    htmlurl = re.compile('(' + '|'.join(alternatives) + ')')
    imgList = htmlurl.findall(html)
    print(imgList)
    return imgList

def download(imgList, page):
    """Download every URL in ``imgList`` below the module-level ``outpath``.

    Each file is stored as ``outpath + 'pic_<page>_<index><ext>'`` where
    ``<page>`` is zero-padded to 9 digits, ``<index>`` is the 1-based
    position in ``imgList`` zero-padded to 10 digits, and ``<ext>`` is the
    lower-cased extension of the last path component of the URL.

    Args:
        imgList: Iterable of image URLs.
        page: Integer page number, used only in the file name.

    Returns:
        None.  A transfer that raises ``IOError`` is reported on stdout and
        skipped so the remaining images are still fetched.
    """
    for index, imgurl in enumerate(imgList, 1):
        # Only the extension is taken from the URL, so percent-decoding is
        # enough; no decode()/str() round trip that could raise on
        # non-ASCII input.
        lastPart = urllib.unquote(imgurl).split('/')[-1]
        extension = os.path.splitext(lastPart)[1].lower()
        # Lower-case just the extension: outpath is caller configuration and
        # must reach the file system unmodified.
        filepathname = outpath + 'pic_%09d_%010d' % (page, index) + extension
        print('[Debug] Download file :' + imgurl + ' >> ' + filepathname)
        try:
            urllib.urlretrieve(imgurl, filepathname)
        except IOError as err:
            # One unreachable image should not abort the whole batch.
            print('[Debug] Download failed :' + imgurl + ' (' + str(err) + ')')

def downImageNum(pagenum):
    """Run the fetch / extract / download cycle ``pagenum`` times.

    Passes are numbered from 1 and the pass number is forwarded to
    ``download`` for use in file names.

    NOTE: every pass fetches the same module-level ``url``; the pass number
    is not part of the request.

    Args:
        pagenum: Number of passes; nothing happens when it is below 1.
    """
    current = 1
    while current <= pagenum:
        # Fetch the HTML behind url, pull out all image addresses as a list,
        # then download every one of them.
        download(getImageList(getHtml(url)), current)
        current += 1

if __name__ == "__main__":
    # Script entry point: perform a single fetch-and-download pass.
    downImageNum(1)


遇见资源网 python Python批量抓取图片 http://www.ox520.com/15670.html

常见问题

相关文章

发表评论
暂无评论
官方客服团队

为您解决烦忧 - 24小时在线 专业服务