下载漫画小脚本

2015-06-18 0 596
下载漫画小脚本
#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""
Copyright (c) 2015, The Sun Technology
This program downloads comic images from the internet.
"""
import os
import time
import urllib2
from urllib2 import HTTPError
from urlparse import urljoin, urlparse

from bs4 import BeautifulSoup

# Template for local save locations: the "%s" placeholder is filled with a
# path derived from a URL (typically starting with "/") and appended to the
# local documents directory. Despite its name this is a filesystem path
# template, not a URL.
BASE_URL="/Users/mac/Documents%s"

def get_file_name(req_url):
    """Split the path component of a URL into ``(directory, file name)``.

    Only the path part of ``req_url`` is looked at; scheme, host, query
    string and fragment are ignored.

    :param req_url: URL (or URL template) to take apart.
    :return: the ``(head, tail)`` tuple produced by ``os.path.split``.
    """
    url_path = urlparse(req_url).path
    directory, file_name = os.path.split(url_path)
    return (directory, file_name)

def get_save_path(save_dir):
    """Make sure the local directory mirroring a URL's path exists.

    The directory part of the path of ``save_dir`` is substituted into
    ``BASE_URL`` and the resulting directory is created, together with any
    missing parent directories.

    :param save_dir: URL (or URL template) whose path's directory part names
        the local sub-directory.
    :return: the local directory path, which exists when this returns.
    :raises OSError: if the directory does not exist and cannot be created.
    """
    dirs = get_file_name(save_dir)
    save_path = BASE_URL % dirs[0]
    try:
        # makedirs also creates missing parents, which plain mkdir refuses.
        os.makedirs(save_path)
    except OSError:
        # An already existing directory is fine; anything else is a real
        # failure and must not be hidden.
        if not os.path.isdir(save_path):
            raise
    return save_path

def save_files(file_url, file_path):
    """Download ``file_url`` and write its content to ``file_path``.

    The whole response body is read into memory and then written in binary
    mode. A success message and the elapsed time are printed afterwards.

    :param file_url: URL of the resource to fetch.
    :param file_path: local file to create or overwrite.
    :raises urllib2.URLError: if the URL cannot be opened (``HTTPError`` is
        a subclass).
    :raises IOError: if ``file_path`` cannot be written.
    """
    start = time.time()
    response = urllib2.urlopen(file_url)
    try:
        content = response.read()
    finally:
        # Release the connection even when reading the body fails.
        response.close()
    with open(file_path, "wb") as handler:
        handler.write(content)
    print("%s has been downloaded successfully " % file_url)
    # time.time() counts seconds; scale so the value matches the "ms" label.
    print("Total cost:%.3f ms" % ((time.time() - start) * 1000))

def download(url_path, start=82, count=10):
    """Download the main comic image of a range of numbered pages.

    For every page number in ``[start, start + count)`` the page at
    ``url_path % number`` is fetched, the ``<img id="main-comic">`` element
    is looked up, and the image it points to is stored below ``BASE_URL``
    under the same path it has on the server.

    Pages that fail with an HTTP error, come back empty, or contain no
    matching image are reported or skipped; the remaining pages are still
    processed.

    :param url_path: page URL template with one ``%s`` placeholder for the
        page number.
    :param start: first page number to fetch.
    :param count: number of consecutive pages to fetch.
    """
    for page_num in range(start, start + count):
        combine_url = url_path % page_num
        try:
            response = urllib2.urlopen(combine_url)
            try:
                page = response.read() if response.getcode() == 200 else None
            finally:
                # Close exactly the response that was opened; a failed
                # urlopen never reaches this point.
                response.close()
            if not page:
                continue

            # Start parsing the HTML from the web page.
            soup = BeautifulSoup(page, "html.parser")
            images = soup.find_all('img', id="main-comic")
            src = images[0].get('src') if images else None
            if not src:
                print("No comic image found on %s" % combine_url)
                continue

            # Resolve the image address against the page URL so that
            # protocol-relative, absolute and relative links all work.
            rebuild_url = urljoin(combine_url, src)

            get_name = get_file_name(rebuild_url)
            target = BASE_URL % '/'.join(get_name)
            # The image may live in a different directory than the page,
            # so make sure its local directory exists before saving.
            target_dir = os.path.dirname(target)
            if not os.path.isdir(target_dir):
                os.makedirs(target_dir)

            save_files(rebuild_url, target)
        except HTTPError as e:
            print("An error has accour %s" % e)
            continue

if __name__ == '__main__':
    # Page URL template; the "%s" placeholder is filled with the page number
    # inside download().
    comic_page_template = "http://explosm.net/comics/%s"
    # Prepare the local target directory first, then fetch the comics.
    get_save_path(comic_page_template)
    download(comic_page_template)

遇见资源网 python 下载漫画小脚本 http://www.ox520.com/15556.html

常见问题

相关文章

发表评论
暂无评论
官方客服团队

为您解决烦忧 - 24小时在线 专业服务