代码
# Scrape the free resume templates from chinaz.com and save them locally.
# Listing pages:
#   http://sc.chinaz.com/jianli/free.html      (page 1)
#   http://sc.chinaz.com/jianli/free_%d.html   (page 2+)
import os

import requests
from lxml import etree

DIR_NAME = './resumeLibs'
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.83 Safari/537.36'
}
PAGE_URL_TEMPLATE = 'http://sc.chinaz.com/jianli/free_%d.html'
TIMEOUT = 15  # seconds; the original had no timeout, so one dead server hung the whole crawl


def _fetch_html(url):
    """GET *url* and return the parsed lxml element tree.

    Raises requests.HTTPError on a 4xx/5xx response — the original script
    silently parsed error pages as if they were listings.
    """
    resp = requests.get(url=url, headers=HEADERS, timeout=TIMEOUT)
    resp.raise_for_status()
    return etree.HTML(resp.text)


def _download_resume(detail_url, title):
    """Open one template's detail page and save its archive as <title>.rar.

    Skips the template (instead of crashing the whole run with IndexError)
    when the expected download link is missing from the page.
    """
    tree = _fetch_html(detail_url)
    links = tree.xpath('//div[@id="down"]/div[2]/ul/li[8]/a/@href')
    if not links:
        # Layout changed or page is malformed — report and move on.
        print(title, '下载链接未找到')
        return
    resp = requests.get(url=links[0], headers=HEADERS, timeout=TIMEOUT)
    resp.raise_for_status()
    resume_path = os.path.join(DIR_NAME, title + '.rar')
    with open(resume_path, 'wb') as fp:
        fp.write(resp.content)
    print(title, '下载成功!')


def main():
    """Crawl the listing page(s) and download every linked resume template."""
    os.makedirs(DIR_NAME, exist_ok=True)
    for page in range(1, 2):
        # Page 1 has a different URL shape than the numbered pages.
        if page == 1:
            list_url = 'http://sc.chinaz.com/jianli/free.html'
        else:
            list_url = PAGE_URL_TEMPLATE % page
        tree = _fetch_html(list_url)
        for anchor in tree.xpath('//div[@id="container"]/div/p/a'):
            detail_url = anchor.xpath('./@href')[0]
            title = anchor.xpath('./text()')[0]
            # The site's bytes get mis-decoded as latin-1 by requests;
            # round-tripping through iso-8859-1 recovers the UTF-8 title.
            title = title.encode('iso-8859-1').decode('utf-8')
            _download_resume(detail_url, title)


if __name__ == '__main__':
    main()
爬取结果
以上就是python 爬取免费简历模板网站的示例的详细内容,更多关于python 爬取网站的资料请关注自学编程网其它相关文章!