爬取“顶点小说网”《纯阳剑尊》
代码
"""Download one chapter of 《纯阳剑尊》 from 23us.so and append it to a text file."""

import requests
from bs4 import BeautifulSoup

# Anti-scraping workaround: send a browser-like User-Agent with every request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
                  '(KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36'
}

# Directory that holds the chapter pages of this novel.
BASE_URL = 'https://www.23us.so/files/article/html/15/15905/'
# The page id of chapter N is PAGE_ID_OFFSET + N.
# NOTE(review): this assumes the ids are contiguous for all chapters - confirm.
PAGE_ID_OFFSET = 8184027
# Highest chapter number offered in the input prompt.
CHAPTER_COUNT = 802
# Seconds to wait for the server before giving up instead of hanging forever.
REQUEST_TIMEOUT = 10


def open_url(url):
    """Fetch *url* and return the decoded page text.

    Args:
        url: Address of the page to download.

    Returns:
        The response body as ``str``, decoded with the encoding that
        requests detects from the content (``apparent_encoding``).

    Raises:
        requests.RequestException: On connection problems, timeouts, or an
            HTTP error status.
    """
    response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    # Fail loudly on 4xx/5xx instead of saving an error page as a chapter.
    response.raise_for_status()
    response.encoding = response.apparent_encoding
    return response.text


def get_title(url):
    """Extract the chapter title from a page.

    Args:
        url: Despite the name (kept for backward compatibility), this is the
            HTML text of the page, not its address.

    Returns:
        The text of the ``<h1>`` inside the first ``<dd>`` element, wrapped
        in a leading and a trailing newline.

    Raises:
        ValueError: If the page has no ``<dd>`` element containing an ``<h1>``.
    """
    soup = BeautifulSoup(url, 'lxml')
    container = soup.find('dd')
    heading = container.h1 if container is not None else None
    if heading is None:
        raise ValueError('chapter title (<dd><h1>) not found in page')
    return '\n' + heading.get_text() + '\n'


def get_texts(url):
    """Collect the elements that hold the chapter body.

    Args:
        url: Despite the name (kept for backward compatibility), this is the
            HTML text of the page, not its address.

    Returns:
        All ``<dd id="contents">`` tags found in the page (possibly empty).
    """
    soup = BeautifulSoup(url, 'lxml')
    return soup.find_all('dd', id="contents")


def save_text(filename, text):
    """Append *text* to *filename*, creating the file if needed (UTF-8)."""
    with open(filename, 'a', encoding='utf-8') as file:
        file.write(text)


def save_title(filename, title):
    """Append the chapter *title* to *filename* (UTF-8)."""
    # Same operation as saving body text; kept as a separate public name.
    save_text(filename, title)


def main():
    """Ask for a chapter number, download that chapter and append it to disk."""
    answer = input('《纯阳剑尊》你想要下载第几章?(1-802)')
    try:
        num = int(answer)
    except ValueError:
        print('请输入 1-{} 之间的整数!'.format(CHAPTER_COUNT))
        return
    if not 1 <= num <= CHAPTER_COUNT:
        print('请输入 1-{} 之间的整数!'.format(CHAPTER_COUNT))
        return

    url = BASE_URL + str(PAGE_ID_OFFSET + num) + '.html'
    filename = '纯阳剑尊.txt'

    try:
        html = open_url(url)
        title = get_title(html)
    except (requests.RequestException, ValueError) as err:
        print('第{}章下载失败: {}'.format(num, err))
        return
    tags = get_texts(html)

    # Everything is parsed before the first write, so a failure above never
    # leaves a half-written chapter in the output file.
    save_title(filename, title)
    # One write for the whole body instead of reopening the file per tag.
    save_text(filename, ''.join(tag.get_text() + '\n' for tag in tags))
    print('第{}章已经下载完成!'.format(num))


if __name__ == '__main__':
    main()
爬取结果:
以上就是 Python 爬取“顶点小说网”《纯阳剑尊》的示例代码的详细内容,更多关于 Python 爬取顶点小说网的资料请关注自学编程网其它相关文章!