selenium模块
selenium基本概念
selenium优势
便捷地获取网站中动态加载的数据
便捷实现模拟登陆
selenium使用流程:
1.环境安装:pip install selenium
2.下载一个浏览器的驱动程序(谷歌浏览器)
3.实例化一个浏览器对象
基本使用
代码
# Basic usage: open a page in Chrome, then parse the browser-rendered DOM
# with lxml and print the title of every entry in the #gzlist list.
from time import sleep

from lxml import etree
from selenium import webdriver

if __name__ == '__main__':
    # Start Chrome through the local chromedriver binary.
    bro = webdriver.Chrome(r"E:\google\Chrome\Application\chromedriver.exe")
    try:
        bro.get(url='http://scxk.nmpa.gov.cn:81/xk/')

        # page_source is the DOM as the browser currently holds it, so it
        # includes whatever the page's scripts have inserted so far.
        tree = etree.HTML(bro.page_source)

        for li in tree.xpath('//*[@id="gzlist"]/li'):
            titles = li.xpath('./dl/@title')
            # Skip entries without a <dl title="..."> instead of raising
            # IndexError halfway through the listing.
            if not titles:
                continue
            print(titles[0])

        sleep(5)
    finally:
        # Always shut the browser down, even if loading or parsing failed.
        bro.quit()
基于浏览器自动化的操作
代码
#编写基于浏览器自动化的操作代码
    - 发起请求: get(url)
    - 标签定位: find系列的方法
    - 标签交互: send_keys('xxx')
    - 执行js程序: execute_script('jsCode')
    - 前进,后退: back(),forward()
    - 关闭浏览器: quit()
代码
https://www.taobao.com/
# Browser automation demo: locate elements, run JavaScript, type into a
# search box, click a button, and navigate back/forward through history.
from time import sleep

from selenium import webdriver
from selenium.webdriver.common.by import By

bro = webdriver.Chrome(executable_path=r"E:\google\Chrome\Application\chromedriver.exe")
try:
    bro.get(url='https://www.taobao.com/')

    # Element lookup. find_element(By.…) is used instead of the
    # find_element_by_* helpers, which newer Selenium releases no longer ship.
    search_input = bro.find_element(By.ID, 'q')
    sleep(2)

    # Run a JavaScript snippet that scrolls to the bottom of the page.
    bro.execute_script('window.scrollTo(0,document.body.scrollHeight)')
    sleep(2)

    # Element interaction: type the query, then click the search button.
    search_input.send_keys('女装')
    button = bro.find_element(By.CLASS_NAME, 'btn-search')
    button.click()

    # Load a second page so there is history to step back and forward through.
    bro.get('https://www.baidu.com')
    sleep(2)
    bro.back()
    sleep(2)
    bro.forward()
    sleep(5)
finally:
    # Close the browser even when one of the steps above fails.
    bro.quit()
selenium处理iframe:
- 如果定位的标签存在于iframe标签之中,则必须先使用switch_to.frame(id)切换到该iframe
- 动作链(拖动): from selenium.webdriver import ActionChains
    - 实例化一个动作链对象: action = ActionChains(bro)
    - click_and_hold(div): 点击并长按指定标签
    - move_by_offset(x,y): 按偏移量(x,y)移动鼠标
    - perform(): 让动作链立即执行
    - action.release(): 松开鼠标按键(同样需要调用perform()才会真正执行)
代码
https://www.runoob.com/try/try.php?filename=jqueryui-api-droppable
# iframe + action-chain demo: switch into the result iframe and drag the
# #draggable element to the right in five small steps.
from time import sleep

from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By

bro = webdriver.Chrome(executable_path=r"E:\google\Chrome\Application\chromedriver.exe")
try:
    bro.get('https://www.runoob.com/try/try.php?filename=jqueryui-api-droppable')

    # The draggable element lives inside an iframe, so the driver has to
    # switch into that frame before it can be located.
    bro.switch_to.frame('iframeResult')
    div = bro.find_element(By.ID, 'draggable')

    # Queue the whole gesture and dispatch it once. Calling perform() inside
    # the loop would replay the steps already queued on every iteration,
    # because perform() does not empty the chain.
    action = ActionChains(bro)
    action.click_and_hold(div)
    for _ in range(5):
        action.move_by_offset(17, 0).pause(0.3)
    # release() only queues the mouse-up; it takes effect in perform() below.
    action.release()
    action.perform()
finally:
    # Close the browser even when one of the steps above fails.
    bro.quit()
selenium模拟登陆QQ空间
代码
https://qzone.qq.com/
# Simulated QQ Zone login: switch into the login iframe, choose the
# account/password form, fill it in and submit.
from time import sleep

from selenium import webdriver
from selenium.webdriver.common.by import By

bro = webdriver.Chrome(executable_path=r"E:\google\Chrome\Application\chromedriver.exe")
try:
    bro.get('https://qzone.qq.com/')

    # The login form is rendered inside an iframe.
    bro.switch_to.frame("login_frame")

    # Switch from the default login view to the account/password form.
    switcher = bro.find_element(By.ID, 'switcher_plogin')
    switcher.click()

    user_tag = bro.find_element(By.ID, 'u')
    password_tag = bro.find_element(By.ID, 'p')
    # Placeholder credentials from the tutorial; replace with real ones.
    user_tag.send_keys('1234455')
    password_tag.send_keys('qwer123')
    sleep(1)

    but = bro.find_element(By.ID, 'login_button')
    but.click()

    # Leave the result on screen briefly before the browser is closed.
    sleep(5)
finally:
    # Close the browser even when one of the steps above fails.
    bro.quit()
无头浏览器和规避检测
代码
# Headless Chrome with the automation switch excluded, printing the page
# source of the loaded page.
from time import sleep

from selenium import webdriver
from selenium.webdriver.chrome.options import Options

# One Options object carries both groups of settings. Passing two separate
# objects through `chrome_options=` and `options=` lets one replace the
# other, so only one group would actually reach the browser.
chrome_options = Options()

# Run without a visible browser window.
chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')

# Detection evasion: exclude the enable-automation switch.
chrome_options.add_experimental_option('excludeSwitches', ['enable-automation'])

bro = webdriver.Chrome(
    executable_path=r"E:\google\Chrome\Application\chromedriver.exe",
    options=chrome_options,
)
try:
    bro.get('https://www.baidu.com')
    print(bro.page_source)
    sleep(2)
finally:
    # Close the browser even when loading the page fails.
    bro.quit()
© 版权声明
THE END
暂无评论内容