Python爬虫破解登录哔哩哔哩的方法

写在前面

对于一名找不到工作的爬虫菜鸡来说,登录这一块肯定是个比较大的难题。
 从今天开始,准备一点点地对大型网站逐个进行登录破解,以提升自己的爬虫水平。

环境搭建

Python 3.7.7环境,Mac电脑测试
Python内置库:base64、hashlib、time、urllib
第三方库:rsa、requests

PC端登录

全部代码:

\'\'\'PC登录哔哩哔哩\'\'\'
class Bilibili_For_PC():
  def __init__(self, **kwargs):
    for key, value in kwargs.items(): setattr(self, key, value)
    self.session = requests.Session()
    self.__initialize()
  \'\'\'登录函数\'\'\'
  def login(self, username, password, crack_captcha_func=None, **kwargs):
    # 若参数中给入代理,则设置
    self.session.proxies.update(kwargs.get(\'proxies\', {}))
    # 是否需要验证码
    is_need_captcha = False
    while True:
      # 需要验证码
      if is_need_captcha:
        captcha_img = self.session.get(self.captcha_url, headers=self.captcha_headers).content
        data = {\'image\': base64.b64encode(captcha_img).decode(\'utf-8\')}
        captcha = self.session.post(self.crack_captcha_url, json=data).json()[\'message\']
      # 获得key值
      appkey = \'1d8b6e7d45233436\'
      data = {
            \'appkey\': appkey,
            \'sign\': self.__calcSign(\'appkey={}\'.format(appkey))
          }
      response = self.session.post(self.getkey_url, data=data)
      response_json = response.json()
      key_hash = response_json[\'data\'][\'hash\']
      pub_key = rsa.PublicKey.load_pkcs1_openssl_pem(response_json[\'data\'][\'key\'].encode(\'utf-8\'))
      # 模拟登录
      if is_need_captcha:
        data = \"access_key=&actionKey=appkey&appkey={}&build=6040500&captcha={}&challenge=&channel=bili&cookies=&device=pc&password={}&permission=ALL&seccode=&subid=1&ts={}&username={}&validate=\" \\
            .format(appkey, captcha, urllib.parse.quote_plus(base64.b64encode(rsa.encrypt(\'{}{}\'.format(key_hash, password).encode(), pub_key))), int(time.time()), urllib.parse.quote_plus(username))
      else:
        data = \"access_key=&actionKey=appkey&appkey={}&build=6040500&captcha=&challenge=&channel=bili&cookies=&device=pc&password={}&permission=ALL&seccode=&subid=1&ts={}&username={}&validate=\" \\
            .format(appkey, urllib.parse.quote_plus(base64.b64encode(rsa.encrypt(\'{}{}\'.format(key_hash, password).encode(), pub_key))), int(time.time()), urllib.parse.quote_plus(username))
      data = \"{}&sign={}\".format(data, self.__calcSign(data))
      response = self.session.post(self.login_url, data=data, headers=self.login_headers)
      response_json = response.json()
      # 不需要验证码, 登录成功
      if response_json[\'code\'] == 0 and response_json[\'data\'][\'status\'] == 0:
        for cookie in response_json[\'data\'][\'cookie_info\'][\'cookies\']:
          self.session.cookies.set(cookie[\'name\'], cookie[\'value\'], domain=\'.bilibili\')
        print(\'[INFO]: Account -> %s, login successfully\' % username)
        infos_return = {\'username\': username}
        infos_return.update(response_json)
        return infos_return, self.session
      # 需要识别验证码
      elif response_json[\'code\'] == -105:
        is_need_captcha = True
      # 账号密码错误
      elif response_json[\'code\'] == -629:
        raise RuntimeError(\'Account -> %s, fail to login, username or password error\' % username)
      # 其他错误
      else:
        raise RuntimeError(response_json.get(\'message\'))
  \'\'\'计算sign值\'\'\'
  def __calcSign(self, param, salt=\"560c52ccd288fed045859ed18bffd973\"):
    sign = hashlib.md5(\'{}{}\'.format(param, salt).encode(\'utf-8\'))
    return sign.hexdigest()
  \'\'\'初始化\'\'\'
  def __initialize(self):
   # 登陆请求头
    self.login_headers = {\'Content-type\': \'application/x-www-form-urlencoded\'}
    # 破解验证码请求头
    self.captcha_headers = {\'Host\': \'passport.bilibili.com\'}
    # 获取key密钥URL
    self.getkey_url = \'https://passport.bilibili.com/api/oauth2/getKey\'
    # 获取登陆URL
    self.login_url = \'https://passport.bilibili.com/api/v3/oauth2/login\'
    # 获取验证码URL
    self.captcha_url = \'https://passport.bilibili.com/captcha\'
    # 破解网站来自: https://github.com/Hsury/Bilibili-Toolkit
    # 破解验证码URL
    self.crack_captcha_url = \'https://bili.dev:2233/captcha\'
    # 请求头都得加这个
    self.session.headers.update({\'User-Agent\': \"Mozilla/5.0 BiliDroid/5.51.1 (bbcallen@gmail.com)\"})

移动端登录

移动端与PC端类似,接口URL和请求头相同,区别在于appkey、签名盐值以及请求体中的设备字段(device=phone、mobi_app=android、platform=android)。在此不过多介绍。
 全部代码:

\'\'\'移动端登录B站\'\'\'
class Bilibili_For_Mobile():
  def __init__(self, **kwargs):
    for key, value in kwargs.items(): setattr(self, key, value)
    self.session = requests.Session()
    self.__initialize()
  \'\'\'登录函数\'\'\'
  def login(self, username, password, crack_captcha_func=None, **kwargs):
    self.session.proxies.update(kwargs.get(\'proxies\', {}))
    # 是否需要验证码
    is_need_captcha = False
    while True:
      # 需要验证码
      if is_need_captcha:
        captcha_img = self.session.get(self.captcha_url, headers=self.captcha_headers).content
        data = {\'image\': base64.b64encode(captcha_img).decode(\'utf-8\')}
        captcha = self.session.post(self.crack_captcha_url, json=data).json()[\'message\']
      # 获得key值
      appkey = \'bca7e84c2d947ac6\'
      data = {
            \'appkey\': appkey,
            \'sign\': self.__calcSign(\'appkey={}\'.format(appkey))
          }
      response = self.session.post(self.getkey_url, data=data)
      response_json = response.json()
      key_hash = response_json[\'data\'][\'hash\']
      pub_key = rsa.PublicKey.load_pkcs1_openssl_pem(response_json[\'data\'][\'key\'].encode(\'utf-8\'))
      # 模拟登录
      if is_need_captcha:
        data = \"access_key=&actionKey=appkey&appkey={}&build=6040500&captcha={}&challenge=&channel=bili&cookies=&device=phone&mobi_app=android&password={}&permission=ALL&platform=android&seccode=&subid=1&ts={}&username={}&validate=\" \\
            .format(appkey, captcha, urllib.parse.quote_plus(base64.b64encode(rsa.encrypt(\'{}{}\'.format(key_hash, password).encode(), pub_key))), int(time.time()), urllib.parse.quote_plus(username))
      else:
        data = \"access_key=&actionKey=appkey&appkey={}&build=6040500&captcha=&challenge=&channel=bili&cookies=&device=phone&mobi_app=android&password={}&permission=ALL&platform=android&seccode=&subid=1&ts={}&username={}&validate=\" \\
            .format(appkey, urllib.parse.quote_plus(base64.b64encode(rsa.encrypt(\'{}{}\'.format(key_hash, password).encode(), pub_key))), int(time.time()), urllib.parse.quote_plus(username))
      data = \"{}&sign={}\".format(data, self.__calcSign(data))
      response = self.session.post(self.login_url, data=data, headers=self.login_headers)
      response_json = response.json()
      # 不需要验证码, 登录成功
      if response_json[\'code\'] == 0 and response_json[\'data\'][\'status\'] == 0:
        for cookie in response_json[\'data\'][\'cookie_info\'][\'cookies\']:
          self.session.cookies.set(cookie[\'name\'], cookie[\'value\'], domain=\'.bilibili\')
        print(\'[INFO]: Account -> %s, login successfully\' % username)
        infos_return = {\'username\': username}
        infos_return.update(response_json)
        return infos_return, self.session
      # 需要识别验证码
      elif response_json[\'code\'] == -105:
        is_need_captcha = True
      # 账号密码错误
      elif response_json[\'code\'] == -629:
        raise RuntimeError(\'Account -> %s, fail to login, username or password error\' % username)
      # 其他错误
      else:
        raise RuntimeError(response_json.get(\'message\'))
  \'\'\'计算sign值\'\'\'
  def __calcSign(self, param, salt=\"60698ba2f68e01ce44738920a0ffe768\"):
    sign = hashlib.md5(\'{}{}\'.format(param, salt).encode(\'utf-8\'))
    return sign.hexdigest()
  \'\'\'初始化\'\'\'
  def __initialize(self):
    self.login_headers = {
                \'Content-type\': \'application/x-www-form-urlencoded\'
              }
    self.captcha_headers = {
                \'Host\': \'passport.bilibili.com\'
              }
    self.getkey_url = \'https://passport.bilibili.com/api/oauth2/getKey\'
    self.login_url = \'https://passport.bilibili.com/api/v3/oauth2/login\'
    self.captcha_url = \'https://passport.bilibili.com/captcha\'
    # 破解网站来自: https://github.com/Hsury/Bilibili-Toolkit
    self.crack_captcha_url = \'https://bili.dev:2233/captcha\'
    self.session.headers.update({\'User-Agent\': \"Mozilla/5.0 BiliDroid/5.51.1 (bbcallen@gmail.com)\"})
© 版权声明
THE END
喜欢就支持一下吧
点赞0 分享
评论 抢沙发

请登录后发表评论

    暂无评论内容