Python基于爬虫实现全网搜索并下载音乐

现在写一篇博客总是喜欢先谈需求或者本内容的应用场景，是的，如果写出来的东西没有任何应用价值，确实也没有实际意义。今天最初的需求来自于如何免费[白嫖]下载全网优质音乐，我去b站上面搜索到了一个大牛做过的歌曲搜索神器，界面是这样的：

确实很好用，而且涵盖了互联网上面大多数主流的音乐网站，涉及到的版本也很多，可谓大而全。但是一个技术人的追求远远不止于此，于是我就想去了解其背后的原理。因为做过网络爬虫的人都知道，爬虫只能爬取某一页或者某些页的网站资源，所以我很好奇它背后是怎么实现的？
笔者一直以来做的都是基于Python3.7版本的网络爬虫，所以本文也是基于此来学习记录的。首先，爬取的网站不是对应的音乐网站，而是一个音乐直链搜索网站，那里汇聚了大多数音频音乐的解析功能，界面如下：

Python基于爬虫实现全网搜索并下载音乐

所以自制一个搜索引擎的思想也很直观了，那就是利用第三方的接口，直接对该服务器发起请求即可，然后将获取的数据进行解析保存。这里以最近比较火的歌曲“白月光与朱砂痣”下载为例：

Python基于爬虫实现全网搜索并下载音乐

解析的结果如上，获取的数据格式是json类型，并且除了要有歌名以外，还要有音乐平台。

源代码实现如下：

import requests
import jsonpath
import os
"""
  1. url
  2. Simulate a browser request
  3. Parse the page source
  4. Save the data
"""
def song_download(url, title, author):
    """Download one song and save it as an MP3 file inside the ``music`` folder.

    The file name is ``<title><author>.mp3`` so that different versions of
    the same song do not overwrite each other.

    Args:
        url: Direct link to the audio file.
        title: Song title; used in the file name and the progress messages.
        author: Artist name; used in the file name and the progress messages.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        OSError: If the file cannot be written.
    """
    # Create the target folder; exist_ok avoids an error on repeated runs.
    os.makedirs("music", exist_ok=True)
    # Build the path with os.path.join so it is valid on every platform,
    # and actually use it below so the file lands inside "music".
    path = os.path.join("music", title + author + '.mp3')
    print('歌曲:{0}-{1},正在下载...'.format(title, author))
    # Reading the whole response into memory is fine for a handful of files.
    response = requests.get(url, timeout=30)
    # Fail loudly instead of saving an HTML error page as an .mp3 file.
    response.raise_for_status()
    with open(path, mode='wb') as f:
        f.write(response.content)
    print('下载完毕,{0}-{1},请试听'.format(title, author))

def get_music_name():
    """Search for a song from the console, list the matches and download one.

    Prompts for a song name and a platform code, posts the query to the
    search service, prints every match together with its index, then asks
    which index to download and hands that entry to ``song_download``.

    :return: None
    """
    name = input("请输入歌曲名称:").strip()
    print("1.网易云:netease\n2.QQ:qq\n3.酷狗:kugou\n4.酷我:kuwo\n5.百度:baidu\n6.喜马拉雅:ximalaya")
    platfrom = input("输入音乐平台类型:").strip()
    print("-------------------------------------------------------")
    search_url = 'https://music.liuzhijin.cn/'
    headers = {
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36",
        # Tells the server whether the request is asynchronous or synchronous.
        "x-requested-with": "XMLHttpRequest",
    }
    param = {
        "input": name,
        "filter": "name",
        "type": platfrom,
        "page": 1,
    }
    # A timeout keeps the script from hanging forever on a dead connection.
    res = requests.post(url=search_url, data=param, headers=headers, timeout=30)
    json_text = res.json()

    # jsonpath.jsonpath yields a falsy value instead of a list when nothing
    # matches, so all three lookups are checked before they are zipped.
    title = jsonpath.jsonpath(json_text, '$..title')
    author = jsonpath.jsonpath(json_text, '$..author')
    url = jsonpath.jsonpath(json_text, '$..url')
    if not (title and author and url):
        print("对不起，暂无搜索结果!")
        return

    songs = list(zip(title, author, url))
    # Show the index next to each entry: it is what the user must type below.
    for number, (song_title, song_author, song_url) in enumerate(songs):
        print(number, song_title, song_author, song_url)
    print("-------------------------------------------------------")
    try:
        index = int(input("请输入您想下载的歌曲版本:"))
    except ValueError:
        print("输入无效，请输入列表中的序号!")
        return
    # Reject negative and too-large numbers instead of crashing or silently
    # picking an entry counted from the end of the list.
    if not 0 <= index < len(songs):
        print("输入无效，请输入列表中的序号!")
        return
    song_title, song_author, song_url = songs[index]
    song_download(song_url, song_title, song_author)

# Start the interactive search only when this file is run as a script,
# not when it is imported.
if __name__ == "__main__":
    get_music_name()

演示一下运行效果：

Python基于爬虫实现全网搜索并下载音乐

然后文件夹下就会出现对应的歌曲.mp3文件

Python基于爬虫实现全网搜索并下载音乐

并且每次的歌曲检索都是在PyCharm专业版的控制台中进行的，这样的用户体验就非常糟糕，所以针对以上两个问题，我对源代码进行了改进。

# 导入模块
from tkinter import *
import requests
import jsonpath
import os
from urllib.request import urlretrieve

# 2. Implementation
"""
  1. url
  2. Simulate a browser request
  3. Parse the page source
  4. Save the data
"""
def song_download(url, title, author):
    """Download one song into the ``music`` folder and report progress in the list box.

    Progress, success and failure messages are appended to the module-level
    ``text`` list box. A failed download is reported there instead of
    raising, so the window stays usable.

    Args:
        url: Direct link to the audio file.
        title: Song title; used as the file name and in the messages.
        author: Artist name; used in the messages only.
    """
    def report(message):
        # Append the message, scroll it into view and repaint right away,
        # since the download that follows runs synchronously.
        text.insert(END, message)
        text.see(END)
        text.update()

    # Create the target folder; exist_ok avoids an error on repeated runs.
    os.makedirs("music", exist_ok=True)
    # os.path.join keeps the path valid on every platform, not only Windows.
    path = os.path.join("music", "{}.mp3".format(title))
    report('歌曲:{0}-{1},正在下载...'.format(title, author))
    try:
        urlretrieve(url, path)
    except (OSError, ValueError) as err:
        # OSError covers network and HTTP errors (URLError derives from it);
        # ValueError is raised for an empty or malformed URL.
        report('下载失败,{0}-{1}: {2}'.format(title, author, err))
        return
    report('下载完毕,{0}-{1},请试听'.format(title, author))

def get_music_name():
    """Search for the song typed into the entry box and download the first match.

    Reads the song name from ``entry`` and the platform code from ``var``,
    posts the query to the search service and passes the first result to
    ``song_download``. Search failures and empty results are reported in
    the ``text`` list box instead of raising inside the button callback.

    :return: None
    """
    name = entry.get()
    platfrom = var.get()
    search_url = 'https://music.liuzhijin.cn/'
    headers = {
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36",
        # Tells the server whether the request is asynchronous or synchronous.
        "x-requested-with": "XMLHttpRequest",
    }
    param = {
        "input": name,
        "filter": "name",
        "type": platfrom,
        "page": 1,
    }
    try:
        # A timeout keeps the window from freezing forever on a dead connection.
        res = requests.post(url=search_url, data=param, headers=headers, timeout=30)
        json_text = res.json()
    except (requests.RequestException, ValueError) as err:
        # ValueError covers a response body that is not valid JSON.
        text.insert(END, '搜索失败: {}'.format(err))
        text.see(END)
        return

    # jsonpath.jsonpath yields a falsy value instead of a list when nothing
    # matches, so indexing the result unchecked would raise a TypeError.
    title = jsonpath.jsonpath(json_text, '$..title')
    author = jsonpath.jsonpath(json_text, '$..author')
    url = jsonpath.jsonpath(json_text, '$..url')
    if not (title and author and url):
        text.insert(END, "对不起，暂无搜索结果!")
        text.see(END)
        return
    song_download(url[0], title[0], author[0])


# 1. User interface
# Create the main window.
root = Tk()
# Window title.
root.title('全网音乐下载器')
# Window size (560x450) and its initial position on screen (+400+200).
root.geometry('560x450+400+200')
# Label in front of the song-name input.
label = Label(root, text="请输入下载的歌曲:", font=('楷体', 20))
# Place it on the grid.
label.grid(row=0)
# Entry box for the song name.
entry = Entry(root, font=('宋体', 20))
entry.grid(row=0, column=1)
# Radio buttons for the music platform. NetEase is pre-selected so that a
# search started without clicking a button still sends a valid platform.
var = StringVar(value='netease')
r1 = Radiobutton(root, text='网易云', variable=var, value='netease')
r1.grid(row=1, column=0)
r2 = Radiobutton(root, text='QQ', variable=var, value='qq')
r2.grid(row=1, column=1)
# List box that shows the progress messages.
text = Listbox(root, font=('楷体', 16), width=50, height=15)
text.grid(row=2, columnspan=2)
# "Start download" button: runs the search and download.
button1 = Button(root, text='开始下载', font=('楷体', 15), command=get_music_name)
button1.grid(row=3, column=0)
# "Quit" button: leaves the main loop.
button2 = Button(root, text='退出程序', font=('楷体', 15), command=root.quit)
button2.grid(row=3, column=1)
# Show the window and start the event loop.
root.mainloop()