# Python 3.7
# -*- coding: utf-8 -*-
3+
4+ import requests ,os ,json ,math
5+ from urllib import parse
6+ from concurrent .futures import ThreadPoolExecutor ,ProcessPoolExecutor
7+ from db import SQLsession ,Song
8+
# Default HTTP headers sent with the singer-list and song-list API requests.
# Reference page: https://y.qq.com/portal/singer_list.html#page=1&index=1&
headers = {
    'user-agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/75.0.3770.90 Safari/537.36'
    ),
    'referer': 'https://y.qq.com/portal/singer_list.html',
}
14+
15+
# Module-level database session; get_singer_data uses it to add and commit
# Song rows.
# NOTE(review): get_singer_data is submitted to a ThreadPoolExecutor, so this
# single object is used from several threads at once -- confirm in db.py that
# SQLsession() returns something that is safe to share between threads.
session = SQLsession()
17+
def myProcess(start=2, stop=3, max_workers=2):
    """Crawl a range of singer-list letter indexes, one process task per index.

    The original notes say singers are grouped by first letter into 27
    categories (indexes 1..27).  The defaults reproduce the previous
    hard-coded behaviour: only index 2 is crawled, with a pool of 2 worker
    processes.  Pass ``start=1, stop=28`` to cover every category.

    Args:
        start: First letter index to crawl (inclusive).
        stop: Letter index at which to stop (exclusive).
        max_workers: Size of the process pool.
    """
    with ProcessPoolExecutor(max_workers=max_workers) as pool:
        tasks = [
            (index, pool.submit(get_singer_mid, index))
            for index in range(start, stop)
        ]

    # Leaving the ``with`` block waits for every task to finish.  The futures
    # must be inspected explicitly, otherwise an exception raised inside a
    # worker process is silently discarded.
    for index, future in tasks:
        error = future.exception()
        if error is not None:
            print('get_singer_mid({}) failed: {!r}'.format(index, error))
23+
# Number of singers per page of the singer-list API, as assumed by the
# original paging arithmetic (total / 80, sin += 80).
_SINGERS_PER_PAGE = 80


def _singer_list_url(index, sin, cur_page):
    """Build the singer-list request URL for one page of one letter index."""
    # Compact separators reproduce the wire format of the former hand-written
    # JSON template exactly.
    payload = json.dumps(
        {
            'comm': {'ct': 24, 'cv': 0},
            'singerList': {
                'module': 'Music.SingerListServer',
                'method': 'get_singer_list',
                'param': {
                    'area': -100,
                    'sex': -100,
                    'genre': -100,
                    'index': int(index),
                    'sin': sin,
                    'cur_page': cur_page,
                },
            },
        },
        separators=(',', ':'),
    )
    return (
        'https://u.y.qq.com/cgi-bin/musicu.fcg?-=getUCGI0432880619182503'
        '&g_tk=5381&loginUin=0&hostUin=0&format=json&inCharset=utf8'
        '&outCharset=utf-8&notice=0&platform=yqq.json&needNewCode=0'
        '&data={}'
    ).format(parse.quote(payload))


def get_singer_mid(index):
    """Walk every singer-list page for one letter index and crawl each singer.

    The first page is fetched to learn the total number of singers; the
    remaining pages are then requested in turn.  For every singer found,
    ``get_singer_data`` is scheduled on a thread pool with the singer's mid
    and name.  The function returns once all scheduled singers have been
    processed.

    Args:
        index: Letter index of the singer list (1..27 per the original notes).
    """
    first_page = requests.get(
        _singer_list_url(index, 0, 1), headers=headers
    ).json()
    total = int(first_page['singerList']['data']['total'])

    # Round up: a trailing partial page still holds singers.  (The previous
    # floor() combined with range(1, pages) always skipped the last pages.)
    pages = (total + _SINGERS_PER_PAGE - 1) // _SINGERS_PER_PAGE
    if pages < 1:
        # No singers: ThreadPoolExecutor(max_workers=0) would raise ValueError.
        return

    scheduled = []
    # The context manager waits for all submitted work before returning, so
    # tasks are not abandoned when the calling worker process finishes.
    with ThreadPoolExecutor(max_workers=pages) as pool:
        for page in range(1, pages + 1):
            if page == 1:
                # Same request as the probe above; reuse its response.
                listing = first_page
            else:
                sin = (page - 1) * _SINGERS_PER_PAGE
                listing = requests.get(
                    _singer_list_url(index, sin, page), headers=headers
                ).json()

            for singer in listing['singerList']['data']['singerlist']:
                singer_name = singer['singer_name']
                mid = singer['singer_mid']
                future = pool.submit(
                    get_singer_data, mid=mid, singer_name=singer_name
                )
                scheduled.append((singer_name, future))

    # Surface failures that would otherwise stay hidden inside the futures.
    for singer_name, future in scheduled:
        error = future.exception()
        if error is not None:
            print('get_singer_data failed for {}: {!r}'.format(
                singer_name, error))
69+
70+
71+
# Fetch a singer's songs.
# Largest number of songs requested per call; the original code treated 80 as
# the page size (``<= 80`` check and ``range(0, songs_num, 80)``).
_SONGS_PER_REQUEST = 80


def _singer_song_url(mid, begin, num):
    """Build the song-list request URL for ``num`` songs starting at ``begin``."""
    # Compact separators reproduce the wire format of the former hand-written
    # JSON template exactly.
    payload = json.dumps(
        {
            'comm': {'ct': 24, 'cv': 0},
            'singerSongList': {
                'method': 'GetSingerSongList',
                'param': {
                    'order': 1,
                    'singerMid': str(mid),
                    'begin': int(begin),
                    'num': int(num),
                },
                'module': 'musichall.song_list_server',
            },
        },
        separators=(',', ':'),
    )
    # NOTE(review): the trailing '*' in 'needNewCode=0*' is kept from the
    # original URL; confirm whether it is intentional.
    return (
        'https://u.y.qq.com/cgi-bin/musicu.fcg?-=getSingerSong9513357793133783&'
        'g_tk=5381&loginUin=0&hostUin=0&format=json&inCharset=utf8&outCharset=utf-8'
        '&notice=0&platform=yqq.json&needNewCode=0*&data={}'
    ).format(parse.quote(payload))


def _save_song_row(song_name, singer_name, song_mid):
    """Insert one Song row, rolling the session back if the write fails."""
    # NOTE(review): ``session`` is a module-level object and this function runs
    # on pool threads -- confirm it is safe to share between threads.
    try:
        session.add(Song(song_name=song_name,
                         song_singer=singer_name,
                         song_mid=song_mid))
        session.commit()
    except Exception as error:
        # Best effort, as before: a failed insert must not stop the crawl.
        session.rollback()
        print('rollback', repr(error))
    else:
        print('commit')


def get_singer_data(mid, singer_name):
    """Store and download every song of one singer.

    A small probe request reads the singer's total song count; the song list
    is then fetched in slices of at most 80 entries.  Each song is written to
    the database and handed to ``download``.

    Args:
        mid: The singer's mid, taken from the singer list.
        singer_name: The singer's display name, stored with every song.
    """
    # One session for all requests of this singer, closed on exit (previously
    # a new session was created for each request and never closed).
    with requests.Session() as http:
        probe = http.get(_singer_song_url(mid, 0, 10), headers=headers).json()
        total = int(probe['singerSongList']['data']['totalNum'])

        for begin in range(0, total, _SONGS_PER_REQUEST):
            # Ask only for this slice.  The previous multi-page branch asked
            # for ``total`` songs at every offset instead of one page's worth.
            count = min(_SONGS_PER_REQUEST, total - begin)
            content = http.get(
                _singer_song_url(mid, begin, count), headers=headers
            ).json()

            for entry in content['singerSongList']['data']['songList']:
                sing_name = entry['songInfo']['title']
                songmid = entry['songInfo']['mid']

                _save_song_row(sing_name, singer_name, songmid)

                print(sing_name, songmid, singer_name)
                download(songmid, sing_name)
149+
# Characters that are not allowed in file names on common file systems.
_UNSAFE_FILENAME_CHARS = '\\/:*?"<>|'


def _safe_song_filename(title):
    """Return ``title`` with path separators and reserved characters replaced."""
    cleaned = ''.join(
        '_' if ch in _UNSAFE_FILENAME_CHARS or ord(ch) < 32 else ch
        for ch in str(title)
    )
    return cleaned or '_'


def download(songmid, sing_name):
    """Resolve the stream URL of one song and save it as an ``.m4a`` file.

    The vkey endpoint is asked for the song's ``purl``; the audio is then
    fetched from the stream host and written to ``./歌曲/<title>.m4a``.
    Lookup, download and write failures are reported with ``print`` and the
    song is skipped, so one bad song does not stop the crawl.

    Args:
        songmid: The song's mid, as returned by the song list.
        sing_name: The song title; used (sanitised) as the file name.
    """
    request_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.90 Safari/537.36',
        'Referer': 'https://y.qq.com/n/yqq/singer/000aHmbL2aPXWH.html',
    }

    # Compact separators reproduce the wire format of the former hand-written
    # JSON template exactly.
    data = json.dumps(
        {
            'req': {
                'module': 'CDN.SrfCdnDispatchServer',
                'method': 'GetCdnDispatch',
                'param': {'guid': '5746584900', 'calltype': 0, 'userip': ''},
            },
            'req_0': {
                'module': 'vkey.GetVkeyServer',
                'method': 'CgiGetVkey',
                'param': {
                    'guid': '5746584900',
                    'songmid': [str(songmid)],
                    'songtype': [0],
                    'uin': '3262637034',
                    'loginflag': 1,
                    'platform': '20',
                },
            },
            'comm': {'uin': 3262637034, 'format': 'json', 'ct': 24, 'cv': 0},
        },
        separators=(',', ':'),
    )

    url = (
        'https://u.y.qq.com/cgi-bin/musicu.fcg?-=getplaysongvkey17693804549459324'
        '&g_tk=5381&loginUin=3262637034&hostUin=0&format=json&inCharset=utf8&outCharset=utf-8'
        '&notice=0&platform=yqq.json&needNewCode=0&data={}'
    ).format(parse.quote(data))

    html = requests.get(url, headers=request_headers)

    try:
        purl = html.json()['req_0']['data']['midurlinfo'][0]['purl']
    except (ValueError, KeyError, IndexError, TypeError):
        purl = None

    if not purl:
        # Without a purl there is nothing to fetch; requesting the bare host
        # would only save a non-audio response under an .m4a name.
        print('查询权限失败,或没有查到对应的歌曲')
        return

    filename = '歌曲'
    target = './{}/{}.m4a'.format(filename, _safe_song_filename(sing_name))

    try:
        audio = requests.get(
            'http://ws.stream.qqmusic.qq.com/{}'.format(purl),
            headers=request_headers,
        )
        # Do not store an HTTP error page as if it were audio.
        audio.raise_for_status()

        # exist_ok avoids the check-then-create race between pool threads.
        os.makedirs(filename, exist_ok=True)

        with open(target, 'wb') as f:
            print('\n 正在下载{}歌曲.....\n '.format(sing_name))
            f.write(audio.content)
    except (requests.RequestException, OSError) as error:
        # Best effort, as before: report the failure and skip this song.
        print('download failed for {}: {!r}'.format(sing_name, error))
188+
189+
190+
# Script entry point: start the crawl only when this file is executed
# directly.  myProcess() uses a ProcessPoolExecutor, whose worker processes
# need to be able to import this module without re-running the crawl.
if __name__ == '__main__':
    myProcess()
0 commit comments