Skip to content

Commit b442130

Browse files
author
mochazi
committed
2020.5.30 commit🎉
1 parent aeffaa6 commit b442130

File tree

5 files changed

+1739
-2
lines changed

5 files changed

+1739
-2
lines changed

README.md

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,19 @@
11
# **Python3Webcrawler**
2-
## **[哔哩哔哩作者:-相依-](https://space.bilibili.com/343154012)**  **UPDATE 2020 4 27**
2+
## **[哔哩哔哩作者:-相依-](https://space.bilibili.com/343154012)**  **UPDATE 2020 5 30**
33
> **精心挑选了几个爬虫,给大家在学Scrapy框架之前打基础。**
44
>> **该项目仅限学习交流,请勿用于商业用途,如有侵权,请联系删除。**
55
66
|**程序依赖**|**安装指令**|**项目使用版本**|
77
|:----:|:--------:|:--------:|
88
|**lxml**|**pip install lxml**|**4.5.0**|
9-
|**requests**|**pip install requests**|**2.23.0**|
109
|**aiohttp**|**pip install aiohttp**|**3.6.2**|
10+
|**requests**|**pip install requests**|**2.23.0**|
11+
|**PyExecJS**|**pip install PyExecJS**|**1.5.1**|
1112
|**sqlalchemy**|**pip install sqlalchemy**|**1.3.16**|
1213
|**beautifulsoup4**|**pip install beautifulsoup4**|**4.9.0**|
1314

1415
* ### **京东   [官网地址](https://item.jd.com)**
16+
* ### **网易   [官网地址](https://www.163.com/)**
1517
* ### **房天下  [官网地址](https://www.fang.com)**
1618
* ### **快代理  [官网地址](https://www.kuaidaili.com)**
1719
* ### **QQ音乐   [官网地址](https://y.qq.com)**

破解有道翻译/crawl.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
#Python3.7
2+
#encoding = utf-8
3+
4+
import time, math,random,hashlib
5+
import requests
6+
7+
def get_html(name):
    """Query the Youdao web-translation endpoint and print the result.

    Reproduces the ``salt``/``sign``/``bv`` anti-crawl parameters that the
    site's client-side JavaScript computes, then POSTs the form data.

    :param name: text to translate (source/target language auto-detected)
    :raises requests.HTTPError: if the server answers with an error status
    """
    url = 'http://fanyi.youdao.com/translate_o?smartresult=dict&smartresult=rule'

    # Millisecond timestamp; salt adds one pseudo-random digit to it.
    ts = math.floor(time.time() * 1000)
    salt = ts + int(random.random() * 10)

    # MD5 over client-id + text + salt + site secret — mirrors the page's JS.
    sign = hashlib.md5(("fanyideskweb" + name + str(salt) + "Nw(nmmbP%A-r6U3EUn]Aj").encode('utf-8')).hexdigest()
    # bv is the MD5 of the User-Agent string (minus the "Mozilla/" prefix).
    bv = hashlib.md5(("5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.90 Safari/537.36").encode('utf-8')).hexdigest()

    data = {
        'i': name,
        'from': 'AUTO',
        'to': 'AUTO',
        'smartresult': 'dict',
        'client': 'fanyideskweb',
        'salt': salt,
        'sign': sign,
        'ts': ts,
        'bv': bv,
        'doctype': 'json',
        'version': '2.1',
        'keyfrom': 'fanyi.web',
        'action': 'FY_BY_CLICKBUTTION',
    }

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.90 Safari/537.36',
        'Referer': 'http://fanyi.youdao.com/',
        # Reference link: http://fanyi.youdao.com/
        # Fill in your Cookie here
    }

    # timeout= so a stalled connection cannot hang forever; switch to a
    # requests.Session if you need connection reuse across calls.
    response = requests.post(url, headers=headers, data=data, timeout=10)
    response.raise_for_status()  # fail loudly instead of a confusing KeyError below
    result = response.json()  # parse the JSON body once instead of three times
    print('正在执行有道翻译程序:')
    print('翻译的词:{}'.format(result['translateResult'][0][0]['src']))
    print('翻译结果:{}'.format(result['translateResult'][0][0]['tgt']))
if __name__ == "__main__":
49+
50+
name = '靓仔'
51+
52+
get_html(name)

破解网易登录/crawl.py

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
#Python3.7
2+
#encoding = utf-8
3+
4+
import execjs,requests,time
5+
6+
class User():
    """Drive the NetEase (163.com) login flow.

    Encrypts the user's password and generates the client request id by
    executing the site's own JavaScript (pw.js / rtid.js), then exchanges
    those values for a server token and finally performs the login POST.
    """

    def __init__(self, user_id, user_password):
        # Credentials supplied by the caller.
        self.user_id = user_id
        self.user_password = user_password
        # One session so cookies persist across the gt -> login requests.
        self.session = requests.session()
        self.session.headers = {
            'Referer': 'https://dl.reg.163.com/webzj/v1.0.1/pub/index_dl2_new.html?cd=https%3A%2F%2Ftemp.163.com%2Fspecial%2F00804C4H%2F&cf=urs_style_2019.css%3Ft%3D20190527&MGID=1590637061742.5342&wdaId=&pkid=MODXOXd&product=163',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.90 Safari/537.36',
            # Fill in your Cookie here
            # Reference link: https://www.163.com/
        }

    def get_pw(self):
        """Return the password encrypted by the site's pw.js."""
        with open('pw.js', 'r', encoding='utf-8') as source:
            script = source.read()
        runtime = execjs.compile(script)  # compile the JS once
        return runtime.call('get_pw', self.user_password)  # invoke get_pw(password)

    def get_rtid(self):
        """Return a client-generated request id produced by rtid.js."""
        with open('rtid.js', 'r', encoding='utf-8') as source:
            script = source.read()
        runtime = execjs.compile(script)  # compile the JS once
        return runtime.call('get_rtid')  # invoke get_rtid()

    def get_tk(self, rtid):
        """Exchange the rtid for a server-issued token (tk).

        :param rtid: value returned by :meth:`get_rtid`
        """
        url = 'https://dl.reg.163.com/dl/gt'
        params = {
            'un': self.user_id,
            'pkid': 'MODXOXd',
            'pd': '163',
            'channel': '0',
            'topURL': 'https://www.163.com/',
            'rtid': rtid,
            'nocache': int(time.time() * 1000),  # cache buster
        }
        payload = self.session.get(url, params=params).json()
        return payload['tk']

    def get_login(self, pw, rtid, tk):
        """POST the login request and return the server's JSON reply.

        :param pw:   encrypted password from :meth:`get_pw`
        :param rtid: request id from :meth:`get_rtid`
        :param tk:   token from :meth:`get_tk`
        """
        url = 'https://dl.reg.163.com/dl/l'
        data = {
            'channel': '0',
            'd': '10',
            'domains': "163.com",
            'l': '0',
            'pd': "163",
            'pkid': "MODXOXd",
            'pw': pw,
            'pwdKeyUp': '1',
            'rtid': rtid,
            't': int(time.time() * 1000),
            'tk': tk,
            'topURL': "https://www.163.com/",
            'un': self.user_id,
        }
        # The endpoint expects a JSON body, hence json= rather than data=.
        return self.session.post(url, json=data).json()
if __name__ == "__main__":
81+
82+
user = User('请输入你的账号','请输入你的密码')
83+
pw = user.get_pw()#获取pw
84+
rtid = user.get_rtid()#获取rtid
85+
86+
tk = user.get_tk(rtid)#获取tk
87+
88+
login = user.get_login(pw,rtid,tk)
89+
print(login)
90+
91+

0 commit comments

Comments
 (0)