Skip to content

Commit b09f9f7

Browse files
committed
创建目录
1 parent 0f969a6 commit b09f9f7

File tree

3 files changed

+94
-0
lines changed

3 files changed

+94
-0
lines changed

wuya/README.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
# Python 代码实例

- [jdSignin](https://github.com/JustDoPython/python-examples/tree/master/wuya/jdSignin) : 3分钟写一个脚本,每天定时薅东哥羊毛
- [shRent_1](https://github.com/JustDoPython/python-examples/tree/master/wuya/shRent_1) : 为了在上海租房,我用python连夜爬了20000多条房源信息...

---

从小白到工程师的学习之路

关注公众号:python 技术,回复"python"一起学习交流

![](http://favorites.ren/assets/images/python.jpg)

wuya/jdSignin/jd_sign.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
"""Daily jd.com bean sign-in script.

Posts the ``signBeanAct`` request with the user's session cookie
(pt_key / pt_pin), then pushes the result to the user via the
ServerChan (sctapi.ftqq.com) notification service.
"""
import requests

# SECURITY NOTE: pt_key/pt_pin grant full account access. Never commit
# real values (the original file leaked a live-looking pt_key in a
# comment); prefer reading them from environment variables.
pt_key = "替换为自己的pt_key"
pt_pin = "替换为自己的pt_pin"
cookie = "pt_key={}; pt_pin={}".format(pt_key, pt_pin)

# signBeanAct endpoint; the percent-encoded body is a fixed JSON blob of
# fingerprint fields the API expects.
url = "https://api.m.jd.com/client.action?functionId=signBeanAct&body=%7B%22fp%22%3A%22-1%22%2C%22shshshfp%22%3A%22-1%22%2C%22shshshfpa%22%3A%22-1%22%2C%22referUrl%22%3A%22-1%22%2C%22userAgent%22%3A%22-1%22%2C%22jda%22%3A%22-1%22%2C%22rnVersion%22%3A%223.9%22%7D&appid=ld"
headers = {
    "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
    # Mimic the JD Android app's HTTP client so the API accepts the call.
    "User-Agent": "okhttp/3.12.1;jdmall;android;version/10.3.4;build/92451;",
    "Cookie": cookie,
}

response = requests.post(url=url, headers=headers)
print(response.text)

res = response.json()

# No "errorMessage" key in the JSON response means the sign-in succeeded.
if res.get("errorMessage") is None:
    returnMes = "签到成功!"
else:
    # BUG FIX: the original used "签到失败".format(...) — a format string
    # with no placeholder — so the server's error message was dropped.
    returnMes = "签到失败:{}".format(res.get("errorMessage"))

# Push the result via ServerChan (replace the path segment with your own SendKey).
requests.get("https://sctapi.ftqq.com/这里填写你个人的SendKey.send?title={}".format(returnMes))

wuya/shRent_1/shRent_1.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
"""Crawl Shanghai rental listings from Lianjia's zufang pages into a CSV.

For each district it reads the first listing page to learn the total page
count, enumerates every page URL, then scrapes each listing's URL, title,
estate name, price, area, address, orientation, layout, tags and region,
writing one CSV row per listing.
"""
import time, re, csv, requests
from bs4 import BeautifulSoup

# District slugs used in the listing URLs.
# (Renamed from `list`, which shadowed the builtin.)
districts = ['jingan', 'xuhui', 'huangpu', 'changning', 'putuo', 'pudong',
             'baoshan', 'hongkou', 'yangpu', 'minhang', 'jinshan', 'jiading',
             'chongming', 'fengxian', 'songjiang', 'qingpu']

# {district}/pg{page}rco11 — rco11 sorts by release time.
BASE_URL = 'https://sh.lianjia.com/zufang/{}/pg{}rco11/'
# Hoisted out of the loop: the original rebuilt this dict per district.
HEADERS = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.9 Safari/537.36'}
CRAWL_DELAY = 0  # seconds between pages; raise this to be polite to the server

print("****处理开始****")

# Build the full list of page URLs: page 1 of each district, plus
# pages 2..N discovered from the pager's data-totalpage attribute.
urls = []
for district in districts:
    first_page = BASE_URL.format(district, 1)
    urls.append(first_page)
    res = requests.get(first_page, headers=HEADERS)
    soup = BeautifulSoup(res.text, 'html.parser')
    page_num = int(soup.find('div', attrs={'class': 'content__pg'}).attrs['data-totalpage'])
    for page in range(2, page_num + 1):
        urls.append(BASE_URL.format(district, page))

print(urls)

# BUG FIX: the original wrote a UTF-8 BOM with mode 'wb+' and then
# reopened the same file with mode 'w+', which truncates it and erases
# the BOM. 'utf-8-sig' emits the BOM and the rows in a single open,
# and `with` guarantees the handle is closed even on error.
with open(r'..\document\sh.csv', 'w', newline='', encoding='utf-8-sig') as f:
    writer = csv.writer(f)
    for num, url in enumerate(urls, start=1):
        print("正在处理第{}页数据...".format(str(num)))
        res = requests.get(url, headers=HEADERS)
        soup = BeautifulSoup(res.text, 'html.parser')
        infos = soup.find('div', {'class': 'content__list'}).find_all('div', {'class': 'content__list--item'})

        for info in infos:
            house_url = 'https://sh.lianjia.com' + info.a['href']
            title = info.find('p', {'class': 'content__list--item--title'}).find('a').get_text().strip()
            # First whitespace-separated token minus a 3-char prefix
            # (e.g. "整租·") leaves the estate/community name.
            group = title.split()[0][3:]
            price = info.find('span', {'class': 'content__list--item-price'}).get_text()
            tag = info.find('p', {'class': 'content__list--item--bottom oneline'}).get_text()
            # The description line is slash-separated:
            # address / area / orientation / ... / layout
            mixed = info.find('p', {'class': 'content__list--item--des'}).get_text()
            mix = re.split(r'/', mixed)
            address = mix[0].strip()
            area = mix[1].strip()
            door_orientation = mix[2].strip()
            style = mix[-1].strip()
            # Region is the part of the address before the first '-'.
            region = re.split(r'-', address)[0]
            writer.writerow((house_url, title, group, price, area, address,
                             door_orientation, style, tag, region))
        time.sleep(CRAWL_DELAY)
        print("第{}页数据处理完毕,共{}条数据。".format(str(num), len(infos)))

print("****全部完成****")

0 commit comments

Comments
 (0)