Skip to content

Commit ab65245

Browse files
committed
提交代码
1 parent c5b3539 commit ab65245

File tree

13 files changed

+1064
-0
lines changed

13 files changed

+1064
-0
lines changed

xianhuan/README.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# Python 代码实例
2+
3+
Python技术 公众号文章代码库
4+
5+
6+
关注公众号:python 技术,回复"python"一起学习交流
7+
8+
![](http://favorites.ren/assets/images/python.jpg)

xianhuan/bdindex/bdindexneed.py

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
#!/usr/bin/env python3
2+
# -*- coding: utf-8 -*-
3+
"""
4+
@author: 闲欢
5+
"""
6+
import requests
7+
import json
8+
from wordcloud import WordCloud
9+
from matplotlib import pyplot as plt
10+
11+
12+
class bdindex:
13+
# 搜索指数URL
14+
data_url = 'http://index.baidu.com/api/WordGraph/multi?wordlist[]={keyword}'
15+
# 检查关键词url
16+
check_url = 'http://index.baidu.com/api/AddWordApi/checkWordsExists?word=%s'
17+
headers = {
18+
"User-Agent": 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.106 Safari/537.36',
19+
"Cookie": 'PSTM=1579955530; BAIDUID=C98F0EF9DCB3FC7E06D3B0FA63695787:FG=1; BIDUPSID=1FB86823BF26D806A0117921DBD66135; BDSFRCVID=bpFOJeC62ZTm5dnuEvqKKASNJe3SOxnTH6aoprlQ5IIcI75XA-7tEG0P_U8g0KubIXdfogKKLgOTHPIF_2uxOjjg8UtVJeC6EG0Ptf8g0M5; H_BDCLCKID_SF=tJkf_D8XtK83fP36q470htFjMfQXetJyaR3UWpQvWJ5TMC_whlOFK-I0XHLjWUPf-eOW3C5dLxQ8ShPC-tnZ56Lv5tRT-xb83JbnbxO83l02VM7ae-t2ynLVbNJ324RMW23r0h7mWUJzsxA45J7cM4IseboJLfT-0bc4KKJxbnLWeIJIjjCajTcQjN_qq-JQa5TbstbHaJOqD4-k-PnVHPKXhUce2bQHKKI_0-3LK-0_hC_lD6LKjI6XDGLHJ6DfHJuHoC_htD0tftbzBPcqb-F0hHc2bP0hb6nLMbTeqR3bJRO6q6KKDjjLDGtXJjDDtJCH_5u-tDDKhD_6eTONjbtpbtbmhU-e56vQ3-5SWfK2sKTn0qjTD5v3hh6aaTv45J7ZVDKbtI8MbDLrMRoVK-A0hxLXt6kXKKOLVb6Eb4OkeqOJ2Mt5bjFihp_O0PrXB6bCQCoTKlvRjPbzX4Oo0jtpeG_DtjFqtJksL-35HtnheJ54KPu_-P4DeU8eaMRZ5mAqoqOoyI_bO45ODtD2yU_9X467K5btX5rnaIQqabIMeMJFbnOIjqDNbbPtafc43bRT0xKy5KJvfjCx-UAMhP-UyPvMWh37Lg5lMKoaMp78jR093JO4y4Ldj4oxJpOJ5JbMopCafD_2MCD6DTLhen-W5gTEaPoX5Kj-WjrJabCQHnnph4Tqhh4ShUO-f6_jtnuf8JOSKRr_eJR3MPoB5P4XbacKJT3-5RPt3RLKfnD5MD89epDh0btpbtbmhU-e3TrOb45vK-oGbKjCKqo-2t0F-xbW2PkfaR7ZVD_ytCL-bK_GenJb5ICEbfreanLXKK_s3tJIBhcqEIL4WlOVjt0H5toqbxni0G7waJKbLh7WDxbSj4QoKbDj0HoAB4JAJbTv56C5bp5nhMJ33j7JDMP0-4rvKP5y523i2n3vQpnmOqQ3DRoWXPIqbN7P-p5Z5mAqKl0MLPbtbb0xXj_0-nDSHHuOJjOP; BDUSS=UJsNmwzSnVwLWJ6eGJiTGtBMXRxVkNVVHFYOEgzZ0NMemo0V2o4dG9RaH5xbmxlRVFBQUFBJCQAAAAAAAAAAAEAAAArVO4Kzt7D-3ZpcGVyAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAH8dUl5~HVJee; BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; Hm_lvt_d101ea4d2a5c67dab98251f0b5de24dc=1582632851; bdshare_firstime=1582719699670; bdindexid=lbhlaubfjakm0eklbjbislhal1; Hm_lpvt_d101ea4d2a5c67dab98251f0b5de24dc=1582940553; delPer=0; PSINO=6; H_PS_PSSID=1445_21119_30790_30905_30823_26350; RT="sl=2&ss=k771w9qf&tt=1yz&bcn=https%3A%2F%2Ffclog.baidu.com%2Flog%2Fweirwood%3Ftype%3Dperf&z=1&dm=baidu.com&si=0pgwidvcjf8&ld=1ab9"',
20+
"Host": "index.baidu.com",
21+
"Referer": "http://index.baidu.com/v2/main/index.html"
22+
}
23+
24+
# 获取指数数据
25+
def get_index(self, params):
    """Fetch the related-word graph for a keyword and draw it as word clouds.

    Args:
        params: Mapping used to fill the placeholders of ``data_url``; it
            must contain the ``keyword`` entry.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server replies with an HTTP error status.
    """
    url = self.data_url.format(**params)
    # A timeout keeps the script from hanging forever on a stalled connection.
    response = requests.get(url, headers=self.headers, timeout=10)
    response.raise_for_status()

    data = response.json()['data']
    print(data)

    # Only the first entry of ``wordlist`` is used.
    word_graph = data['wordlist'][0]['wordGraph']
    pv_dict = {item['word']: item['pv'] for item in word_graph}
    ratio_dict = {item['word']: item['ratio'] for item in word_graph}

    # One cloud weighted by ``pv`` and one weighted by ``ratio``.
    # NOTE: both calls use gen_wc_tags' default output path, so the second
    # image replaces the first one on disk.
    self.gen_wc_tags(pv_dict)
    self.gen_wc_tags(ratio_dict)
41+
42+
# 检查关键词是否存在
43+
def check_word(self, kw):
    """Ask the keyword-check endpoint about *kw*.

    Args:
        kw: Keyword substituted into ``check_url``.

    Returns:
        bool: ``True`` when the ``result`` field of the response payload is
        empty, ``False`` otherwise. The script entry point in this file
        treats ``True`` as "the keyword exists".

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server replies with an HTTP error status.
    """
    url = self.check_url % kw
    # A timeout keeps the script from hanging forever on a stalled connection.
    response = requests.get(url, headers=self.headers, timeout=10)
    response.raise_for_status()
    data = response.json()['data']
    # NOTE(review): presumably ``result`` lists the words the service could
    # not find, so an empty value means "found" — confirm against the API.
    return not data['result']
48+
49+
# 生成词云
50+
def gen_wc_tags(self, tags, out_path='./gzbd_wc.png',
                font_path='/System/Library/Fonts/PingFang.ttc'):
    """Render a frequency mapping as a word cloud, display it, and save it.

    Args:
        tags: Mapping of word to weight, passed to
            ``WordCloud.generate_from_frequencies``.
        out_path: Image file the cloud is written to. Defaults to the path
            that used to be hard-coded.
        font_path: Font file used to draw the words. Defaults to the macOS
            PingFang font that used to be hard-coded; pass another font on
            systems where that file does not exist.
    """
    wordcloud = WordCloud(
        background_color='black',
        # No mask image: the cloud fills the plain width x height canvas.
        mask=None,
        max_words=100,
        max_font_size=100,
        width=800,
        height=600,
        # Without a font that covers Chinese, the words may come out garbled.
        font_path=font_path,
    ).generate_from_frequencies(tags)

    # Display the word cloud.
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis('off')
    plt.show()

    # Save the word cloud image.
    wordcloud.to_file(out_path)
69+
70+
if __name__ == '__main__':
    # Use a distinct name for the instance so the ``bdindex`` class itself is
    # not rebound to one of its own instances.
    client = bdindex()
    # Other keywords that were tried: '股市', '新冠状病毒'.
    keyword = '特朗普'
    word_exists = client.check_word(keyword)
    if word_exists:
        params = {
            'keyword': keyword,
        }
        client.get_index(params)
    else:
        print('keyword is not found')

xianhuan/populationone/anaone.py

Lines changed: 167 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,167 @@
1+
#!/usr/bin/env python3
2+
# -*- coding: utf-8 -*-
3+
"""
4+
@author: 闲欢
5+
"""
6+
import numpy as np
7+
import pandas as pd
8+
import pyecharts.options as opts
9+
from pyecharts.charts import Line, Bar, Page, Pie
10+
11+
12+
# 读取数据
13+
# Population workbook, loaded once at module level; analysis_total() and
# analysis_sex() both read their columns from this frame.
pdata = pd.read_excel('populationone.xlsx')
14+
15+
16+
# 分析总人口
17+
def analysis_total():
    """Chart the total population and the birth/death/natural-growth rates.

    Reads the module-level ``pdata`` frame, builds a bar chart of the
    year-end population and a line chart of the three rates, and renders
    both charts into ``population_total.html``.
    """
    years = pdata['年份'].tolist()
    # The column is in units of 10,000 persons; the chart shows units of
    # 100 million, formatted with two decimals.
    population_labels = [
        "%.2f" % (value / 10000)
        for value in pdata['年末总人口(万人)'].tolist()
    ]
    # (series name, values) pairs, in the order they are added to the line chart.
    rate_series = [
        ("自然增长率(‰)", pdata['人口自然增长率(‰)'].tolist()),
        ('出生率(‰)', pdata['人口出生率(‰)'].tolist()),
        ('死亡率(‰)', pdata['人口死亡率(‰)'].tolist()),
    ]

    # --- Bar chart: year-end total population ---
    bar_x_axis = opts.AxisOpts(
        type_="category",
        name='年份',
        # Put the axis name at the end of the axis.
        name_location='end',
        axislabel_opts=opts.LabelOpts(is_show=True, margin=10, color="#000", interval=1, rotate=90),
        axistick_opts=opts.AxisTickOpts(is_show=True, is_align_with_label=True),
        axispointer_opts=opts.AxisPointerOpts(type_="line", label=opts.LabelOpts(is_show=True)),
    )
    bar_y_axis = opts.AxisOpts(
        type_="value",
        position="left",
    )

    bar = Bar(init_opts=opts.InitOpts(width="1200px", height="500px"))
    bar.add_xaxis(years)
    bar.add_yaxis(
        "年末总人口(亿)",
        population_labels,
        category_gap="10%",
        label_opts=opts.LabelOpts(rotate=90, position="inside"),
    )
    bar.set_global_opts(
        title_opts=opts.TitleOpts(title="年末总人口变化情况", pos_bottom="bottom", pos_left="center"),
        xaxis_opts=bar_x_axis,
        yaxis_opts=bar_y_axis,
        legend_opts=opts.LegendOpts(is_show=True),
    )

    # --- Line chart: natural growth, birth and death rates ---
    line_x_axis = opts.AxisOpts(
        name='年份',
        name_location='end',
        type_="value",
        min_="1949",
        max_interval=1,
        # Do not force the x axis to sit on the y axis' zero line.
        axisline_opts=opts.AxisLineOpts(is_on_zero=False),
        axislabel_opts=opts.LabelOpts(is_show=True, color="#000", interval=0, rotate=90),
        axistick_opts=opts.AxisTickOpts(is_show=True, is_align_with_label=True),
        axispointer_opts=opts.AxisPointerOpts(type_="shadow", label=opts.LabelOpts(is_show=True)),
    )
    line_y_axis = opts.AxisOpts(
        name='比例',
        type_="value",
        position="left",
        min_=-10,
        axislabel_opts=opts.LabelOpts(is_show=True),
    )

    line = Line(init_opts=opts.InitOpts(width="1400px", height="500px"))
    line.add_xaxis(years)
    for series_name, values in rate_series:
        line.add_yaxis(
            series_name=series_name,
            y_axis=values,
            label_opts=opts.LabelOpts(is_show=False),
        )
    line.set_global_opts(
        title_opts=opts.TitleOpts(title="人口自然增长率、出生率、死亡率", pos_bottom="bottom", pos_left="center"),
        xaxis_opts=line_x_axis,
        yaxis_opts=line_y_axis,
        legend_opts=opts.LegendOpts(is_show=True),
    )

    # Put both charts on one HTML page; DraggablePageLayout makes them draggable.
    page = Page(layout=Page.DraggablePageLayout)
    page.add(bar, line)
    page.render('population_total.html')
97+
98+
# 分析男女比
99+
def analysis_sex():
    """Chart the 2019 male/female split and the yearly male-female gap.

    Reads the module-level ``pdata`` frame, renders a pie chart of the 2019
    male and female population to ``nvpie.html``, and renders that pie
    together with a line chart of (male - female) per year into
    ``population_sex.html``.
    """
    x_data = pdata['年份'].tolist()

    # Male and female population of the 2019 row, for the pie chart.
    sex_2019 = pdata[pdata['年份'] == 2019][['男性人口(万人)', '女性人口(万人)']]
    # ``.tolist()`` turns numpy scalars into built-in Python numbers, which is
    # the data format pyecharts expects.
    sex_2019_values = np.ravel(sex_2019.values).tolist()

    # Column-wise difference: male minus female population for every year.
    y_data_man_woman = (pdata['男性人口(万人)'] - pdata['女性人口(万人)']).tolist()

    pie = Pie()
    pie.add("", [list(z) for z in zip(['男', '女'], sex_2019_values)])
    pie.set_global_opts(title_opts=opts.TitleOpts(title="2019中国男女比", pos_bottom="bottom", pos_left="center"))
    pie.set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {d}%"))
    pie.render('nvpie.html')

    line = Line(init_opts=opts.InitOpts(width="1400px", height="500px"))
    line.add_xaxis(x_data)
    line.add_yaxis(
        series_name="男女差值",
        y_axis=y_data_man_woman,
        # Mark the minimum, maximum and average points of the series.
        markpoint_opts=opts.MarkPointOpts(
            data=[
                opts.MarkPointItem(type_="min"),
                opts.MarkPointItem(type_="max"),
                opts.MarkPointItem(type_="average"),
            ]
        ),
        label_opts=opts.LabelOpts(
            is_show=False
        ),
        markline_opts=opts.MarkLineOpts(data=[opts.MarkLineItem(type_="average")]),
    )
    line.set_global_opts(
        title_opts=opts.TitleOpts(title="中国70年(1949-2019)男女差值(万人)", pos_left="center", pos_top="bottom"),
        legend_opts=opts.LegendOpts(is_show=False),
        xaxis_opts=opts.AxisOpts(
            name='年份',
            name_location='end',
            type_="value",
            min_="1949",
            max_interval=1,
            # Do not force the x axis to sit on the y axis' zero line.
            axisline_opts=opts.AxisLineOpts(is_on_zero=False),
            axislabel_opts=opts.LabelOpts(is_show=True, color="#000", interval=0, rotate=90),
            axistick_opts=opts.AxisTickOpts(is_show=True, is_align_with_label=True),
            axispointer_opts=opts.AxisPointerOpts(type_="shadow", label=opts.LabelOpts(is_show=True)),
        ),
        yaxis_opts=opts.AxisOpts(
            name='差值(万人)',
            type_="value",
            position="left",
            axislabel_opts=opts.LabelOpts(is_show=True),
        ),
    )

    # Put both charts on one HTML page.
    page = Page(layout=Page.DraggablePageLayout)
    page.add(pie)
    page.add(line)
    page.render('population_sex.html')
163+
164+
165+
if __name__ == '__main__':
    # Render the total-population page first, then the male/female page.
    for run_analysis in (analysis_total, analysis_sex):
        run_analysis()
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
#!/usr/bin/env python3
2+
# -*- coding: utf-8 -*-
3+
"""
4+
@author: 闲欢
5+
"""
6+
import pandas as pd
7+
import requests
8+
9+
# 人口数量excel文件保存路径
10+
# Path of the Excel workbook that save_excel() writes the population data to.
POPULATION_EXCEL_PATH = 'populationone.xlsx'
11+
12+
# 爬取人口数据
13+
def spider_population():
    """Download the population indicators and save them to Excel.

    Sends two queries to the ``easyquery.htm`` endpoint (indicator codes
    ``A0301`` and ``A0302``, both for the ``LAST70`` time range), merges the
    returned values per year with ``get_population_info``, adds a hard-coded
    row for 2019, and writes everything out with ``save_excel``.

    Returns:
        dict: Year string mapped to ``[year, value, value, ...]``.

    Raises:
        requests.RequestException: If a request fails, times out, or the
            server replies with an HTTP error status.
    """
    # Query parameters: sj = time range, zb = indicator.
    # A0301: total population figures.
    dfwds_total = '[{"wdcode": "sj", "valuecode": "LAST70"}, {"wdcode":"zb","valuecode":"A0301"}]'
    # A0302: birth rate, death rate and natural growth rate.
    dfwds_rates = '[{"wdcode": "sj", "valuecode": "LAST70"}, {"wdcode":"zb","valuecode":"A0302"}]'
    url = 'http://data.stats.gov.cn/easyquery.htm?m=QueryData&dbcode=hgnd&rowcode=sj&colcode=zb&wds=[]&dfwds={}'

    # All data is collected here: the year is the key, the value is the list
    # of indicator values for that year.
    population_dict = {}

    # Query order matters: it fixes the order of the values inside each list.
    for dfwds in (dfwds_total, dfwds_rates):
        # A timeout keeps the script from hanging forever on a stalled connection.
        response = requests.get(url.format(dfwds), timeout=30)
        response.raise_for_status()
        get_population_info(population_dict, response.json())

    # The 2019 figures were not yet in the annual table when this was written,
    # so they are taken from the statistics bureau's 2019 economic report.
    population_dict['2019'] = [2019, 140005, 71527, 68478, 84843, 55162, 10.48, 7.14, 3.34]
    save_excel(population_dict)

    return population_dict
37+
38+
# 提取人口数量信息
39+
def get_population_info(population_dict, json_obj):
    """Merge the data nodes of one query response into *population_dict*.

    Args:
        population_dict: Year string mapped to ``[year, value, ...]``. It is
            updated in place.
        json_obj: Decoded response; the nodes are read from
            ``json_obj['returndata']['datanodes']``.

    Returns:
        dict: The same *population_dict* object that was passed in.

    Raises:
        KeyError: If the response or a node lacks one of the expected keys.
        ValueError: If the last four characters of a new node's ``code`` are
            not an integer.
    """
    for node in json_obj['returndata']['datanodes']:
        # The year is the last four characters of the node code.
        year = node['code'][-4:]
        value = node['data']['data']
        if year in population_dict:
            population_dict[year].append(value)
        else:
            # First value seen for this year: start the list with the year itself.
            population_dict[year] = [int(year), value]
    return population_dict
51+
52+
# 人口数据生成excel文件
53+
def save_excel(population_dict):
    """Write the per-year population data to ``POPULATION_EXCEL_PATH``.

    Args:
        population_dict: Year mapped to a list of nine values, in the order
            of the column names assigned below.
    """
    # Each dict entry becomes a column, so transpose to get one row per year.
    df = pd.DataFrame(population_dict).T
    df.columns = ['年份', '年末总人口(万人)', '男性人口(万人)', '女性人口(万人)', '城镇人口(万人)', '乡村人口(万人)', '人口出生率(‰)', '人口死亡率(‰)',
                  '人口自然增长率(‰)']
    # Order the rows by year explicitly. Reversing the dict's insertion order
    # would put a row that was appended last (such as the hard-coded 2019
    # entry) ahead of every other year.
    df = df.sort_values('年份')
    # The context manager saves and closes the workbook; ``writer.save()`` and
    # the ``encoding`` argument of ``to_excel`` no longer exist in pandas 2.x.
    with pd.ExcelWriter(POPULATION_EXCEL_PATH) as writer:
        df.to_excel(excel_writer=writer, index=False, sheet_name='中国70年人口数据')
63+
64+
65+
if __name__ == '__main__':
    # Run the spider for its side effect (the Excel file); the returned dict
    # is not needed here.
    spider_population()

xianhuan/populationtwo/.DS_Store

6 KB
Binary file not shown.

0 commit comments

Comments
 (0)