Skip to content

Commit d08d916

Browse files
committed
提交
1 parent 7cd5776 commit d08d916

File tree

3 files changed

+133
-0
lines changed

3 files changed

+133
-0
lines changed

.DS_Store

0 Bytes
Binary file not shown.

xianhuan/.DS_Store

2 KB
Binary file not shown.
Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
#!/usr/bin/env python3
2+
# -*- coding: utf-8 -*-
3+
"""
4+
@author: 闲欢
5+
"""
6+
from urllib.request import quote
7+
import requests
8+
import random
9+
import traceback
10+
import time
11+
import datetime
12+
import math
13+
import json
14+
import pymysql
15+
16+
from stock import dateUtil
17+
18+
19+
class report:
    """Download research-report listings from reportapi.eastmoney.com into MySQL.

    The workflow is ``deal`` -> ``get_data`` (paged HTTP fetch + parse) ->
    ``insertdb`` (bulk insert into the ``report`` table).
    """

    # JSONP wrapper prefix; must match the ``cb`` query parameter in ``self.url``.
    _JSONP_PREFIX = 'datatable1351846('

    # Seconds to wait for the HTTP server before giving up on a request.
    _REQUEST_TIMEOUT = 30

    # Fields copied from every API record; also the column list (and order)
    # of the INSERT statement built in ``insertdb``.
    _COLUMNS = (
        'title',                  # report title
        'stockName',              # stock name
        'stockCode',              # stock code
        'orgCode',                # organisation code
        'orgName',                # organisation name
        'orgSName',               # organisation short name
        'publishDate',            # publish date
        'predictNextTwoYearEps',  # EPS forecast, year after next
        'predictNextTwoYearPe',   # P/E forecast, year after next
        'predictNextYearEps',     # EPS forecast, next year
        'predictNextYearPe',      # P/E forecast, next year
        'predictThisYearEps',     # EPS forecast, this year
        'predictThisYearPe',      # P/E forecast, this year
        'indvInduCode',           # industry code
        'indvInduName',           # industry name
        'lastEmRatingName',       # previous rating name
        'lastEmRatingValue',      # previous rating code
        'emRatingValue',          # rating code
        'emRatingName',           # rating name
        'ratingChange',           # rating change
        'researcher',             # researcher
        'encodeUrl',              # link
        'count',                  # number of reports on the stock in the last month
    )

    def __init__(self):
        """Prepare the HTTP headers, open the MySQL connection and set the URL template."""
        self.header = {"Connection": "keep-alive",
                       "Cookie": "st_si=30608909553535; cowminicookie=true; st_asi=delete; cowCookie=true; intellpositionL=2048px; qgqp_b_id=c941d206e54fae32beffafbef56cc4c0; st_pvi=19950313383421; st_sp=2020-10-19%2020%3A19%3A47; st_inirUrl=http%3A%2F%2Fdata.eastmoney.com%2Fstock%2Flhb.html; st_sn=15; st_psi=20201026225423471-113300303752-5813912186; intellpositionT=2579px",
                       "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.70 Safari/537.36",
                       "Host": "reportapi.eastmoney.com"
                       }

        self.conn = pymysql.connect(host='127.0.0.1', port=3306, user='root', passwd='root', db='east_money', charset='utf8')
        self.cur = self.conn.cursor()
        # Placeholders, in order: pageSize, beginTime, endTime, pageNo.
        self.url = 'http://reportapi.eastmoney.com/report/list?cb=datatable1351846&industryCode=*&pageSize={}&industry=*&rating=&ratingChange=&beginTime={}&endTime={}&pageNo={}&fields=&qType=0&orgCode=&code=*&rcode=&p=2&pageNum=2&_=1603724062679'

    def getHtml(self, pageSize, beginTime, endTime, pageNo):
        """Fetch one page of the report list and return the raw response text.

        :param pageSize: number of records requested per page
        :param beginTime: start of the date range, as passed into the URL
        :param endTime: end of the date range, as passed into the URL
        :param pageNo: 1-based page number
        :return: response body decoded as UTF-8
        """
        url = self.url.format(pageSize, beginTime, endTime, pageNo)
        print(url)
        # A timeout keeps a stalled server from hanging the whole crawl.
        response = requests.get(url, headers=self.header, timeout=self._REQUEST_TIMEOUT)
        return response.content.decode("utf-8")

    def format_content(self, content):
        """Strip the JSONP wrapper from *content* and parse the JSON payload.

        :param content: raw response text, e.g. ``datatable1351846({...})``
        :return: the decoded JSON object, or ``None`` when *content* is empty
        :raises json.JSONDecodeError: if the payload is not valid JSON
        """
        if not content:
            return None

        text = content.strip()
        if not text:
            return None

        if text.startswith(self._JSONP_PREFIX):
            # Drop the callback prefix, then any trailing ';' and the closing
            # parenthesis, instead of blindly cutting the last character.
            text = text[len(self._JSONP_PREFIX):].rstrip().rstrip(';').rstrip()
            if text.endswith(')'):
                text = text[:-1]
        return json.loads(text)

    def parse_data(self, items):
        """Convert one decoded API page into a list of row dictionaries.

        :param items: decoded page; records are read from ``items['data']``
        :return: list of dicts keyed by the names in ``_COLUMNS``; ``count``
            is converted to ``int``. Empty list when there is nothing to parse.
        """
        if not items:
            return []

        result_list = []
        for obj in items.get('data') or []:
            # Every column is copied from the key of the same name; in
            # particular orgCode comes from obj['orgCode'], not the stock code.
            result = {name: obj[name] for name in self._COLUMNS}
            result['count'] = int(obj['count'])
            result_list.append(result)

        return result_list

    def get_data(self, start_date, end_date):
        """Fetch and parse every page of reports between the two dates.

        :param start_date: start of the date range
        :param end_date: end of the date range
        :return: list of row dictionaries from all pages (possibly empty)
        """
        first_page = self.format_content(self.getHtml(100, start_date, end_date, 1))
        if not first_page:
            return []

        page_num = first_page['TotalPage']
        print(page_num)
        if page_num < 1:
            return []

        # Page 1 is already downloaded; reuse it instead of requesting it again.
        data_list = self.parse_data(first_page)
        for page_no in range(2, page_num + 1):
            # Random pause between requests to avoid hammering the server.
            time.sleep(random.randint(1, 4))
            page = self.format_content(self.getHtml(100, start_date, end_date, page_no))
            data_list.extend(self.parse_data(page))

        return data_list

    def deal(self, start_date, end_date):
        """Fetch the reports for the date range, store them, then release the DB handles.

        :param start_date: start of the date range
        :param end_date: end of the date range
        """
        try:
            data_list = self.get_data(start_date, end_date)
            if data_list:
                self.insertdb(data_list)
        finally:
            # Always release the cursor and connection, even if fetching or
            # inserting raised.
            self.cur.close()
            self.conn.close()

    def insertdb(self, data_list):
        """Bulk-insert the parsed rows into the ``report`` table.

        Rows are sent in batches of 20000. On a ``pymysql.Error`` the
        transaction is rolled back and the error is printed.

        :param data_list: list of row dictionaries as produced by ``parse_data``
        """
        attrs = list(self._COLUMNS)
        insert_tuple = [tuple(obj[name] for name in attrs) for obj in data_list]

        attrs_sql = '(' + ','.join(attrs) + ')'
        values_sql = ' values(' + ','.join(['%s'] * len(attrs)) + ')'
        sql = 'insert into report' + attrs_sql + values_sql

        batch_size = 20000
        try:
            print(sql)
            for start in range(0, len(insert_tuple), batch_size):
                self.cur.executemany(sql, tuple(insert_tuple[start:start + batch_size]))
            # Commit once after all batches so the rollback below undoes a
            # partially completed load.
            self.conn.commit()
        except pymysql.Error as e:
            self.conn.rollback()
            # Not every pymysql error carries both a code and a message.
            code = e.args[0] if e.args else None
            message = e.args[1] if len(e.args) > 1 else e
            error = 'insertMany executemany failed! ERROR (%s): %s' % (code, message)
            print(error)
127+
128+
129+
if __name__ == "__main__":
    # Script entry point: load the last 365 days of research reports.
    # The instance gets its own name; rebinding ``report`` would replace the
    # class with an instance for the rest of the module.
    crawler = report()
    today = dateUtil.DateUtil.get_today()
    one_year_before = dateUtil.DateUtil.get_format_day(
        dateUtil.DateUtil.get_minus_time(datetime.datetime.now(), days=365),
        '%Y-%m-%d',
    )
    crawler.deal(one_year_before, today)

0 commit comments

Comments
 (0)