|
| 1 | +#!/usr/bin/env python3 |
| 2 | +# -*- coding: utf-8 -*- |
| 3 | +""" |
| 4 | +@author: 闲欢 |
| 5 | +""" |
| 6 | +from urllib.request import quote |
| 7 | +import requests |
| 8 | +import random |
| 9 | +import traceback |
| 10 | +import time |
| 11 | +import datetime |
| 12 | +import math |
| 13 | +import json |
| 14 | +import pymysql |
| 15 | + |
| 16 | +from stock import dateUtil |
| 17 | + |
| 18 | + |
class report:
    """Crawl research-report listings from reportapi.eastmoney.com into MySQL.

    The constructor opens a MySQL connection; ``deal`` runs one crawl for a
    date range, inserts the rows into the ``report`` table and then closes
    the connection, so an instance is good for a single ``deal`` call.
    """

    # Fields copied from every item of the API response, in the order they
    # are written to the ``report`` table.
    _COLUMNS = (
        'title',                  # report title
        'stockName',              # stock name
        'stockCode',              # stock code
        'orgCode',                # institution code
        'orgName',                # institution name
        'orgSName',               # institution short name
        'publishDate',            # publication date
        'predictNextTwoYearEps',  # EPS forecast, year after next
        'predictNextTwoYearPe',   # P/E forecast, year after next
        'predictNextYearEps',     # EPS forecast, next year
        'predictNextYearPe',      # P/E forecast, next year
        'predictThisYearEps',     # EPS forecast, this year
        'predictThisYearPe',      # P/E forecast, this year
        'indvInduCode',           # industry code
        'indvInduName',           # industry name
        'lastEmRatingName',       # previous rating name
        'lastEmRatingValue',      # previous rating code
        'emRatingValue',          # rating code
        'emRatingName',           # rating name
        'ratingChange',           # rating change
        'researcher',             # researcher
        'encodeUrl',              # link
        'count',                  # reports on this stock in the past month
    )

    PAGE_SIZE = 100          # items requested per page
    REQUEST_TIMEOUT = 30     # seconds; without it a stalled server hangs the crawl
    INSERT_BATCH_SIZE = 20000

    def __init__(self):
        """Set up the request headers, the URL template and the DB connection."""
        self.header = {"Connection": "keep-alive",
                       "Cookie": "st_si=30608909553535; cowminicookie=true; st_asi=delete; cowCookie=true; intellpositionL=2048px; qgqp_b_id=c941d206e54fae32beffafbef56cc4c0; st_pvi=19950313383421; st_sp=2020-10-19%2020%3A19%3A47; st_inirUrl=http%3A%2F%2Fdata.eastmoney.com%2Fstock%2Flhb.html; st_sn=15; st_psi=20201026225423471-113300303752-5813912186; intellpositionT=2579px",
                       "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.70 Safari/537.36",
                       "Host": "reportapi.eastmoney.com"
                       }

        self.conn = pymysql.connect(host='127.0.0.1', port=3306, user='root', passwd='root', db='east_money', charset='utf8')
        self.cur = self.conn.cursor()
        # Placeholders, in order: pageSize, beginTime, endTime, pageNo.
        self.url = 'http://reportapi.eastmoney.com/report/list?cb=datatable1351846&industryCode=*&pageSize={}&industry=*&rating=&ratingChange=&beginTime={}&endTime={}&pageNo={}&fields=&qType=0&orgCode=&code=*&rcode=&p=2&pageNum=2&_=1603724062679'

    def getHtml(self, pageSize, beginTime, endTime, pageNo):
        """Download one page of the report list and return the body as text.

        The formatted URL is printed before the request is sent.  The body is
        decoded as UTF-8.  The request is bounded by ``REQUEST_TIMEOUT``, so
        a ``requests`` timeout exception may propagate to the caller.
        """
        request_url = self.url.format(pageSize, beginTime, endTime, pageNo)
        print(request_url)
        response = requests.get(request_url, headers=self.header,
                                timeout=self.REQUEST_TIMEOUT)
        return response.content.decode("utf-8")

    def format_content(self, content):
        """Strip the JSONP wrapper from ``content`` and parse the JSON inside.

        Returns ``None`` for empty input.  Surrounding whitespace and a
        trailing ``;`` after the closing parenthesis are tolerated; text
        without a ``callback(...)`` wrapper is parsed as plain JSON.

        Raises:
            json.JSONDecodeError: if the payload is not valid JSON.
        """
        if not content:
            return None

        text = content.strip()
        open_idx = text.find('(')
        if open_idx != -1 and text.endswith((')', ');')):
            # The first '(' belongs to the callback and the last ')' closes it.
            text = text[open_idx + 1:text.rfind(')')]
        return json.loads(text)

    def parse_data(self, items):
        """Turn one parsed API page into a list of row dicts.

        Each entry of ``items['data']`` yields a dict holding exactly the
        ``_COLUMNS`` keys; ``count`` is converted to ``int``.

        Raises:
            KeyError: if an entry lacks one of the expected fields.
        """
        parsed_rows = []
        for entry in items['data']:
            row = {field: entry[field] for field in self._COLUMNS}
            row['count'] = int(entry['count'])
            parsed_rows.append(row)
        return parsed_rows

    def get_data(self, start_date, end_date):
        """Fetch every page for the date range and return all parsed rows.

        The first response supplies ``TotalPage`` and is reused as page 1
        instead of being downloaded again.  A random pause of 1-4 seconds is
        inserted before each further page request.  Returns an empty list
        when the first response is empty.
        """
        first_page = self.format_content(
            self.getHtml(self.PAGE_SIZE, start_date, end_date, 1))
        if not first_page:
            return []

        page_num = first_page['TotalPage']
        print(page_num)

        data_list = []
        for page_no in range(1, page_num + 1):
            if page_no == 1:
                page = first_page
            else:
                time.sleep(random.randint(1, 4))
                page = self.format_content(
                    self.getHtml(self.PAGE_SIZE, start_date, end_date, page_no))
            if page:
                data_list.extend(self.parse_data(page))
        return data_list

    def deal(self, start_date, end_date):
        """Crawl the date range, store the rows, then close the connection.

        The cursor and connection are closed even when crawling or inserting
        raises, so a failed run does not leak the connection.
        """
        try:
            data_list = self.get_data(start_date, end_date)
            if data_list:
                self.insertdb(data_list)
        finally:
            self.cur.close()
            self.conn.close()

    def insertdb(self, data_list):
        """Bulk-insert row dicts into the ``report`` table.

        Rows are sent with ``executemany`` in batches of
        ``INSERT_BATCH_SIZE`` and committed once after the last batch.  On a
        ``pymysql.Error`` the transaction is rolled back and the error is
        printed rather than re-raised.
        """
        attrs = list(self._COLUMNS)
        insert_rows = [tuple(obj[attr] for attr in attrs) for obj in data_list]

        placeholders = ','.join(['%s'] * len(attrs))
        sql = 'insert into report(' + ','.join(attrs) + ') values(' + placeholders + ')'
        try:
            print(sql)
            for start in range(0, len(insert_rows), self.INSERT_BATCH_SIZE):
                batch = insert_rows[start:start + self.INSERT_BATCH_SIZE]
                self.cur.executemany(sql, tuple(batch))
            self.conn.commit()
        except pymysql.Error as e:
            self.conn.rollback()
            # Not every pymysql error carries (code, message); avoid an
            # IndexError while reporting the failure.
            code = e.args[0] if e.args else None
            message = e.args[1] if len(e.args) > 1 else ''
            error = 'insertMany executemany failed! ERROR (%s): %s' % (code, message)
            print(error)
| 127 | + |
| 128 | + |
if __name__ == "__main__":
    # Script entry point: crawl the reports for a one-year window ending today.
    # The instance gets its own name so it does not shadow the ``report`` class.
    crawler = report()
    end_day = dateUtil.DateUtil.get_today()
    # NOTE(review): get_minus_time / get_format_day are project helpers;
    # presumably they subtract 365 days and format as YYYY-MM-DD - confirm.
    now = datetime.datetime.now()
    year_ago = dateUtil.DateUtil.get_minus_time(now, days=365)
    start_day = dateUtil.DateUtil.get_format_day(year_ago, '%Y-%m-%d')
    crawler.deal(start_day, end_day)
0 commit comments