Skip to content

Commit c5b3539

Browse files
committed
修改位置
1 parent 848e717 commit c5b3539

File tree

3 files changed

+295
-0
lines changed

3 files changed

+295
-0
lines changed

moumoubaimifan/ffmpeg/ffmpeg.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
import os
import random
import subprocess
import sys

# Input video and the logo image used by the watermark step.
fileName = 'Frozen.mp4'
logoName = 'logo.png'


def _run_ffmpeg(args):
    """Run ``ffmpeg`` with *args* and wait for it to finish.

    The command is passed as an argument list (no shell), so file names that
    contain spaces or quotes need no escaping.

    Args:
        args: ffmpeg command-line arguments, without the program name.

    Returns:
        The process exit code, or ``None`` when ffmpeg could not be started.
    """
    command = ['ffmpeg'] + list(args)
    try:
        completed = subprocess.run(command)
    except OSError as exc:
        # Typically: ffmpeg is not installed or not on PATH.
        print('could not start ffmpeg: {}'.format(exc), file=sys.stderr)
        return None
    if completed.returncode != 0:
        print('ffmpeg exited with status {}: {}'.format(
            completed.returncode, ' '.join(command)), file=sys.stderr)
    return completed.returncode


def clip_video(source=fileName, start='00:31:15', end='00:34:45',
               target='LetItGo.mp4'):
    """Cut the [start, end] range out of *source* without re-encoding."""
    return _run_ffmpeg(['-i', source, '-ss', start, '-to', end,
                        '-c', 'copy', target])


def grab_random_frames(source=fileName, count=10):
    """Save *count* single frames taken at random timestamps as ``<i>.jpg``.

    Timestamps are drawn from hour 0-1, minute 0-59, second 0-59.
    """
    for index in range(count):
        timestamp = '{}:{}:{}'.format(random.randint(0, 1),
                                      random.randint(0, 59),
                                      random.randint(0, 59))
        _run_ffmpeg(['-ss', timestamp, '-i', source, '-vframes:v', '1',
                     '-q:v', '2', '{}.jpg'.format(index)])


def add_logo_watermark(source=fileName, logo=logoName, target='logo.mp4'):
    """Overlay *logo* on *source*, flush right and 10 px below the top edge."""
    return _run_ffmpeg(['-i', source, '-i', logo, '-filter_complex',
                        'overlay=main_w-overlay_w:10', target])


def add_text_watermark(source=fileName, target='wordWatemark.mp4'):
    """Draw the semi-transparent red text watermark onto *source*."""
    drawtext = ("drawtext=fontfile=Arial Unicode.ttf:text='文字水印'"
                ":x=w-100:y=100:fontsize=24:fontcolor=red@0.5:shadowy=2")
    return _run_ffmpeg(['-i', source, '-vf', drawtext, target])


def extract_audio(source='LetItGo.mp4', target='LetItGo.aac'):
    """Copy the audio stream of *source* into *target*, dropping the video."""
    return _run_ffmpeg(['-i', source, '-vn', '-c:a', 'copy', target])


# Steps that run when the script executes. The clip, frame-grab and logo
# steps are defined above but not called.
# NOTE: extract_audio() reads LetItGo.mp4, which is produced by clip_video().
add_text_watermark()
extract_audio()

moumoubaimifan/lagou/LgCrawler.py

Lines changed: 255 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,255 @@
1+
import requests
2+
import time
3+
import random
4+
import pymysql
5+
import re
6+
from pyecharts.charts import BMap, Map, Geo, Bar, Pie, PictorialBar, Boxplot, WordCloud
7+
from pyecharts import options as opts
8+
from pyecharts.globals import ChartType, ThemeType, SymbolType
9+
10+
11+
class LgCrawler(object):
    """Crawl lagou.com job postings for the keyword "python" and chart them.

    Postings are stored in the MySQL table ``jobs``; every chart method runs
    an aggregate query against that table and renders a pyecharts HTML file
    into the current working directory.
    """

    # Parameterized statement for storing one posting. The column order must
    # match the tuple built by ``_job_row``.
    INSERT_SQL = ('INSERT INTO jobs (positionName,workYear,salary,city,education,'
                  'positionAdvantage,companyLabelList,financeStage,companySize,'
                  'industryField,firstType) '
                  'VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)')

    conn = None
    cursor = None

    def __init__(self):
        """Open the MySQL connection and create the cursor shared by all methods."""
        # Keyword arguments: PyMySQL 1.0 made connect() keyword-only, and these
        # names are also accepted by older releases.
        # NOTE(review): credentials are hard-coded; move them to configuration.
        self.conn = pymysql.connect(host="127.0.0.1", user="root",
                                    password="12345678", database="lagou")
        self.cursor = self.conn.cursor()

    def insert(self, values=None):
        """Insert one posting into ``jobs`` and commit.

        Args:
            values: the 11 column values, in the order listed in ``INSERT_SQL``.

        Raises:
            ValueError: if *values* is omitted; the statement has placeholders
                and cannot run without parameters.
        """
        if values is None:
            raise ValueError('insert() requires the 11 column values of a job row')
        self.cursor.execute(self.INSERT_SQL, tuple(values))
        self.conn.commit()

    def query(self, sql):
        """Execute *sql* on the shared cursor and return all fetched rows."""
        self.cursor.execute(sql)
        return self.cursor.fetchall()

    @staticmethod
    def _job_row(job):
        """Flatten one posting dict from the API into the ``INSERT_SQL`` column order."""
        return (
            job['positionName'], job['workYear'], job['salary'], job['city'],
            job['education'], job['positionAdvantage'],
            # The label list may be null in the response; store it as ''.
            "|".join(job['companyLabelList'] or []),
            job['financeStage'], job['companySize'], job['industryField'],
            job['firstType'],
        )

    def crawler(self):
        """Page through the position search API and store every posting.

        The first response supplies the total number of postings and the page
        size; paging stops once ``page * resultSize`` covers the total or a
        page comes back empty. The method sleeps 15-30 seconds after each page.
        """
        # NOTE(review): the Cookie below is a captured browser session and is
        # presumably expired; refresh it before running.
        headers = {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36',
            'Host': 'www.lagou.com',
            'Referer': 'https://www.lagou.com/jobs/list_python/p-city_0?&cl=false&fromSearch=true&labelWords=&suginput=',
            'Cookie': 'user_trace_token=20200321120912-e091b8e2-ae3a-4e98-b8cc-7eda56613730; LGUID=20200321120912-103e3b3f-4b2d-4b40-aac8-de6f2151b52a; Hm_lvt_4233e74dff0ae5bd0a3d81c6ccf756e6=1584763752; _ga=GA1.2.707847320.1584763752; _gid=GA1.2.1026377415.1584763752; index_location_city=%E5%85%A8%E5%9B%BD; sensorsdata2015jssdkcross=%7B%22distinct_id%22%3A%22170fb47eec2128-04c4426beb9ea8-396d7406-1764000-170fb47eec46c6%22%2C%22%24device_id%22%3A%22170fb47eec2128-04c4426beb9ea8-396d7406-1764000-170fb47eec46c6%22%7D; sajssdk_2015_cross_new_user=1; X_MIDDLE_TOKEN=b44cae2e06dda98341f7fda429c15d04; PRE_UTM=; PRE_HOST=; PRE_LAND=https%3A%2F%2Fwww.lagou.com%2Fjobs%2Flist%5Fpython%2Fp-city%5F0%3F%26cl%3Dfalse%26fromSearch%3Dtrue%26labelWords%3D%26suginput%3D; LGSID=20200321151013-aa659974-2803-4434-83e7-ed146560e5e0; PRE_SITE=; X_HTTP_TOKEN=f05004685d58bcda35257748511c75fb5b02e29508; Hm_lpvt_4233e74dff0ae5bd0a3d81c6ccf756e6=1584775254; _gat=1; LGRID=20200321152606-05042c06-9cea-4b97-9b47-908278188949',
            'X-Anit-Forge-Code': '0',
            'X-Anit-Forge-Token': 'None',
            'X-Requested-With': 'XMLHttpRequest'
        }
        url = "https://www.lagou.com/jobs/positionAjax.json?needAddtionalResult=false"

        page = 0
        # Placeholder values that let the loop start; both are replaced from
        # the first response.
        totalCount = 1
        resultSize = 0
        # Strict '<': '<=' requested one page past the end and never
        # terminated when the search returned zero postings.
        while page * resultSize < totalCount:
            page = page + 1
            datas = {
                'first': 'true' if page == 1 else 'false',
                'pn': page,
                'kd': 'python'
            }

            response = requests.post(url, headers=headers, data=datas)
            position_result = response.json()['content']['positionResult']

            if page == 1:
                totalCount = position_result['totalCount']
                resultSize = position_result['resultSize']

            jobs = position_result['result']
            if not jobs:
                # Nothing left to store; also guards against resultSize == 0.
                break
            for job in jobs:
                self.insert(self._job_row(job))

            # Random pause between pages.
            time.sleep(random.randint(15, 30))

    def city(self):
        """Render the per-city heat map and the top-20 cities bar chart."""
        results = self.query('select city, count(1) counts from jobs group by city')
        (
            Geo()
            .add_schema(maptype="china")
            .add(
                "城市热力图",
                list(results),
                type_=ChartType.HEATMAP,
            )
            .set_series_opts(label_opts=opts.LabelOpts(is_show=False))
            .set_global_opts(
                visualmap_opts=opts.VisualMapOpts(),
            ).render("拉钩城市热力图.html")
        )

        results = self.query(
            'select city,counts from (select city, count(1) counts from jobs group by city) a order by counts desc limit 20')
        citys = [row[0] for row in results]
        values = [row[1] for row in results]
        (
            Bar()
            .add_xaxis(citys)
            .add_yaxis("各城市的招聘数量 Top 20", values)
            .set_global_opts(
                xaxis_opts=opts.AxisOpts(name_rotate=60, name="城市", axislabel_opts={"rotate": 45})
            ).render("拉钩城市招聘图.html")
        )

    def _render_pie(self, sql, title, formatter, filename):
        """Run a ``(label, count)`` query and render it as a titled pie chart."""
        results = self.query(sql)
        (
            Pie()
            .add("", list(results))
            .set_global_opts(title_opts=opts.TitleOpts(title=title))
            .set_series_opts(label_opts=opts.LabelOpts(formatter=formatter))
            .render(filename)
        )

    def education(self):
        """Render the pie chart of postings per required education level."""
        self._render_pie('select education,count(1) counts from jobs group by education',
                         '学历占比', "{b}: {c}", "拉勾学历.html")

    def workYear(self):
        """Render the pie chart of postings per required work experience."""
        self._render_pie('select workYear,count(1) counts from jobs group by workYear',
                         '工作经验占比', "{b}: {c},{d}%", "拉勾工作年限.html")

    @staticmethod
    def _top_industries(texts, limit=20):
        """Count industry names across *texts* and return the *limit* most common.

        Each text may list several industries separated by ',', '丨', '、' or
        whitespace. Null/empty texts and empty tokens are ignored.

        Returns:
            ``(name, count)`` pairs sorted by count, then name, descending.
        """
        counts = {}
        for text in texts:
            if not text:
                continue
            normalized = text.replace(',', ' ').replace('丨', ' ').replace('、', ' ')
            # split() without an argument drops the empty tokens produced by
            # adjacent separators.
            for name in normalized.split():
                counts[name] = counts.get(name, 0) + 1
        ranked = sorted(counts.items(), key=lambda kv: (kv[1], kv[0]), reverse=True)
        return ranked[:limit]

    def field(self):
        """Render the pictorial bar chart of the 20 most common industries."""
        results = self.query('select industryField from jobs')
        top = self._top_industries(row[0] for row in results)
        # Axis labels carry the count as a suffix because the value labels are
        # switched off below.
        location = [name + str(count) for name, count in top]
        values = [count for _, count in top]

        (
            PictorialBar()
            .add_xaxis(location)
            .add_yaxis(
                "",
                values,
                label_opts=opts.LabelOpts(is_show=False),
                symbol_size=18,
                symbol_repeat="fixed",
                symbol_offset=[0, 0],
                is_symbol_clip=True,
                symbol=SymbolType.ROUND_RECT,
            )
            .reversal_axis()
            .set_global_opts(
                title_opts=opts.TitleOpts(title="热门行业"),
                xaxis_opts=opts.AxisOpts(is_show=False),
                yaxis_opts=opts.AxisOpts(
                    axistick_opts=opts.AxisTickOpts(is_show=False),
                    axisline_opts=opts.AxisLineOpts(
                        linestyle_opts=opts.LineStyleOpts(opacity=0)
                    ),
                ),
            )
            .render("拉勾行业.html")
        )

    @staticmethod
    def _salary_by_experience(rows):
        """Group salary bounds by work-experience label.

        Args:
            rows: ``(workYear, salary)`` pairs where salary is a range such as
                ``"15-30"``; a ``k``/``K`` suffix on either bound is tolerated.

        Returns:
            Dict mapping each workYear (in first-seen order) to the sorted,
            de-duplicated integer bounds seen for it. Non-numeric bounds are
            skipped and labels without any numeric bound are omitted.
        """
        grouped = {}
        for work_year, salary in rows:
            bounds = grouped.setdefault(work_year, set())
            for part in (salary or '').split('-'):
                digits = part.strip().lower().replace('k', '')
                if digits.isdigit():
                    bounds.add(int(digits))
        return {label: sorted(found) for label, found in grouped.items() if found}

    def salary(self):
        """Render the box plot of salary bounds per work-experience bucket."""
        sql = "SELECT workYear,replace(salary,'k','') s FROM jobs group by workYear,salary order by workYear"
        salaries = self._salary_by_experience(self.query(sql))

        print(list(salaries.values()))

        c = Boxplot()
        c.add_xaxis(list(salaries.keys()))
        c.add_yaxis("薪资与工作经验", c.prepare_data(list(salaries.values())))
        c.set_global_opts(title_opts=opts.TitleOpts(title="薪资与工作经验"))
        c.render("拉勾薪资.html")

    @staticmethod
    def _word_frequencies(rows):
        """Count the words found in the first two columns of every row.

        Words are maximal runs of ``\\w`` characters; null/empty columns and
        empty tokens are ignored.

        Returns:
            ``(word, count)`` pairs in order of first appearance.
        """
        counts = {}
        for row in rows:
            for text in row[:2]:
                if not text:
                    continue
                for word in re.split(r'\W+', text):
                    if word:
                        counts[word] = counts.get(word, 0) + 1
        return list(counts.items())

    def ciyun(self):
        """Render the word cloud of position advantages and company labels."""
        results = self.query('select positionAdvantage,companyLabelList from jobs')
        words = self._word_frequencies(results)

        (
            WordCloud()
            .add(series_name="热点分析", data_pair=words, word_size_range=[6, 66])
            .set_global_opts(
                title_opts=opts.TitleOpts(
                    title="热点分析", title_textstyle_opts=opts.TextStyleOpts(font_size=23)
                ),
                tooltip_opts=opts.TooltipOpts(is_show=True),
            )
            .render("拉勾福利.html")
        )

    def companySize(self):
        """Render the pie chart of postings per company size."""
        self._render_pie('select companySize,count(1) counts from jobs group by companySize',
                         '企业大小', "{b}: {c},{d}%", "拉勾企业大小.html")

    def financeStage(self):
        """Render the pie chart of postings per company financing stage."""
        self._render_pie('select financeStage,count(1) counts from jobs group by financeStage',
                         '企业融资占比', "{b}: {c},{d}%", "拉勾融资.html")
254+
if __name__ == '__main__':
    # Script entry point: connect to the database, then render the
    # industry chart.
    crawler = LgCrawler()
    crawler.field()

moumoubaimifan/lagou/jobs.sql

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
-- One row per job posting; the non-id columns match the INSERT issued by
-- LgCrawler. utf8mb4 is declared explicitly because the stored text is
-- Chinese and must not depend on the server's default character set.
CREATE TABLE `jobs` (
  `id` int(11) NOT NULL AUTO_INCREMENT,
  `positionName` varchar(45) DEFAULT NULL,
  `workYear` varchar(45) DEFAULT NULL,
  `salary` varchar(45) DEFAULT NULL,
  `city` varchar(45) DEFAULT NULL,
  `education` varchar(100) DEFAULT NULL,
  `positionAdvantage` varchar(100) DEFAULT NULL,
  -- Company labels joined with '|'.
  `companyLabelList` varchar(100) DEFAULT NULL,
  `financeStage` varchar(45) DEFAULT NULL,
  `companySize` varchar(45) DEFAULT NULL,
  `industryField` varchar(100) DEFAULT NULL,
  `firstType` varchar(100) DEFAULT NULL,
  PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;

0 commit comments

Comments
 (0)