Commit 0c7dbc3

Init Commit
These are all scraping and automation projects, including four projects that use Python, Scrapy, Selenium, Plotly.js, and symfony-goutte.
1 parent cb89450 commit 0c7dbc3

48 files changed (+2654, -0 lines)

Disney_Scraping/app.py

Lines changed: 105 additions & 0 deletions
@@ -0,0 +1,105 @@
import requests
import json


urls = {
    'token_url': 'https://global.edge.bamgrid.com/token',
    'collection_url': 'https://search-api-disney.svcs.dssott.com/svc/search/v2/graphql/persisted/query/core/CollectionBySlug',
    'section_url': 'https://search-api-disney.svcs.dssott.com/svc/search/v2/graphql/persisted/query/core/SetBySetId'
}


def get_auth_token():
    # Exchange the hard-coded refresh token for an access token.
    payload = {
        'grant_type': 'refresh_token',
        'latitude': 0,
        'longitude': 0,
        'platform': 'browser',
        'refresh_token': 'eyJraWQiOiJlNzRlOTlhNy04NDNlLTQ2NmEtOTVhMS02YjA0MjYwNThlNmYiLCJhbGciOiJFZERTQSJ9.eyJhdWQiOiJ1cm46YmFtdGVjaDpzZXJ2aWNlOnRva2VuIiwic3ViamVjdF90b2tlbl90eXBlIjoidXJuOmJhbXRlY2g6cGFyYW1zOm9hdXRoOnRva2VuLXR5cGU6ZGV2aWNlIiwibmJmIjoxNTg0MzQxMDc3LCJncmFudF90eXBlIjoidXJuOmlldGY6cGFyYW1zOm9hdXRoOmdyYW50LXR5cGU6dG9rZW4tZXhjaGFuZ2UiLCJpc3MiOiJ1cm46YmFtdGVjaDpzZXJ2aWNlOnRva2VuIiwiY29udGV4dCI6ImV5SmhiR2NpT2lKdWIyNWxJbjAuZXlKemRXSWlPaUptT0RObE1qQXdZUzA0WmpFMExUUXpZbVF0T0RObE1pMHlaV1EzTmpJeE56VXdZemdpTENKaGRXUWlPaUoxY200NlltRnRkR1ZqYURwelpYSjJhV05sT25SdmEyVnVJaXdpYm1KbUlqb3hOVGcwTXpJNU1EVXpMQ0pwYzNNaU9pSjFjbTQ2WW1GdGRHVmphRHB6WlhKMmFXTmxPbVJsZG1salpTSXNJbVY0Y0NJNk1qUTBPRE15T1RBMU15d2lhV0YwSWpveE5UZzBNekk1TURVekxDSnFkR2tpT2lJMU1HRTBNMlU1WkMweU1tRmtMVFJoTldZdE9HUmpNaTB4TUdZMU5HVXhPR00xT0RjaWZRLiIsImV4cCI6MTU5OTg5MzA3NywiaWF0IjoxNTg0MzQxMDc3LCJqdGkiOiI3NzI0NDY1OS04NzBiLTQ4ZTEtYmY4Yi00NmQ2NmE1MTk2MDcifQ.EWn4KXvf3xgYb9gDSNbcD3xN4qcQVwiEUd45q2sfG9_Zcy06OnTzfIokWAAuNUQzc9Fm6bpEh7__D7M8KZ_IBw'
    }

    headers = {
        'Content-Type': 'application/x-www-form-urlencoded',
        'authorization': 'Bearer ZGlzbmV5JmJyb3dzZXImMS4wLjA.Cu56AgSfBTDag5NiRA81oLHkDZfu5L3CKadnefEAY84'
    }

    token = requests.post(
        urls['token_url'], data=payload, headers=headers).json()
    return token


def get_section_info():
    # Fetch the "home" collection and record the set id/type of each section.
    section_info_dict = {}

    token_data = get_auth_token()
    auth_token = token_data['access_token']

    params = {
        'variables': '{"preferredLanguage":["en"],"contentClass":"home","slug":"home","contentTransactionId":"2c278173-12a7-4bb6-85c0-13c23cd93370"}'
    }

    headers = {
        'authorization': "Bearer " + auth_token,
    }

    data = requests.get(urls['collection_url'],
                        params=params, headers=headers).json()

    index = 0
    sections = data['data']['CollectionBySlug']['containers']

    for i in range(2, len(sections)):
        section_info_dict[index] = {}
        if 'refId' in sections[i]['set'].keys():
            section_info_dict[index] = {
                'setId': sections[i]['set']['refId'], 'setType': sections[i]['set']['refType']}
        elif 'setId' in sections[i]['set'].keys():
            section_info_dict[index] = {
                'setId': sections[i]['set']['setId'], 'setType': sections[i]['set']['type']}

        index = index + 1

    return section_info_dict


def get_section_data(section_info):
    # Fetch the items of one section and keep each item's name and image URL.
    result_data = {}

    token_data = get_auth_token()
    auth_token = token_data['access_token']

    params = {
        'variables': '{"preferredLanguage": ["en"], "setId":"' + section_info['setId'] + '",'
        '"setType":"' + section_info['setType'] + '", "contentTransactionId": "a099f643-6021-4b28-a687-512bbe546e0d"}'
    }

    headers = {
        'authorization': "Bearer " + auth_token,
        'Content-Type': 'application/x-www-form-urlencoded'
    }

    data = requests.get(urls['section_url'],
                        params=params, headers=headers).json()

    result_data['Name'] = data['data']['SetBySetId']['texts'][0]['content']
    result_data['Items'] = []

    for i in range(len(data['data']['SetBySetId']['items'])):
        result_image = ''
        result_name = data['data']['SetBySetId']['items'][i]['texts'][0]['content']
        if result_data['Name'] == "Collections":
            result_image = data['data']['SetBySetId']['items'][i]['images'][4]['url']
        elif len(data['data']['SetBySetId']['items'][i]['images']) > 8:
            result_image = data['data']['SetBySetId']['items'][i]['images'][9]['url']

        result_data['Items'].append({'Name': result_name, 'Image': result_image})
    print(result_data)
    return result_data


# Truncate data.json, then append each section's data as one JSON object.
open('data.json', 'w').close()

for key, value in get_section_info().items():
    section_data = get_section_data(value)

    with open('data.json', 'a', encoding='utf-8') as f:
        f.write(json.dumps(section_data))
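
The script above appends each section's result as a separate JSON object, so data.json ends up as concatenated objects rather than a single JSON document. A minimal sketch of an alternative that collects everything into one valid JSON array (assuming the same get_section_info and get_section_data functions defined above; this is not part of the commit) could look like this:

# Hypothetical alternative: gather all sections into a list
# and write a single JSON array to data.json.
all_sections = []

for key, value in get_section_info().items():
    all_sections.append(get_section_data(value))

with open('data.json', 'w', encoding='utf-8') as f:
    json.dump(all_sections, f, ensure_ascii=False, indent=2)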

Gmail_Scraping/.gitignore

Lines changed: 121 additions & 0 deletions
@@ -0,0 +1,121 @@
# Environment Build Files
env.py
config_local.py

# Remove main config
config_live.py

# Exclude Gulp Files
node_modules/

# Exclude temp uploads
static/images/temp/
static/images/d1/


# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/

Gmail_Scraping/app.py

Lines changed: 104 additions & 0 deletions
@@ -0,0 +1,104 @@
################################
#
# Build App
#
################################

import os
import logging

from datetime import datetime
from logging.config import dictConfig

from flask import Flask, g, current_app, request, make_response
from flask_restful import abort
from flask_cors import CORS

from app.config import LocalConfig
from app.common.extensions import CustomApi
from app.common.database import Database
from app.common.helpers import default_schema, milli_time

from app.views.home import *


def create_app(production=False):
    config = LocalConfig

    # Create app
    app = Flask(__name__)
    app.config.from_object(config)
    app.config['TEMPLATES_AUTO_RELOAD'] = True
    app.secret_key = app.config['APP_SECRET_KEY']
    app.url_map.strict_slashes = False
    app.jinja_env.cache = {}
    app.jinja_env.auto_reload = True

    # create api
    CORS(app)
    api = CustomApi(app, prefix='/api/v1')

    # Initializing the logger
    dictConfig(app.config['LOGGING'])

    # register_extensions(app)
    register_hooks(app)
    # register_endpoints(api)
    register_routes(app)

    return app


def register_hooks(app):
    def db_has_connection():
        return hasattr(g, 'db')

    def get_db_connection():
        # Lazily open one database connection per request and store it on g.
        if not db_has_connection():
            try:
                g.db = Database(
                    host=current_app.config['MYSQL']['HOST'],
                    db=current_app.config['MYSQL']['DB'],
                    user=current_app.config['MYSQL']['USER'],
                    passwd=current_app.config['MYSQL']['PASS'],
                )
            except Exception as e:
                abort(500,
                      status=0,
                      message='Failed to connect to CORE Database.',
                      errors=dict(
                          application='There was a problem connecting to MySQL.',
                          validation=None
                      ),
                      http_status=500
                      )
        return g.db

    @app.before_request
    def before_request():
        # Short-circuit favicon requests with an empty 204 response.
        if request.path.startswith('/favicon.ico'):
            response = make_response('', 204)
            response.headers['Content-Length'] = 0
            response.status_code = 204
            return response

        g.start_time = milli_time()

        get_db_connection()

    @app.teardown_request
    def close_db_connection(ex):
        if db_has_connection():
            conn = get_db_connection()
            conn.close()


def register_routes(app):

    #############################################
    #
    # Dashboard home

    # Campaigns
    app.add_url_rule('/', view_func=Home.as_view('home'))
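
This commit does not include an entry point for the Flask app. As a rough, hypothetical usage sketch (the host, port, and debug values are assumptions, not part of the commit), the create_app factory defined above could be started with Flask's built-in development server:

# Hypothetical entry point: build the app via the factory above and run
# the Flask development server. Host/port/debug values are placeholders.
app = create_app()

if __name__ == '__main__':
    app.run(host='127.0.0.1', port=5000, debug=True)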
