Skip to content

Commit c1c8305

Browse files
author
dmy.berezovskyi
committed
improved chrome_scraper.py
added cli
1 parent 7ce6306 commit c1c8305

File tree

7 files changed

+102
-17
lines changed

7 files changed

+102
-17
lines changed

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ coverage.xml
5151
.pytest_cache/
5252
cover/
5353
*.env
54-
resources/local
54+
resources/chromedriver
5555
resources/firefox
5656
resources/ubuntuchrome
5757

core_driver/driver.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ def get_desired_caps(self, browser="chrome"):
4949

5050

5151
class LocalDriver(Driver):
52-
def create_driver(self, environment=None, dr_type="local"):
52+
def create_driver(self, environment=None, dr_type="chromedriver"):
5353
"""Tries to use ChromeDriverManager to install the latest driver,
5454
and if it fails, it falls back to a locally stored driver in resources."""
5555
driver = None

pyproject.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ requests="^2.31.0"
2525
setuptools="70.0.0"
2626
ruff="0.6.8"
2727
secure-test-automation="^1.3.1"
28+
colorama="==0.4.6"
29+
rich="==13.9.4"
2830

2931

3032
[tool.pytest.ini_options]
File renamed without changes.

scraper/chrome_scraper.py

Lines changed: 40 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,12 @@
11
import json
2-
from typing import Dict
2+
import pathlib
3+
from typing import Dict, Optional
34

45
import requests
6+
from pathlib import Path
7+
import zipfile
8+
from io import BytesIO
9+
510
from bs4 import BeautifulSoup
611

712
from scraper.os_checker import OSChecker
@@ -11,24 +16,23 @@ class ChromePageScraper:
1116
URL_LATEST = (
1217
"https://googlechromelabs.github.io/chrome-for-testing/#stable"
1318
)
14-
URL_ALL = "https://googlechromelabs.github.io/chrome-for-testing/latest-versions-per-milestone-with-downloads.json" # noqa
19+
URL_ALL = "https://googlechromelabs.github.io/chrome-for-testing/latest-versions-per-milestone-with-downloads.json"
1520

1621
@staticmethod
1722
def __fetch(url: str) -> requests.Response:
1823
response = requests.get(url)
19-
response.raise_for_status() # Raises an exception if status code is not 200 # noqa
24+
response.raise_for_status()
2025
return response
2126

2227
@staticmethod
2328
def parse_latest() -> Dict[str, str]:
24-
# returns a latest stable chrome driver
2529
elements_list = []
2630
drivers = {}
2731
page = ChromePageScraper.__fetch(ChromePageScraper.URL_LATEST)
2832

2933
soup = BeautifulSoup(page.text, "html.parser")
3034
element = soup.select_one(
31-
"section#stable.status-not-ok div.table-wrapper table tbody tr.status-ok" # noqa
35+
"section#stable.status-not-ok div.table-wrapper table tbody tr.status-ok"
3236
)
3337

3438
if not element:
@@ -55,12 +59,18 @@ def get_latest_driver(self, os_name: str):
5559
print(drivers[os_name])
5660

5761
@staticmethod
58-
def get_chromedriver(platform=None, version=None, milestone=None):
62+
def get_chromedriver(platform=None,
63+
version=None,
64+
milestone=None,
65+
d_dir: Optional[pathlib.Path] = None,
66+
is_extracted: bool = False
67+
):
5968
"""
60-
6169
:param platform: os_name and architecture
6270
:param version: your chrome browser version
6371
:param milestone: first 3 digits of a browser version: 129 or etc
72+
:param d_dir: Directory to save the chromedriver zip file
73+
:param is_extracted: extracts the chromedriver
6474
:return:
6575
"""
6676
if version is None and milestone is None:
@@ -71,6 +81,8 @@ def get_chromedriver(platform=None, version=None, milestone=None):
7181
if platform is None:
7282
platform = OSChecker.check_os()
7383

84+
download_dir = d_dir or Path(__file__).resolve().parent.parent / "resources"
85+
7486
# Parse the JSON data
7587
parsed_data = json.loads(
7688
ChromePageScraper.__fetch(ChromePageScraper.URL_ALL).text
@@ -79,18 +91,31 @@ def get_chromedriver(platform=None, version=None, milestone=None):
7991

8092
for milestone_key, milestone_data in milestones_data.items():
8193
if (milestone is None or milestone_key == milestone) and (
82-
version is None or milestone_data["version"] == version
94+
version is None or milestone_data["version"] == version
8395
):
8496
if "chromedriver" in milestone_data["downloads"]:
85-
for chromedriver_info in milestone_data["downloads"][
86-
"chromedriver"
87-
]:
97+
for chromedriver_info in milestone_data["downloads"]["chromedriver"]:
8898
if (
89-
platform is None
90-
or chromedriver_info["platform"] == platform
99+
platform is None
100+
or chromedriver_info["platform"] == platform
91101
):
92-
return chromedriver_info
102+
url = chromedriver_info["url"]
103+
response = requests.get(url)
104+
response.raise_for_status() # Check status
105+
106+
download_dir.mkdir(parents=True, exist_ok=True)
107+
download_path = download_dir / "chromedriver.zip"
108+
109+
with open(download_path, "wb") as file:
110+
file.write(response.content)
111+
print(f"Chromedriver downloaded to {download_dir}")
112+
113+
if is_extracted:
114+
with zipfile.ZipFile(BytesIO(response.content)) as zip_ref:
115+
zip_ref.extractall(download_dir)
93116

117+
print(f"Chromedriver extracted to {download_dir}")
118+
return download_path
94119

95120
if __name__ == "__main__":
96-
print(ChromePageScraper.get_chromedriver(milestone="129"))
121+
ChromePageScraper.get_chromedriver(milestone="131")

utils/cli/__init__.py

Whitespace-only changes.

utils/cli/cli.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
import argparse
2+
from pathlib import Path
3+
4+
from pyfiglet import Figlet
5+
from rich.console import Console
6+
from colorama import Fore, Style
7+
8+
from scraper.chrome_scraper import ChromePageScraper
9+
10+
11+
def _resolve_platform(name):
    """Translate a ``--platform`` value into a Chrome for Testing identifier.

    The download feed labels builds ``linux64``, ``mac-arm64``, ``mac-x64``,
    ``win32`` and ``win64``, and ``get_chromedriver`` compares the platform
    verbatim against that field.  The short aliases the CLI has always
    accepted (``windows``, ``mac``, ``linux``) would therefore never match,
    so they are mapped to a concrete identifier here.

    :param name: value given on the command line, or ``None`` if omitted
    :return: a feed platform identifier, or ``None`` so the scraper falls
        back to its own OS detection
    """
    # Local import under an alias: the CLI also has a ``platform`` option.
    import platform as host_platform

    if name == "windows":
        return "win64"
    if name == "linux":
        return "linux64"
    if name == "mac":
        # The alias is ambiguous; pick the build matching the host CPU.
        machine = host_platform.machine().lower()
        return "mac-arm64" if machine in ("arm64", "aarch64") else "mac-x64"
    return name


def _build_parser():
    """Build the ``sstf`` argument parser with the ``get chromedriver`` command."""
    parser = argparse.ArgumentParser(prog="sstf", description="SSTF Command Line Tool")
    subparsers = parser.add_subparsers(dest="command")

    # 'get' groups every download-style subcommand.
    get_parser = subparsers.add_parser("get", help="Download and manage Chromedriver")
    get_subparsers = get_parser.add_subparsers(dest="subcommand")

    # 'get chromedriver' and its options.
    chromedriver_parser = get_subparsers.add_parser(
        "chromedriver",
        help="Download chromedriver for a specified version and platform",
    )
    chromedriver_parser.add_argument(
        "--milestone",
        type=str,
        help=f"{Fore.CYAN}Chromium milestone version (e.g., 131).{Style.RESET_ALL}",
    )
    chromedriver_parser.add_argument(
        "--version",
        type=str,
        help=f"{Fore.CYAN}Chromium browser version.{Style.RESET_ALL}",
    )
    chromedriver_parser.add_argument(
        "--platform",
        type=str,
        # Legacy aliases first, then the identifiers used by the download feed.
        choices=[
            "windows", "mac", "linux",
            "linux64", "mac-arm64", "mac-x64", "win32", "win64",
        ],
        help=f"{Fore.CYAN}Operating system platform.{Style.RESET_ALL}",
    )
    chromedriver_parser.add_argument(
        "--output-dir",
        type=str,
        default=None,
        help=f"{Fore.CYAN}Directory to save the downloaded Chromedriver.{Style.RESET_ALL}",
    )
    chromedriver_parser.add_argument(
        "--extract",
        action="store_true",
        help=f"{Fore.CYAN}Extract the Chromedriver after download.{Style.RESET_ALL}",
    )
    return parser


def create_cli(argv=None):
    """Parse the command line and run the requested ``sstf`` command.

    Currently the only command is ``sstf get chromedriver``, which prints a
    banner and delegates to ``ChromePageScraper.get_chromedriver``.

    :param argv: argument list to parse; ``None`` (the default) makes
        argparse read ``sys.argv[1:]``, exactly as before
    :return: None
    """
    parser = _build_parser()
    args = parser.parse_args(argv)

    # Nothing (or only 'get') was requested: show usage instead of exiting
    # silently.  ``subcommand`` is absent from the namespace when 'get' was
    # not selected, hence the getattr.
    if args.command != "get" or getattr(args, "subcommand", None) != "chromedriver":
        parser.print_help()
        return

    console = Console()

    # ASCII-art header rendered by Figlet and printed through Rich.
    fig = Figlet(font="slant")
    console.print(fig.renderText("Chromedriver Download"), style="bold green")

    downloaded = ChromePageScraper.get_chromedriver(
        platform=_resolve_platform(args.platform),
        version=args.version,
        milestone=args.milestone,
        d_dir=Path(args.output_dir) if args.output_dir else None,
        is_extracted=args.extract,
    )

    # The scraper returns the archive path on success and falls through to
    # None when no milestone/version/platform combination matched.
    if downloaded is None:
        console.print(
            "No chromedriver matched the requested version, milestone and platform.",
            style="bold red",
        )
55+
56+
57+
# Script entry point: build the CLI and dispatch the requested command.
if __name__ == "__main__":
58+
create_cli()

0 commit comments

Comments
 (0)