Skip to content

Commit ae51028

Browse files
committed
docs: update deployment script and add sitemap and robots files
1 parent 4f87119 commit ae51028

File tree

6 files changed

+160
-160
lines changed

6 files changed

+160
-160
lines changed

.github/workflows/deploy-docs.yml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,10 @@ jobs:
3030
run: |
3131
mkdir -p site/docs
3232
cp -r temp_docs/* site/docs/
33-
cp -r landing/* site/
33+
cp -r docs/landing/* site/
34+
cp docs/sitemap.xml site/
35+
cp docs/landing-sitemap.xml site/
36+
cp docs/robots.txt site/
3437
3538
- name: Deploy to GitHub Pages
3639
uses: peaceiris/actions-gh-pages@v3

docs/landing-sitemap.xml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
3+
<url>
4+
<loc>https://pydoll.tech/</loc>
5+
<changefreq>daily</changefreq>
6+
<priority>1.0</priority>
7+
</url>
8+
</urlset>

docs/landing/index.html

Lines changed: 126 additions & 156 deletions
Original file line numberDiff line numberDiff line change
@@ -4,188 +4,158 @@
44
<meta charset="UTF-8" />
55
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
66
<meta name="theme-color" content="#0b1220" />
7-
<title>Pydoll Automate the Web, naturally</title>
7+
<title>Pydoll - Automate the Web, naturally</title>
88
<meta name="description" content="Pydoll is a Python browser automation (CDP) library for web scraping, captcha bypass (Cloudflare, reCAPTCHA), human-like interactions, and browser-context requests." />
99
<meta name="keywords" content="pydoll, browser automation, web automation, web scraping, scraping, data scraping, data extraction, crawler, crawling, headless browser, headless chrome, chrome devtools protocol, devtools protocol, cdp, python cdp, chrome cdp, playwright, puppeteer, selenium, selenium alternative, pyppeteer, undetected chromedriver, stealth, antibot, anti bot, bot detection, fingerprinting, captcha bypass, bypass cloudflare, cloudflare turnstile, recaptcha, hcaptcha, captcha solver, automation python, async python, asyncio, network interception, request interception, browser context requests, http requests browser context, humanized interactions, human-like interactions, automation library, scraping framework, scrapy, beautifulsoup, bs4, requests, aiohttp, automação de navegador, automação web, raspagem de dados, coleta de dados, robô, bot, detecção de bot, interações humanizadas, assíncrono, concorrência, múltiplas abas, preferências do navegador, downloads silenciosos" />
1010
<meta name="robots" content="index,follow" />
11-
<link rel="canonical" href="https://github.com/autoscrape-labs/pydoll" />
11+
<link rel="canonical" href="https://pydoll.tech/" />
1212

1313
<!-- Open Graph -->
14-
<meta property="og:title" content="Pydoll Automate the Web, naturally" />
14+
<meta property="og:title" content="Pydoll - Automate the Web, naturally" />
1515
<meta property="og:description" content="Browser automation for web scraping with human-like interactions, browser-context requests and captcha bypass (Cloudflare, reCAPTCHA)." />
1616
<meta property="og:type" content="website" />
1717
<meta property="og:image" content="https://github.com/user-attachments/assets/219f2dbc-37ed-4aea-a289-ba39cdbb335d" />
18-
<meta property="og:url" content="https://github.com/autoscrape-labs/pydoll" />
18+
<meta property="og:url" content="https://pydoll.tech/" />
1919
<meta property="og:site_name" content="Pydoll" />
20-
<meta property="og:locale" content="pt_BR" />
21-
<meta property="og:locale:alternate" content="en_US" />
20+
<meta property="og:locale" content="en_US" />
21+
<meta property="og:locale:alternate" content="pt_BR" />
2222

2323
<!-- Twitter Card -->
2424
<meta name="twitter:card" content="summary_large_image" />
25-
<meta name="twitter:title" content="Pydoll Automate the Web, naturally" />
25+
<meta name="twitter:title" content="Pydoll - Automate the Web, naturally" />
2626
<meta name="twitter:description" content="Browser automation for web scraping, browser-context requests and captcha bypass (Cloudflare, reCAPTCHA)." />
2727
<meta name="twitter:image" content="https://github.com/user-attachments/assets/219f2dbc-37ed-4aea-a289-ba39cdbb335d" />
2828

2929
<!-- Performance hints -->
3030
<link rel="preconnect" href="https://cdn.tailwindcss.com" />
3131
<link rel="preconnect" href="https://cdn.jsdelivr.net" />
3232
<link rel="preconnect" href="https://github.com" crossorigin />
33-
<link rel="preconnect" href="https://autoscrape-labs.github.io" />
33+
<link rel="preconnect" href="https://pydoll.tech/" />
3434

3535
<!-- Favicon -->
36-
<link rel="icon" type="image/png" href="../images/favicon.png" />
36+
<link rel="icon" type="image/png" href="./docs/images/favicon.png" />
3737

3838
<!-- Tailwind CSS via CDN -->
3939
<script src="https://cdn.tailwindcss.com"></script>
4040
<!-- Schema.org: SoftwareApplication / SoftwareSourceCode -->
4141
<script type="application/ld+json">
42-
{
43-
"@context": "https://schema.org",
44-
"@type": "SoftwareApplication",
45-
"name": "Pydoll",
46-
"applicationCategory": "DeveloperApplication",
47-
"operatingSystem": "Windows, macOS, Linux",
48-
"programmingLanguage": "Python",
49-
"url": "https://github.com/autoscrape-labs/pydoll",
50-
"image": "https://github.com/user-attachments/assets/219f2dbc-37ed-4aea-a289-ba39cdbb335d",
51-
"description": "Browser automation (CDP) for web scraping with human-like interactions, browser-context requests and captcha bypass (Cloudflare, reCAPTCHA).",
52-
"keywords": [
53-
"pydoll",
54-
"browser automation",
55-
"web automation",
56-
"web scraping",
57-
"scraping",
58-
"data scraping",
59-
"data extraction",
60-
"crawler",
61-
"crawling",
62-
"headless browser",
63-
"headless chrome",
64-
"chrome devtools protocol",
65-
"devtools protocol",
66-
"cdp",
67-
"python cdp",
68-
"chrome cdp",
69-
"playwright",
70-
"puppeteer",
71-
"selenium",
72-
"selenium alternative",
73-
"pyppeteer",
74-
"undetected chromedriver",
75-
"stealth",
76-
"antibot",
77-
"anti bot",
78-
"bot detection",
79-
"fingerprinting",
80-
"captcha bypass",
81-
"bypass cloudflare",
82-
"cloudflare turnstile",
83-
"recaptcha",
84-
"hcaptcha",
85-
"captcha solver",
86-
"automation python",
87-
"async python",
88-
"asyncio",
89-
"network interception",
90-
"request interception",
91-
"browser context requests",
92-
"http requests browser context",
93-
"humanized interactions",
94-
"human-like interactions",
95-
"automation library",
96-
"scraping framework",
97-
"scrapy",
98-
"beautifulsoup",
99-
"bs4",
100-
"requests",
101-
"aiohttp",
102-
"automação de navegador",
103-
"automação web",
104-
"raspagem de dados",
105-
"coleta de dados",
106-
"robô",
107-
"bot",
108-
"detecção de bot",
109-
"interações humanizadas",
110-
"assíncrono",
111-
"concorrência",
112-
"múltiplas abas",
113-
"preferências do navegador",
114-
"downloads silenciosos"
115-
],
116-
"offers": {
117-
"@type": "Offer",
118-
"price": "0",
119-
"priceCurrency": "USD"
120-
}
121-
}
122-
</script>
123-
<script type="application/ld+json">
124-
{
125-
"@context": "https://schema.org",
126-
"@type": "FAQPage",
127-
"mainEntity": [
128-
{
129-
"@type": "Question",
130-
"name": "What is Pydoll and why doesn't it use WebDriver?",
131-
"acceptedAnswer": {
132-
"@type": "Answer",
133-
"text": "Pydoll is a Python library that controls the browser via the Chrome DevTools Protocol (CDP), eliminating WebDrivers. This reduces layers, improves reliability and gives direct access to page events, network interception and JavaScript execution in the real tab context."
134-
}
135-
},
136-
{
137-
"@type": "Question",
138-
"name": "Can Pydoll handle CAPTCHAs like Cloudflare Turnstile or reCAPTCHA v3?",
139-
"acceptedAnswer": {
140-
"@type": "Answer",
141-
"text": "It provides human-like interactions (movement, click, typing) and a helper to attempt Turnstile bypass. Effectiveness depends on IP reputation and interaction pattern. Combine with quality proxies and good navigation practices."
142-
}
143-
},
144-
{
145-
"@type": "Question",
146-
"name": "What are browser-context requests and when to use them?",
147-
"acceptedAnswer": {
148-
"@type": "Answer",
149-
"text": "With tab.request you perform HTTP in the same tab context: cookies, session and CORS are automatically inherited. Ideal for hybrid automation — log in via UI and then call authenticated application APIs."
150-
}
151-
},
152-
{
153-
"@type": "Question",
154-
"name": "What are the practical differentiators: concurrency, events and preferences?",
155-
"acceptedAnswer": {
156-
"@type": "Answer",
157-
"text": "Concurrency with asyncio.gather, reactive events (Page/Network/Runtime) for responsive automations and browser preferences control for silent downloads, languages and more."
42+
{
43+
"@context": "https://schema.org",
44+
"@graph": [
45+
{
46+
"@type": "WebSite",
47+
"@id": "https://pydoll.tech/#website",
48+
"name": "Pydoll",
49+
"url": "https://pydoll.tech/",
50+
"potentialAction": {
51+
"@type": "SearchAction",
52+
"target": "https://pydoll.tech/docs/search/?q={search_term_string}",
53+
"query-input": "required name=search_term_string"
54+
}
55+
},
56+
{
57+
"@type": "WebPage",
58+
"@id": "https://pydoll.tech/#webpage",
59+
"url": "https://pydoll.tech/",
60+
"name": "Pydoll - Async Web Automation Library",
61+
"isPartOf": { "@id": "https://pydoll.tech/#website" },
62+
"description": "Browser automation (CDP) for web scraping with human-like interactions, browser-context requests and captcha bypass (Cloudflare, reCAPTCHA)."
63+
},
64+
{
65+
"@type": "SoftwareApplication",
66+
"@id": "https://pydoll.tech/#software",
67+
"name": "Pydoll",
68+
"applicationCategory": "DeveloperApplication",
69+
"operatingSystem": "Windows, macOS, Linux",
70+
"programmingLanguage": "Python",
71+
"url": "https://github.com/autoscrape-labs/pydoll",
72+
"image": "https://github.com/user-attachments/assets/219f2dbc-37ed-4aea-a289-ba39cdbb335d",
73+
"description": "Browser automation (CDP) for web scraping with human-like interactions, browser-context requests and captcha bypass (Cloudflare, reCAPTCHA).",
74+
"keywords": [
75+
"pydoll",
76+
"browser automation",
77+
"web automation",
78+
"web scraping",
79+
"data extraction",
80+
"crawler",
81+
"headless browser",
82+
"chrome devtools protocol",
83+
"python cdp",
84+
"playwright",
85+
"puppeteer",
86+
"selenium alternative",
87+
"antibot",
88+
"captcha bypass",
89+
"cloudflare turnstile",
90+
"recaptcha",
91+
"async python",
92+
"automation library",
93+
"scraping framework"
94+
],
95+
"offers": {
96+
"@type": "Offer",
97+
"price": "0",
98+
"priceCurrency": "USD"
99+
}
100+
},
101+
{
102+
"@type": "SoftwareSourceCode",
103+
"name": "Pydoll",
104+
"codeRepository": "https://github.com/autoscrape-labs/pydoll",
105+
"programmingLanguage": "Python",
106+
"license": "https://github.com/autoscrape-labs/pydoll/blob/main/LICENSE",
107+
"description": "Python library for browser automation via Chrome DevTools Protocol (CDP), with human-like interactions, browser-context requests and captcha bypass (Cloudflare, reCAPTCHA).",
108+
"keywords": [
109+
"pydoll",
110+
"browser automation",
111+
"web scraping",
112+
"chrome devtools protocol",
113+
"async python",
114+
"captcha bypass"
115+
],
116+
"url": "https://pydoll.tech/"
117+
},
118+
{
119+
"@type": "FAQPage",
120+
"mainEntity": [
121+
{
122+
"@type": "Question",
123+
"name": "What is Pydoll and why doesn't it use WebDriver?",
124+
"acceptedAnswer": {
125+
"@type": "Answer",
126+
"text": "Pydoll is a Python library that controls the browser via the Chrome DevTools Protocol (CDP), eliminating WebDrivers. This reduces layers, improves reliability and gives direct access to page events, network interception and JavaScript execution in the real tab context."
127+
}
128+
},
129+
{
130+
"@type": "Question",
131+
"name": "Can Pydoll handle CAPTCHAs like Cloudflare Turnstile or reCAPTCHA v3?",
132+
"acceptedAnswer": {
133+
"@type": "Answer",
134+
"text": "It provides human-like interactions (movement, click, typing) and a helper to attempt Turnstile bypass. Effectiveness depends on IP reputation and interaction pattern. Combine with quality proxies and good navigation practices."
135+
}
136+
},
137+
{
138+
"@type": "Question",
139+
"name": "What are browser-context requests and when to use them?",
140+
"acceptedAnswer": {
141+
"@type": "Answer",
142+
"text": "With tab.request you perform HTTP in the same tab context: cookies, session and CORS are automatically inherited. Ideal for hybrid automation — log in via UI and then call authenticated application APIs."
143+
}
144+
},
145+
{
146+
"@type": "Question",
147+
"name": "What are the practical differentiators: concurrency, events and preferences?",
148+
"acceptedAnswer": {
149+
"@type": "Answer",
150+
"text": "Concurrency with asyncio.gather, reactive events (Page/Network/Runtime) for responsive automations and browser preferences control for silent downloads, languages and more."
151+
}
152+
}
153+
]
158154
}
159-
}
160-
]
161-
}
162-
</script>
163-
<script type="application/ld+json">
164-
{
165-
"@context": "https://schema.org",
166-
"@type": "SoftwareSourceCode",
167-
"name": "Pydoll",
168-
"codeRepository": "https://github.com/autoscrape-labs/pydoll",
169-
"programmingLanguage": "Python",
170-
"license": "https://github.com/autoscrape-labs/pydoll/blob/main/LICENSE",
171-
"description": "Python library for browser automation via Chrome DevTools Protocol (CDP), with human-like interactions, browser-context requests and captcha bypass (Cloudflare, reCAPTCHA).",
172-
"keywords": "pydoll, browser automation, web automation, web scraping, scraping, data scraping, data extraction, crawler, crawling, headless browser, headless chrome, chrome devtools protocol, devtools protocol, cdp, python cdp, chrome cdp, playwright, puppeteer, selenium, selenium alternative, pyppeteer, undetected chromedriver, stealth, antibot, anti bot, bot detection, fingerprinting, captcha bypass, bypass cloudflare, cloudflare turnstile, recaptcha, hcaptcha, captcha solver, automation python, async python, asyncio, network interception, request interception, browser context requests, http requests browser context, humanized interactions, human-like interactions, automation library, scraping framework, scrapy, beautifulsoup, bs4, requests, aiohttp, automação de navegador, automação web, raspagem de dados, coleta de dados, robô, bot, detecção de bot, interações humanizadas, assíncrono, concorrência, múltiplas abas, preferências do navegador, downloads silenciosos",
173-
"url": "https://github.com/autoscrape-labs/pydoll"
174-
}
175-
</script>
176-
<script type="application/ld+json">
177-
{
178-
"@context": "https://schema.org",
179-
"@type": "WebSite",
180-
"name": "Pydoll",
181-
"url": "https://autoscrape-labs.github.io/pydoll/",
182-
"potentialAction": {
183-
"@type": "SearchAction",
184-
"target": "https://autoscrape-labs.github.io/pydoll/search/?q={search_term_string}",
185-
"query-input": "required name=search_term_string"
155+
]
186156
}
187-
}
188-
</script>
157+
</script>
158+
189159
<script>
190160
tailwind.config = {
191161
theme: {
@@ -281,7 +251,7 @@ <h1 class="text-4xl font-bold tracking-tight sm:text-6xl">
281251
</div>
282252
<div class="mt-12 w-full lg:mt-0 lg:max-w-xl lg:flex-none reveal">
283253
<div class="tilt-card relative overflow-hidden rounded-xl border border-white/10 bg-slate-900/40 p-2 shadow-xl">
284-
<img src="../images/cloudflare-example.gif" alt="Bypass Cloudflare com Pydoll" class="w-full rounded-lg ring-1 ring-white/10" decoding="async" />
254+
<img src="./docs/images/cloudflare-example.gif" alt="Cloudflare bypass with Pydoll" class="w-full rounded-lg ring-1 ring-white/10" decoding="async" />
285255
</div>
286256
<p class="mt-4 text-center text-xs text-slate-400">Cloudflare captcha bypass example</p>
287257
</div>
@@ -397,7 +367,7 @@ <h2 class="text-2xl font-bold tracking-tight">Install and get started in seconds
397367
<span class="ml-3 text-[10px] font-medium tracking-wide text-slate-400">terminal</span>
398368
</div>
399369
<div class="relative h-64 sm:h-72 md:h-80 lg:h-80">
400-
<img src="../images/google-search-example.gif" alt="Pydoll automation example" class="absolute inset-0 h-full w-full object-contain" loading="lazy" decoding="async" />
370+
<img src="./docs/images/google-search-example.gif" alt="Pydoll automation example" class="absolute inset-0 h-full w-full object-contain" loading="lazy" decoding="async" />
401371
</div>
402372
</div>
403373
</div>

docs/robots.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
User-agent: *
2+
Allow: /
3+
Sitemap: https://pydoll.tech/sitemap.xml

docs/sitemap.xml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
3+
<sitemap>
4+
<loc>https://pydoll.tech/landing-sitemap.xml</loc>
5+
</sitemap>
6+
<sitemap>
7+
<loc>https://pydoll.tech/docs/sitemap.xml</loc>
8+
</sitemap>
9+
</sitemapindex>

mkdocs.yml

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
site_name: Pydoll - Async Web Automation Library
2-
site_url: https://pydoll.tech
2+
site_url: https://pydoll.tech/docs/
33
repo_url: https://github.com/autoscrape-labs/pydoll
44
repo_name: autoscrape-labs/pydoll
55
use_directory_urls: true
@@ -105,7 +105,7 @@ plugins:
105105
- i18n:
106106
docs_structure: folder
107107
fallback_to_default: true
108-
reconfigure_material: true
108+
reconfigure_material: false
109109
reconfigure_search: true
110110
languages:
111111
- locale: en
@@ -190,7 +190,14 @@ plugins:
190190
show_bases: true
191191
heading_level: 1
192192

193-
extra: {}
193+
extra:
194+
alternate:
195+
- name: English
196+
link: /docs/
197+
lang: en
198+
- name: 中文
199+
link: /docs/zh/
200+
lang: zh
194201

195202
extra_css:
196203
- stylesheets/termynal.css

0 commit comments

Comments
 (0)