Skip to content

Commit 989d8bd

Browse files
authored
Merge pull request #764 from getmaxun/develop
chore: release v0.0.23
2 parents 37d788d + 02f09e0 commit 989d8bd

File tree

29 files changed

+1500
-636
lines changed

29 files changed

+1500
-636
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ Maxun lets you train a robot in 2 minutes and scrape the web on auto-pilot. Web
1919
<a href="https://docs.maxun.dev/?ref=ghread"><b>Documentation</b></a> |
2020
<a href="https://www.maxun.dev/?ref=ghread"><b>Website</b></a> |
2121
<a href="https://discord.gg/5GbPjBUkws"><b>Discord</b></a> |
22-
<a href="https://x.com/maxun_io?ref=ghread"><b>Twitter</b></a> |
22+
<a href="https://x.com/MaxunHQ?ref=ghread"><b>Twitter</b></a> |
2323
<a href="https://www.youtube.com/@MaxunOSS?ref=ghread"><b>Watch Tutorials</b></a>
2424
<br />
2525
<br />

docker-compose.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,9 @@ services:
4343
# to ensure Playwright works in Docker
4444
PLAYWRIGHT_BROWSERS_PATH: /ms-playwright
4545
PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD: 0
46+
# Force container/CI detection for headless mode
47+
CI: "true"
48+
CONTAINER: "true"
4649
# DEBUG: pw:api
4750
# PWDEBUG: 1 # Enables debugging
4851
CHROMIUM_FLAGS: '--disable-gpu --no-sandbox --headless=new'

maxun-core/package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "maxun-core",
3-
"version": "0.0.22",
3+
"version": "0.0.23",
44
"description": "Core package for Maxun, responsible for data extraction",
55
"main": "build/index.js",
66
"typings": "build/index.d.ts",

maxun-core/src/interpret.ts

Lines changed: 90 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,8 @@ export default class Interpreter extends EventEmitter {
6464
private concurrency: Concurrency;
6565

6666
private stopper: Function | null = null;
67+
68+
private isAborted: boolean = false;
6769

6870
private log: typeof log;
6971

@@ -114,6 +116,13 @@ export default class Interpreter extends EventEmitter {
114116
})
115117
}
116118

119+
/**
120+
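* Sets the abort flag. Execution stops cooperatively at the next abort check (before a step, scrape action, or pagination loop iteration); operations already awaiting are not interrupted.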
121+
*/
122+
public abort(): void {
123+
this.isAborted = true;
124+
}
125+
117126
private async applyAdBlocker(page: Page): Promise<void> {
118127
if (this.blocker) {
119128
try {
@@ -372,6 +381,11 @@ export default class Interpreter extends EventEmitter {
372381
* @param steps Array of actions.
373382
*/
374383
private async carryOutSteps(page: Page, steps: What[]): Promise<void> {
384+
if (this.isAborted) {
385+
this.log('Workflow aborted, stopping execution', Level.WARN);
386+
return;
387+
}
388+
375389
/**
376390
* Defines overloaded (or added) methods/actions usable in the workflow.
377391
* If a method overloads any existing method of the Page class, it accepts the same set
@@ -433,6 +447,11 @@ export default class Interpreter extends EventEmitter {
433447
},
434448

435449
scrapeSchema: async (schema: Record<string, { selector: string; tag: string, attribute: string; shadow: string}>) => {
450+
if (this.isAborted) {
451+
this.log('Workflow aborted, stopping scrapeSchema', Level.WARN);
452+
return;
453+
}
454+
436455
if (this.options.debugChannel?.setActionType) {
437456
this.options.debugChannel.setActionType('scrapeSchema');
438457
}
@@ -468,6 +487,11 @@ export default class Interpreter extends EventEmitter {
468487
},
469488

470489
scrapeList: async (config: { listSelector: string, fields: any, limit?: number, pagination: any }) => {
490+
if (this.isAborted) {
491+
this.log('Workflow aborted, stopping scrapeList', Level.WARN);
492+
return;
493+
}
494+
471495
if (this.options.debugChannel?.setActionType) {
472496
this.options.debugChannel.setActionType('scrapeList');
473497
}
@@ -622,6 +646,11 @@ export default class Interpreter extends EventEmitter {
622646
limit?: number,
623647
pagination: any
624648
}) {
649+
if (this.isAborted) {
650+
this.log('Workflow aborted, stopping pagination', Level.WARN);
651+
return [];
652+
}
653+
625654
let allResults: Record<string, any>[] = [];
626655
let previousHeight = 0;
627656
let scrapedItems: Set<string> = new Set<string>();
@@ -635,6 +664,12 @@ export default class Interpreter extends EventEmitter {
635664
};
636665

637666
const scrapeCurrentPage = async () => {
667+
// Check abort flag before scraping current page
668+
if (this.isAborted) {
669+
debugLog("Workflow aborted, stopping scrapeCurrentPage");
670+
return;
671+
}
672+
638673
const results = await page.evaluate((cfg) => window.scrapeList(cfg), config);
639674
const newResults = results.filter(item => {
640675
const uniqueKey = JSON.stringify(item);
@@ -723,7 +758,12 @@ export default class Interpreter extends EventEmitter {
723758
let unchangedResultCounter = 0;
724759

725760
try {
726-
while (true) {
761+
while (true) {
762+
if (this.isAborted) {
763+
this.log('Workflow aborted during pagination loop', Level.WARN);
764+
return allResults;
765+
}
766+
727767
switch (config.pagination.type) {
728768
case 'scrollDown': {
729769
let previousResultCount = allResults.length;
@@ -734,10 +774,22 @@ export default class Interpreter extends EventEmitter {
734774
return allResults;
735775
}
736776

737-
await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
777+
await page.evaluate(() => {
778+
const scrollHeight = Math.max(
779+
document.body.scrollHeight,
780+
document.documentElement.scrollHeight
781+
);
782+
783+
window.scrollTo(0, scrollHeight);
784+
});
738785
await page.waitForTimeout(2000);
739786

740-
const currentHeight = await page.evaluate(() => document.body.scrollHeight);
787+
const currentHeight = await page.evaluate(() => {
788+
return Math.max(
789+
document.body.scrollHeight,
790+
document.documentElement.scrollHeight
791+
);
792+
});
741793
const currentResultCount = allResults.length;
742794

743795
if (currentResultCount === previousResultCount) {
@@ -969,6 +1021,11 @@ export default class Interpreter extends EventEmitter {
9691021
// const MAX_NO_NEW_ITEMS = 2;
9701022

9711023
while (true) {
1024+
if (this.isAborted) {
1025+
this.log('Workflow aborted during pagination loop', Level.WARN);
1026+
return allResults;
1027+
}
1028+
9721029
// Find working button with retry mechanism
9731030
const { button: loadMoreButton, workingSelector, updatedSelectors } = await findWorkingButton(availableSelectors);
9741031

@@ -1024,10 +1081,22 @@ export default class Interpreter extends EventEmitter {
10241081

10251082
// Wait for content to load and check scroll height
10261083
await page.waitForTimeout(2000);
1027-
await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
1084+
await page.evaluate(() => {
1085+
const scrollHeight = Math.max(
1086+
document.body.scrollHeight,
1087+
document.documentElement.scrollHeight
1088+
);
1089+
1090+
window.scrollTo(0, scrollHeight);
1091+
});
10281092
await page.waitForTimeout(2000);
1029-
1030-
const currentHeight = await page.evaluate(() => document.body.scrollHeight);
1093+
1094+
const currentHeight = await page.evaluate(() => {
1095+
return Math.max(
1096+
document.body.scrollHeight,
1097+
document.documentElement.scrollHeight
1098+
);
1099+
});
10311100
const heightChanged = currentHeight !== previousHeight;
10321101
previousHeight = currentHeight;
10331102

@@ -1120,6 +1189,11 @@ export default class Interpreter extends EventEmitter {
11201189
}
11211190

11221191
private async runLoop(p: Page, workflow: Workflow) {
1192+
if (this.isAborted) {
1193+
this.log('Workflow aborted in runLoop', Level.WARN);
1194+
return;
1195+
}
1196+
11231197
let workflowCopy: Workflow = JSON.parse(JSON.stringify(workflow));
11241198

11251199
workflowCopy = this.removeSpecialSelectors(workflowCopy);
@@ -1150,6 +1224,11 @@ export default class Interpreter extends EventEmitter {
11501224
const MAX_LOOP_ITERATIONS = 1000; // Circuit breaker
11511225

11521226
while (true) {
1227+
if (this.isAborted) {
1228+
this.log('Workflow aborted during step execution', Level.WARN);
1229+
return;
1230+
}
1231+
11531232
// Circuit breaker to prevent infinite loops
11541233
if (++loopIterations > MAX_LOOP_ITERATIONS) {
11551234
this.log('Maximum loop iterations reached, terminating to prevent infinite loop', Level.ERROR);
@@ -1232,6 +1311,11 @@ export default class Interpreter extends EventEmitter {
12321311
}
12331312
lastAction = action;
12341313

1314+
if (this.isAborted) {
1315+
this.log('Workflow aborted before action execution', Level.WARN);
1316+
return;
1317+
}
1318+
12351319
try {
12361320
console.log("Carrying out:", action.what);
12371321
await this.carryOutSteps(p, action.what);

package.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "maxun",
3-
"version": "0.0.22",
3+
"version": "0.0.23",
44
"author": "Maxun",
55
"license": "AGPL-3.0-or-later",
66
"dependencies": {
@@ -50,7 +50,7 @@
5050
"lodash": "^4.17.21",
5151
"loglevel": "^1.8.0",
5252
"loglevel-plugin-remote": "^0.6.8",
53-
"maxun-core": "^0.0.22",
53+
"maxun-core": "^0.0.23",
5454
"minio": "^8.0.1",
5555
"moment-timezone": "^0.5.45",
5656
"node-cron": "^3.0.3",

public/locales/de.json

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,12 @@
4848
"options": "Optionen",
4949
"heading": "Meine Roboter",
5050
"new": "Roboter erstellen",
51+
"search_criteria": "Versuchen Sie, Ihre Suchkriterien anzupassen",
52+
"placeholder": {
53+
"title": "Alles bereit für den Start",
54+
"body": "Roboter, die Sie erstellen, werden hier angezeigt. Klicken Sie auf „Roboter erstellen“, um loszulegen!",
55+
"search": "Keine Roboter entsprechen Ihrer Suche"
56+
},
5157
"modal": {
5258
"title": "Geben Sie die URL ein",
5359
"login_title": "Ist für diese Website eine Anmeldung erforderlich?",
@@ -84,12 +90,18 @@
8490
"runs": "Alle Ausführungen",
8591
"runStatus": "Status",
8692
"runName": "Name",
93+
"name": "Name",
8794
"startedAt": "Gestartet am",
8895
"finishedAt": "Beendet am",
8996
"delete": "Löschen",
9097
"settings": "Einstellungen",
9198
"search": "Ausführungen suchen...",
9299
"sort_tooltip": "Zum Sortieren klicken",
100+
"placeholder": {
101+
"title": "Keine Ausführungen gefunden",
102+
"body": "Hier werden alle Ihre Roboter-Ausführungen angezeigt. Sobald ein Roboter aktiv ist, werden seine Ausführungen hier protokolliert.",
103+
"search": "Keine Ausführungen entsprechen Ihrer Suche"
104+
},
93105
"notifications": {
94106
"no_runs": "Keine Ausführungen gefunden. Bitte versuchen Sie es erneut.",
95107
"delete_success": "Ausführung erfolgreich gelöscht"
@@ -241,7 +253,15 @@
241253
"unable_create_settings": "Listeneinstellungen können nicht erstellt werden. Stellen Sie sicher, dass Sie ein Feld für die Liste definiert haben.",
242254
"capture_text_discarded": "Texterfassung verworfen",
243255
"capture_list_discarded": "Listenerfassung verworfen",
244-
"label_required": "Beschriftung darf nicht leer sein"
256+
"label_required": "Beschriftung darf nicht leer sein",
257+
"duplicate_label": "Diese Beschriftung existiert bereits. Bitte verwenden Sie eine eindeutige Beschriftung.",
258+
"no_text_captured": "Bitte markieren und wählen Sie Textelemente aus, bevor Sie bestätigen.",
259+
"capture_list_first": "Bitte bewegen Sie die Maus über eine Liste und wählen Sie Textfelder darin aus",
260+
"confirm_all_list_fields": "Bitte bestätigen Sie alle erfassten Listenfelder, bevor Sie fortfahren"
261+
},
262+
"tooltips": {
263+
"capture_list_first": "Bewegen Sie die Maus über eine Liste und wählen Sie Textfelder darin aus",
264+
"confirm_all_list_fields": "Bitte bestätigen Sie alle erfassten Listenfelder, bevor Sie fortfahren"
245265
}
246266
},
247267
"save_recording": {
@@ -258,7 +278,8 @@
258278
},
259279
"errors": {
260280
"user_not_logged": "Benutzer nicht angemeldet. Aufnahme kann nicht gespeichert werden.",
261-
"exists_warning": "Ein Roboter mit diesem Namen existiert bereits, bitte bestätigen Sie das Überschreiben des Roboters."
281+
"exists_warning": "Ein Roboter mit diesem Namen existiert bereits, bitte bestätigen Sie das Überschreiben des Roboters.",
282+
"no_actions_performed": "Roboter kann nicht gespeichert werden. Bitte führen Sie mindestens eine Erfassungsaktion durch, bevor Sie speichern."
262283
},
263284
"tooltips": {
264285
"saving": "Workflow wird optimiert und gespeichert"

public/locales/en.json

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,12 @@
4848
"options": "Options",
4949
"heading":"My Robots",
5050
"new":"Create Robot",
51+
"search_criteria": "Try adjusting your search criteria",
52+
"placeholder": {
53+
"title": "You're All Set to Start",
54+
"body": "Robots you create will appear here. Click \"Create Robot\" to get started!",
55+
"search": "No robots match your search"
56+
},
5157
"modal":{
5258
"title":"Enter the URL",
5359
"login_title": "Does this website require logging in?",
@@ -84,12 +90,18 @@
8490
"runs":"All Runs",
8591
"runStatus":"Status",
8692
"runName":"Name",
93+
"name":"Name",
8794
"startedAt":"Started At",
8895
"finishedAt":"Finished At",
8996
"delete":"Delete",
9097
"settings":"Settings",
9198
"search":"Search Runs...",
9299
"sort_tooltip": "Click to sort",
100+
"placeholder": {
101+
"title": "No Runs Found",
102+
"body": "This is where all your robot runs will appear. Once a robot is active, its runs will be logged here.",
103+
"search":"No runs match your search"
104+
},
93105
"notifications": {
94106
"no_runs": "No runs found. Please try again.",
95107
"delete_success": "Run deleted successfully"
@@ -241,7 +253,15 @@
241253
"unable_create_settings": "Unable to create list settings. Make sure you have defined a field for the list.",
242254
"capture_text_discarded": "Capture Text Discarded",
243255
"capture_list_discarded": "Capture List Discarded",
244-
"label_required": "Label cannot be empty"
256+
"label_required": "Label cannot be empty",
257+
"duplicate_label": "This label already exists. Please use a unique label.",
258+
"no_text_captured": "Please highlight and select text elements before confirming.",
259+
"capture_list_first": "Please hover over a list and select text fields inside it first",
260+
"confirm_all_list_fields": "Please confirm all captured list fields before proceeding"
261+
},
262+
"tooltips": {
263+
"capture_list_first": "Hover over a list and select text fields inside it first",
264+
"confirm_all_list_fields": "Please confirm all captured list fields before proceeding"
245265
}
246266
},
247267
"save_recording": {
@@ -258,7 +278,8 @@
258278
},
259279
"errors": {
260280
"user_not_logged": "User not logged in. Cannot save recording.",
261-
"exists_warning": "Robot with this name already exists, please confirm the Robot's overwrite."
281+
"exists_warning": "Robot with this name already exists, please confirm the Robot's overwrite.",
282+
"no_actions_performed": "Cannot save robot. Please perform at least one capture action before saving."
262283
},
263284
"tooltips": {
264285
"saving": "Optimizing and saving the workflow"

0 commit comments

Comments
 (0)