Skip to content

Commit dec45fa

Browse files
authored
Merge pull request #825 from getmaxun/develop
chore: release v0.0.25
2 parents 08279ac + acba072 commit dec45fa

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

42 files changed

+3786
-1911
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,7 @@ This project is licensed under <a href="./LICENSE">AGPLv3</a>.
158158
</p>
159159

160160
# Support Us
161-
Star the repository, contribute if you love what we’re building, or make a [one-time donation](https://bit.ly/maxun-oss). Every little bit helps us keep the lights on and the robots running.
161+
Star the repository, contribute if you love what we’re building, or [sponsor us](https://github.com/sponsors/amhsirak).
162162

163163
# Contributors
164164
Thank you to the combined efforts of everyone who contributes!
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.

maxun-core/package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "maxun-core",
3-
"version": "0.0.24",
3+
"version": "0.0.25",
44
"description": "Core package for Maxun, responsible for data extraction",
55
"main": "build/index.js",
66
"typings": "build/index.d.ts",

maxun-core/src/interpret.ts

Lines changed: 179 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,15 @@ export default class Interpreter extends EventEmitter {
7373

7474
private cumulativeResults: Record<string, any>[] = [];
7575

76+
private namedResults: Record<string, Record<string, any>> = {};
77+
78+
private screenshotCounter: number = 0;
79+
80+
private serializableDataByType: Record<string, Record<string, any>> = {
81+
scrapeList: {},
82+
scrapeSchema: {}
83+
};
84+
7685
constructor(workflow: WorkflowFile, options?: Partial<InterpreterOptions>) {
7786
super();
7887
this.workflow = workflow.workflow;
@@ -402,15 +411,37 @@ export default class Interpreter extends EventEmitter {
402411
* Beware of false linter errors - here, we know better!
403412
*/
404413
const wawActions: Record<CustomFunctions, (...args: any[]) => void> = {
405-
screenshot: async (params: PageScreenshotOptions) => {
414+
screenshot: async (
415+
params: PageScreenshotOptions,
416+
nameOverride?: string
417+
) => {
406418
if (this.options.debugChannel?.setActionType) {
407-
this.options.debugChannel.setActionType('screenshot');
419+
this.options.debugChannel.setActionType("screenshot");
408420
}
409421

410422
const screenshotBuffer = await page.screenshot({
411-
...params, path: undefined,
423+
...params,
424+
path: undefined,
412425
});
413-
await this.options.binaryCallback(screenshotBuffer, 'image/png');
426+
427+
const explicitName = (typeof nameOverride === 'string' && nameOverride.trim().length > 0) ? nameOverride.trim() : null;
428+
let screenshotName: string;
429+
430+
if (explicitName) {
431+
screenshotName = explicitName;
432+
} else {
433+
this.screenshotCounter += 1;
434+
screenshotName = `Screenshot ${this.screenshotCounter}`;
435+
}
436+
437+
await this.options.binaryCallback(
438+
{
439+
name: screenshotName,
440+
data: screenshotBuffer,
441+
mimeType: "image/png",
442+
},
443+
"image/png"
444+
);
414445
},
415446
enqueueLinks: async (selector: string) => {
416447
if (this.options.debugChannel?.setActionType) {
@@ -476,21 +507,55 @@ export default class Interpreter extends EventEmitter {
476507
this.cumulativeResults = [];
477508
}
478509

479-
if (this.cumulativeResults.length === 0) {
480-
this.cumulativeResults.push({});
481-
}
482-
483-
const mergedResult = this.cumulativeResults[0];
484510
const resultToProcess = Array.isArray(scrapeResult) ? scrapeResult[0] : scrapeResult;
485511

486-
Object.entries(resultToProcess).forEach(([key, value]) => {
487-
if (value !== undefined) {
488-
mergedResult[key] = value;
512+
if (this.cumulativeResults.length === 0) {
513+
const newRow = {};
514+
Object.entries(resultToProcess).forEach(([key, value]) => {
515+
if (value !== undefined) {
516+
newRow[key] = value;
517+
}
518+
});
519+
this.cumulativeResults.push(newRow);
520+
} else {
521+
const lastRow = this.cumulativeResults[this.cumulativeResults.length - 1];
522+
const newResultKeys = Object.keys(resultToProcess).filter(key => resultToProcess[key] !== undefined);
523+
const hasRepeatedKeys = newResultKeys.some(key => lastRow.hasOwnProperty(key));
524+
525+
if (hasRepeatedKeys) {
526+
const newRow = {};
527+
Object.entries(resultToProcess).forEach(([key, value]) => {
528+
if (value !== undefined) {
529+
newRow[key] = value;
530+
}
531+
});
532+
this.cumulativeResults.push(newRow);
533+
} else {
534+
Object.entries(resultToProcess).forEach(([key, value]) => {
535+
if (value !== undefined) {
536+
lastRow[key] = value;
537+
}
538+
});
489539
}
490-
});
540+
}
491541

492-
console.log("Updated merged result:", mergedResult);
493-
await this.options.serializableCallback([mergedResult]);
542+
const actionType = "scrapeSchema";
543+
const actionName = (schema as any).__name || "Texts";
544+
545+
if (!this.namedResults[actionType]) this.namedResults[actionType] = {};
546+
this.namedResults[actionType][actionName] = this.cumulativeResults;
547+
548+
if (!this.serializableDataByType[actionType]) this.serializableDataByType[actionType] = {};
549+
if (!this.serializableDataByType[actionType][actionName]) {
550+
this.serializableDataByType[actionType][actionName] = [];
551+
}
552+
553+
this.serializableDataByType[actionType][actionName] = [...this.cumulativeResults];
554+
555+
await this.options.serializableCallback({
556+
scrapeList: this.serializableDataByType.scrapeList,
557+
scrapeSchema: this.serializableDataByType.scrapeSchema
558+
});
494559
},
495560

496561
scrapeList: async (config: { listSelector: string, fields: any, limit?: number, pagination: any }) => {
@@ -508,18 +573,62 @@ export default class Interpreter extends EventEmitter {
508573
return;
509574
}
510575

511-
await this.ensureScriptsLoaded(page);
576+
try {
577+
await this.ensureScriptsLoaded(page);
512578

513-
if (this.options.debugChannel?.incrementScrapeListIndex) {
514-
this.options.debugChannel.incrementScrapeListIndex();
515-
}
579+
if (this.options.debugChannel?.incrementScrapeListIndex) {
580+
this.options.debugChannel.incrementScrapeListIndex();
581+
}
516582

517-
if (!config.pagination) {
518-
const scrapeResults: Record<string, any>[] = await page.evaluate((cfg) => window.scrapeList(cfg), config);
519-
await this.options.serializableCallback(scrapeResults);
520-
} else {
521-
const scrapeResults: Record<string, any>[] = await this.handlePagination(page, config);
522-
await this.options.serializableCallback(scrapeResults);
583+
let scrapeResults = [];
584+
585+
if (!config.pagination) {
586+
scrapeResults = await page.evaluate((cfg) => {
587+
try {
588+
return window.scrapeList(cfg);
589+
} catch (error) {
590+
console.warn('ScrapeList evaluation failed:', error.message);
591+
return [];
592+
}
593+
}, config);
594+
} else {
595+
scrapeResults = await this.handlePagination(page, config);
596+
}
597+
598+
if (!Array.isArray(scrapeResults)) {
599+
scrapeResults = [];
600+
}
601+
602+
const actionType = "scrapeList";
603+
const actionName = (config as any).__name || "List";
604+
605+
if (!this.serializableDataByType[actionType]) this.serializableDataByType[actionType] = {};
606+
if (!this.serializableDataByType[actionType][actionName]) {
607+
this.serializableDataByType[actionType][actionName] = [];
608+
}
609+
610+
this.serializableDataByType[actionType][actionName].push(...scrapeResults);
611+
612+
await this.options.serializableCallback({
613+
scrapeList: this.serializableDataByType.scrapeList,
614+
scrapeSchema: this.serializableDataByType.scrapeSchema
615+
});
616+
} catch (error) {
617+
console.error('ScrapeList action failed completely:', error.message);
618+
619+
const actionType = "scrapeList";
620+
const actionName = (config as any).__name || "List";
621+
622+
if (!this.namedResults[actionType]) this.namedResults[actionType] = {};
623+
this.namedResults[actionType][actionName] = [];
624+
625+
if (!this.serializableDataByType[actionType]) this.serializableDataByType[actionType] = {};
626+
this.serializableDataByType[actionType][actionName] = [];
627+
628+
await this.options.serializableCallback({
629+
scrapeList: this.serializableDataByType.scrapeList,
630+
scrapeSchema: this.serializableDataByType.scrapeSchema
631+
});
523632
}
524633
},
525634

@@ -595,12 +704,56 @@ export default class Interpreter extends EventEmitter {
595704

596705

597706
for (const step of steps) {
707+
if (this.isAborted) {
708+
this.log('Workflow aborted during step execution', Level.WARN);
709+
return;
710+
}
711+
598712
this.log(`Launching ${String(step.action)}`, Level.LOG);
599713

714+
let stepName: string | null = null;
715+
try {
716+
const debug = this.options.debugChannel;
717+
if (debug?.setActionType) {
718+
debug.setActionType(String(step.action));
719+
}
720+
721+
if ((step as any)?.name) {
722+
stepName = (step as any).name;
723+
} else if (
724+
Array.isArray((step as any)?.args) &&
725+
(step as any).args.length > 0 &&
726+
typeof (step as any).args[0] === "object" &&
727+
"__name" in (step as any).args[0]
728+
) {
729+
stepName = (step as any).args[0].__name;
730+
} else if (
731+
typeof (step as any)?.args === "object" &&
732+
step?.args !== null &&
733+
"__name" in (step as any).args
734+
) {
735+
stepName = (step as any).args.__name;
736+
}
737+
738+
if (!stepName) {
739+
stepName = String(step.action);
740+
}
741+
742+
if (debug && typeof (debug as any).setActionName === "function") {
743+
(debug as any).setActionName(stepName);
744+
}
745+
} catch (err) {
746+
this.log(`Failed to set action name/type: ${(err as Error).message}`, Level.WARN);
747+
}
748+
600749
if (step.action in wawActions) {
601750
// "Arrayifying" here should not be needed (TS + syntax checker - only arrays; but why not)
602751
const params = !step.args || Array.isArray(step.args) ? step.args : [step.args];
603-
await wawActions[step.action as CustomFunctions](...(params ?? []));
752+
if (step.action === 'screenshot') {
753+
await (wawActions.screenshot as any)(...(params ?? []), stepName ?? undefined);
754+
} else {
755+
await wawActions[step.action as CustomFunctions](...(params ?? []));
756+
}
604757
} else {
605758
if (this.options.debugChannel?.setActionType) {
606759
this.options.debugChannel.setActionType(String(step.action));

maxun-core/src/preprocessor.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,8 @@ export default class Preprocessor {
3636
what: Joi.array().items({
3737
action: Joi.string().required(),
3838
args: Joi.array().items(Joi.any()),
39+
name: Joi.string(),
40+
actionId: Joi.string()
3941
}).required(),
4042
}),
4143
).required(),

maxun-core/src/types/workflow.ts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,9 @@ export type CustomFunctions = 'scrape' | 'scrapeSchema' | 'scroll' | 'screenshot
3232

3333
export type What = {
3434
action: MethodNames<Page> | CustomFunctions,
35-
args?: any[]
35+
args?: any[],
36+
name?: string,
37+
actionId?: string
3638
};
3739

3840
export type PageState = Partial<BaseConditions>;

package.json

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "maxun",
3-
"version": "0.0.24",
3+
"version": "0.0.25",
44
"author": "Maxun",
55
"license": "AGPL-3.0-or-later",
66
"dependencies": {
@@ -51,7 +51,7 @@
5151
"lodash": "^4.17.21",
5252
"loglevel": "^1.8.0",
5353
"loglevel-plugin-remote": "^0.6.8",
54-
"maxun-core": "^0.0.24",
54+
"maxun-core": "^0.0.25",
5555
"minio": "^8.0.1",
5656
"moment-timezone": "^0.5.45",
5757
"node-cron": "^3.0.3",
@@ -81,7 +81,7 @@
8181
"swagger-jsdoc": "^6.2.8",
8282
"swagger-ui-express": "^5.0.1",
8383
"typedoc": "^0.23.8",
84-
"typescript": "^4.6.3",
84+
"typescript": "^5.0.0",
8585
"uuid": "^8.3.2",
8686
"uuidv4": "^6.2.12",
8787
"web-vitals": "^2.1.4",

0 commit comments

Comments
 (0)