From b7feaf90834725b663be42a6d962f08100424ef5 Mon Sep 17 00:00:00 2001 From: takker99 <37929109+takker99@users.noreply.github.com> Date: Mon, 27 Jan 2025 14:17:22 +0900 Subject: [PATCH 1/4] chore(websocket): Rename `findMetadata` to `getPageMetadataFromLines` Followed the function name in `/assets/index.js` --- websocket/findMetadata.test.ts | 4 ++-- websocket/findMetadata.ts | 2 +- websocket/makeChanges.ts | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/websocket/findMetadata.test.ts b/websocket/findMetadata.test.ts index 30ae4c8..efbd136 100644 --- a/websocket/findMetadata.test.ts +++ b/websocket/findMetadata.test.ts @@ -1,4 +1,4 @@ -import { findMetadata, getHelpfeels } from "./findMetadata.ts"; +import { getPageMetadataFromLines, getHelpfeels } from "./findMetadata.ts"; import { assertEquals } from "@std/assert"; import { assertSnapshot } from "@std/testing/snapshot"; @@ -39,7 +39,7 @@ Prepare thumbnail [https://scrapbox.io/files/65e7f4413bc95600258481fb.svg https://scrapbox.io/files/65e7f82e03949c0024a367d0.svg]`; // Test findMetadata function's ability to extract various metadata from a page -Deno.test("findMetadata()", (t) => assertSnapshot(t, findMetadata(text))); +Deno.test("findMetadata()", (t) => assertSnapshot(t, getPageMetadataFromLines(text))); // Test Helpfeel extraction (lines starting with "?") // These are used for collecting questions and help requests in Scrapbox diff --git a/websocket/findMetadata.ts b/websocket/findMetadata.ts index fe0b6c1..f5f9ad4 100644 --- a/websocket/findMetadata.ts +++ b/websocket/findMetadata.ts @@ -18,7 +18,7 @@ import { parseYoutube } from "../parser/youtube.ts"; * @returns A tuple containing [links, projectLinks, icons, image, files, helpfeels, infoboxDefinition] * where image can be null if no suitable preview image is found */ -export const findMetadata = ( +export const getPageMetadataFromLines = ( text: string, ): [ string[], diff --git a/websocket/makeChanges.ts b/websocket/makeChanges.ts index a738ca6..0a3d52c 100644 --- a/websocket/makeChanges.ts +++ b/websocket/makeChanges.ts @@ -1,7 +1,7 @@ import { diffToChanges } from "./diffToChanges.ts"; import type { Page } from "@cosense/types/rest"; import type { Change } from "./change.ts"; -import { findMetadata, getHelpfeels } from "./findMetadata.ts"; +import { getPageMetadataFromLines, getHelpfeels } from "./findMetadata.ts"; import { isSameArray } from "./isSameArray.ts"; import { isString } from "@core/unknownutil/is/string"; @@ -55,7 +55,7 @@ export function* makeChanges( files, helpfeels, infoboxDefinition, - ] = findMetadata(after_.join("\n")); + ] = getPageMetadataFromLines(after_.join("\n")); if (!isSameArray(before.links, links)) yield { links }; if (!isSameArray(before.projectLinks, projectLinks)) yield { projectLinks }; if (!isSameArray(before.icons, icons)) yield { icons }; From 4ae8d274dffc66f71809557b32e47924bff8c47f Mon Sep 17 00:00:00 2001 From: takker99 <37929109+takker99@users.noreply.github.com> Date: Mon, 27 Jan 2025 14:38:49 +0900 Subject: [PATCH 2/4] fix(websocket): Folllow the description making algorithm in Cosense --- .../__snapshots__/findMetadata.test.ts.snap | 33 ------------ websocket/findMetadata.test.ts | 46 ++++++++++++++-- websocket/findMetadata.ts | 54 +++++++++++++------ websocket/makeChanges.ts | 25 +++------ 4 files changed, 86 insertions(+), 72 deletions(-) delete mode 100644 websocket/__snapshots__/findMetadata.test.ts.snap diff --git a/websocket/__snapshots__/findMetadata.test.ts.snap b/websocket/__snapshots__/findMetadata.test.ts.snap deleted file mode 100644 index cb19126..0000000 --- a/websocket/__snapshots__/findMetadata.test.ts.snap +++ /dev/null @@ -1,33 +0,0 @@ -export const snapshot = {}; - -snapshot[`findMetadata() 1`] = ` -[ - [ - "normal", - "link2", - "hashtag", - ], - [ - "/help-en/external-link", - ], - [ - "scrapbox", - "takker", - ], - "https://scrapbox.io/files/65f29c24974fd8002333b160.svg", - [ - "65f29c24974fd8002333b160", - "65e7f82e03949c0024a367d0", - "65e7f4413bc95600258481fb", - ], - [ - "Need help with setup!!", - ], - [ - "Name [scrapbox.icon]", - "Address Add [link2] here", - "Phone Adding # won't create a link", - "Strengths List about 3 items", - ], -] -`; diff --git a/websocket/findMetadata.test.ts b/websocket/findMetadata.test.ts index efbd136..bce95c1 100644 --- a/websocket/findMetadata.test.ts +++ b/websocket/findMetadata.test.ts @@ -1,6 +1,5 @@ -import { getPageMetadataFromLines, getHelpfeels } from "./findMetadata.ts"; -import { assertEquals } from "@std/assert"; -import { assertSnapshot } from "@std/testing/snapshot"; +import { getHelpfeels, getPageMetadataFromLines } from "./findMetadata.ts"; +import { assertEquals } from "@std/assert/equals"; // Test data for metadata extraction from a Scrapbox page // This sample includes various Scrapbox syntax elements: @@ -38,8 +37,45 @@ Prepare thumbnail [https://scrapbox.io/files/65e7f4413bc95600258481fb.svg https://scrapbox.io/files/65e7f82e03949c0024a367d0.svg]`; -// Test findMetadata function's ability to extract various metadata from a page -Deno.test("findMetadata()", (t) => assertSnapshot(t, getPageMetadataFromLines(text))); +Deno.test("getPageMetadataFromLines()", () => { + assertEquals(getPageMetadataFromLines(text), [ + "test page", + [ + "normal", + "link2", + "hashtag", + ], + [ + "/help-en/external-link", + ], + [ + "scrapbox", + "takker", + ], + "https://scrapbox.io/files/65f29c24974fd8002333b160.svg", + [ + "[normal]link", + "but `this [link]` is not a link", + "`Links [link] and images [https://scrapbox.io/files/65f29c0c9045b5002522c8bb.svg] in code blocks should be ignored`", + "`? Need help with setup!!`", + "#hashtag is recommended", + ], + [ + "65f29c24974fd8002333b160", + "65e7f82e03949c0024a367d0", + "65e7f4413bc95600258481fb", + ], + [ + "Need help with setup!!", + ], + [ + "Name\t[scrapbox.icon]", + "Address\tAdd [link2] here", + "Phone\tAdding # won't create a link", + "Strengths\tList about 3 items", + ], + ]); +}); // Test Helpfeel extraction (lines starting with "?") // These are used for collecting questions and help requests in Scrapbox diff --git a/websocket/findMetadata.ts b/websocket/findMetadata.ts index f5f9ad4..cc27159 100644 --- a/websocket/findMetadata.ts +++ b/websocket/findMetadata.ts @@ -21,24 +21,17 @@ import { parseYoutube } from "../parser/youtube.ts"; export const getPageMetadataFromLines = ( text: string, ): [ - string[], - string[], - string[], - string | null, - string[], - string[], - string[], + title: string, + links: string[], + projectLinks: string[], + icons: string[], + image: string | null, + descriptions: string[], + files: string[], + helpfeels: string[], + infoboxDefinition: string[], ] => { - const blocks = parse(text, { hasTitle: true }).flatMap((block) => { - switch (block.type) { - case "codeBlock": - case "title": - return []; - case "line": - case "table": - return block; - } - }); + const blocks = parse(text, { hasTitle: true }); /** Map for detecting duplicate links while preserving link type information * @@ -49,6 +42,7 @@ export const getPageMetadataFromLines = ( * When the same page is referenced by both formats, * we prioritize the bracket link format in the final output */ + let title = ""; const linksLc = new Map(); const links = [] as string[]; const projectLinksLc = new Set(); @@ -56,6 +50,7 @@ export const getPageMetadataFromLines = ( const iconsLc = new Set(); const icons = [] as string[]; let image: string | null = null; + const descriptions = [] as string[]; const files = new Set(); const helpfeels = new Set(); @@ -150,11 +145,31 @@ export const getPageMetadataFromLines = ( for (const block of blocks) { switch (block.type) { + case "title": { + title = block.text; + continue; + } case "line": + if (descriptions.length < 5 && block.nodes.length > 0) { + descriptions.push( + block.nodes[0].type === "helpfeel" || + block.nodes[0].type === "commandLine" + ? makeInlineCodeForDescription(block.nodes[0].raw) + : block.nodes.map((node) => node.raw).join("").trim().slice( + 0, + 200, + ), + ); + } for (const node of block.nodes) { lookup(node); } continue; + case "codeBlock": + if (descriptions.length < 5) { + descriptions.push(makeInlineCodeForDescription(block.content)); + } + continue; case "table": { for (const row of block.cells) { for (const nodes of row) { @@ -176,16 +191,21 @@ export const getPageMetadataFromLines = ( } return [ + title, links, projectLinks, icons, image, + descriptions, [...files], [...helpfeels], infoboxDefinition, ]; }; +const makeInlineCodeForDescription = (text: string): `\`${string}\`` => + `\`${text.trim().replaceAll("`", "\\`").slice(0, 198)}\``; + const cutId = (link: string): string => link.replace(/#[a-f\d]{24,32}$/, ""); /** Extract Helpfeel entries from text diff --git a/websocket/makeChanges.ts b/websocket/makeChanges.ts index 0a3d52c..05dd797 100644 --- a/websocket/makeChanges.ts +++ b/websocket/makeChanges.ts @@ -1,7 +1,7 @@ import { diffToChanges } from "./diffToChanges.ts"; import type { Page } from "@cosense/types/rest"; import type { Change } from "./change.ts"; -import { getPageMetadataFromLines, getHelpfeels } from "./findMetadata.ts"; +import { getHelpfeels, getPageMetadataFromLines } from "./findMetadata.ts"; import { isSameArray } from "./isSameArray.ts"; import { isString } from "@core/unknownutil/is/string"; @@ -22,22 +22,6 @@ export function* makeChanges( yield change; } - // Handle title changes - // Note: We always include title change commits for new pages (`persistent === false`) - // to ensure proper page initialization - if (before.lines[0].text !== after_[0] || !before.persistent) { - yield { title: after_[0] }; - } - - // Process changes in page descriptions - // Descriptions are the first 5 lines after the title (lines 1-5) - // These lines provide a summary or additional context for the page - const leftDescriptions = before.lines.slice(1, 6).map((line) => line.text); - const rightDescriptions = after_.slice(1, 6); - if (leftDescriptions.join("") !== rightDescriptions.join("")) { - yield { descriptions: rightDescriptions }; - } - // Process changes in various metadata // Metadata includes: // - links: References to other pages @@ -48,18 +32,25 @@ export function* makeChanges( // - helpfeels: Questions or help requests (lines starting with "?") // - infoboxDefinition: Structured data definitions const [ + title, links, projectLinks, icons, image, + descriptions, files, helpfeels, infoboxDefinition, ] = getPageMetadataFromLines(after_.join("\n")); + // Handle title changes + // Note: We always include title change commits for new pages (`persistent === false`) + // to ensure proper page initialization + if (before.title !== title || !before.persistent) yield { title }; if (!isSameArray(before.links, links)) yield { links }; if (!isSameArray(before.projectLinks, projectLinks)) yield { projectLinks }; if (!isSameArray(before.icons, icons)) yield { icons }; if (before.image !== image) yield { image }; + if (!isSameArray(before.descriptions, descriptions)) yield { descriptions }; if (!isSameArray(before.files, files)) yield { files }; if (!isSameArray(getHelpfeels(before.lines), helpfeels)) yield { helpfeels }; if (!isSameArray(before.infoboxDefinition, infoboxDefinition)) { From bc575d9500ac4cedaf13b4de3c7bbeeb872ea3a3 Mon Sep 17 00:00:00 2001 From: takker99 <37929109+takker99@users.noreply.github.com> Date: Mon, 27 Jan 2025 14:39:29 +0900 Subject: [PATCH 3/4] chore(websocket): Rename `findMetadata` to `getPageMetadataFromLines` --- .../{findMetadata.test.ts => getPageMetadataFromLines.test.ts} | 2 +- websocket/{findMetadata.ts => getPageMetadataFromLines.ts} | 0 websocket/makeChanges.ts | 2 +- 3 files changed, 2 insertions(+), 2 deletions(-) rename websocket/{findMetadata.test.ts => getPageMetadataFromLines.test.ts} (96%) rename websocket/{findMetadata.ts => getPageMetadataFromLines.ts} (100%) diff --git a/websocket/findMetadata.test.ts b/websocket/getPageMetadataFromLines.test.ts similarity index 96% rename from websocket/findMetadata.test.ts rename to websocket/getPageMetadataFromLines.test.ts index bce95c1..ef77d88 100644 --- a/websocket/findMetadata.test.ts +++ b/websocket/getPageMetadataFromLines.test.ts @@ -1,4 +1,4 @@ -import { getHelpfeels, getPageMetadataFromLines } from "./findMetadata.ts"; +import { getHelpfeels, getPageMetadataFromLines } from "./getPageMetadataFromLines.ts"; import { assertEquals } from "@std/assert/equals"; // Test data for metadata extraction from a Scrapbox page diff --git a/websocket/findMetadata.ts b/websocket/getPageMetadataFromLines.ts similarity index 100% rename from websocket/findMetadata.ts rename to websocket/getPageMetadataFromLines.ts diff --git a/websocket/makeChanges.ts b/websocket/makeChanges.ts index 05dd797..0059a03 100644 --- a/websocket/makeChanges.ts +++ b/websocket/makeChanges.ts @@ -1,7 +1,7 @@ import { diffToChanges } from "./diffToChanges.ts"; import type { Page } from "@cosense/types/rest"; import type { Change } from "./change.ts"; -import { getHelpfeels, getPageMetadataFromLines } from "./findMetadata.ts"; +import { getHelpfeels, getPageMetadataFromLines } from "./getPageMetadataFromLines.ts"; import { isSameArray } from "./isSameArray.ts"; import { isString } from "@core/unknownutil/is/string"; From eab1c7cb2826ad9502525ab1f0985f0b80fb51f7 Mon Sep 17 00:00:00 2001 From: takker99 <37929109+takker99@users.noreply.github.com> Date: Mon, 27 Jan 2025 14:44:03 +0900 Subject: [PATCH 4/4] fix(websocket): Add `linesCount` and `charsCount` to metadata extraction --- websocket/change.ts | 9 +++++++++ websocket/getPageMetadataFromLines.test.ts | 7 ++++++- websocket/getPageMetadataFromLines.ts | 5 +++++ websocket/makeChanges.ts | 9 ++++++++- 4 files changed, 28 insertions(+), 2 deletions(-) diff --git a/websocket/change.ts b/websocket/change.ts index b9c5cf9..a66dbc2 100644 --- a/websocket/change.ts +++ b/websocket/change.ts @@ -11,6 +11,8 @@ export type Change = | HelpFeelsChange | infoboxDefinitionChange | TitleChange + | LinesCountChange + | charsCountChange | PinChange; export interface InsertChange { _insert: string; @@ -72,6 +74,13 @@ export interface infoboxDefinitionChange { */ infoboxDefinition: string[]; } +export interface LinesCountChange { + linesCount: number; +} +export interface charsCountChange { + charsCount: number; +} + export interface PinChange { pin: number; } diff --git a/websocket/getPageMetadataFromLines.test.ts b/websocket/getPageMetadataFromLines.test.ts index ef77d88..1284058 100644 --- a/websocket/getPageMetadataFromLines.test.ts +++ b/websocket/getPageMetadataFromLines.test.ts @@ -1,4 +1,7 @@ -import { getHelpfeels, getPageMetadataFromLines } from "./getPageMetadataFromLines.ts"; +import { + getHelpfeels, + getPageMetadataFromLines, +} from "./getPageMetadataFromLines.ts"; import { assertEquals } from "@std/assert/equals"; // Test data for metadata extraction from a Scrapbox page @@ -74,6 +77,8 @@ Deno.test("getPageMetadataFromLines()", () => { "Phone\tAdding # won't create a link", "Strengths\tList about 3 items", ], + 26, + 659, ]); }); diff --git a/websocket/getPageMetadataFromLines.ts b/websocket/getPageMetadataFromLines.ts index cc27159..898ba04 100644 --- a/websocket/getPageMetadataFromLines.ts +++ b/websocket/getPageMetadataFromLines.ts @@ -30,6 +30,8 @@ export const getPageMetadataFromLines = ( files: string[], helpfeels: string[], infoboxDefinition: string[], + linesCount: number, + charsCount: number, ] => { const blocks = parse(text, { hasTitle: true }); @@ -190,6 +192,7 @@ export const getPageMetadataFromLines = ( } } + const lines = text.split("\n"); return [ title, links, @@ -200,6 +203,8 @@ export const getPageMetadataFromLines = ( [...files], [...helpfeels], infoboxDefinition, + lines.length, + lines.reduce((acc, line) => acc + [...line].length, 0), ]; }; diff --git a/websocket/makeChanges.ts b/websocket/makeChanges.ts index 0059a03..6627ec5 100644 --- a/websocket/makeChanges.ts +++ b/websocket/makeChanges.ts @@ -1,7 +1,10 @@ import { diffToChanges } from "./diffToChanges.ts"; import type { Page } from "@cosense/types/rest"; import type { Change } from "./change.ts"; -import { getHelpfeels, getPageMetadataFromLines } from "./getPageMetadataFromLines.ts"; +import { + getHelpfeels, + getPageMetadataFromLines, +} from "./getPageMetadataFromLines.ts"; import { isSameArray } from "./isSameArray.ts"; import { isString } from "@core/unknownutil/is/string"; @@ -41,6 +44,8 @@ export function* makeChanges( files, helpfeels, infoboxDefinition, + linesCount, + charsCount, ] = getPageMetadataFromLines(after_.join("\n")); // Handle title changes // Note: We always include title change commits for new pages (`persistent === false`) @@ -56,4 +61,6 @@ export function* makeChanges( if (!isSameArray(before.infoboxDefinition, infoboxDefinition)) { yield { infoboxDefinition }; } + yield { linesCount }; + yield { charsCount }; }