diff --git a/websocket/__snapshots__/findMetadata.test.ts.snap b/websocket/__snapshots__/findMetadata.test.ts.snap deleted file mode 100644 index cb19126..0000000 --- a/websocket/__snapshots__/findMetadata.test.ts.snap +++ /dev/null @@ -1,33 +0,0 @@ -export const snapshot = {}; - -snapshot[`findMetadata() 1`] = ` -[ - [ - "normal", - "link2", - "hashtag", - ], - [ - "/help-en/external-link", - ], - [ - "scrapbox", - "takker", - ], - "https://scrapbox.io/files/65f29c24974fd8002333b160.svg", - [ - "65f29c24974fd8002333b160", - "65e7f82e03949c0024a367d0", - "65e7f4413bc95600258481fb", - ], - [ - "Need help with setup!!", - ], - [ - "Name [scrapbox.icon]", - "Address Add [link2] here", - "Phone Adding # won't create a link", - "Strengths List about 3 items", - ], -] -`; diff --git a/websocket/change.ts b/websocket/change.ts index b9c5cf9..a66dbc2 100644 --- a/websocket/change.ts +++ b/websocket/change.ts @@ -11,6 +11,8 @@ export type Change = | HelpFeelsChange | infoboxDefinitionChange | TitleChange + | LinesCountChange + | charsCountChange | PinChange; export interface InsertChange { _insert: string; @@ -72,6 +74,13 @@ export interface infoboxDefinitionChange { */ infoboxDefinition: string[]; } +export interface LinesCountChange { + linesCount: number; +} +export interface charsCountChange { + charsCount: number; +} + export interface PinChange { pin: number; } diff --git a/websocket/findMetadata.test.ts b/websocket/getPageMetadataFromLines.test.ts similarity index 54% rename from websocket/findMetadata.test.ts rename to websocket/getPageMetadataFromLines.test.ts index 30ae4c8..1284058 100644 --- a/websocket/findMetadata.test.ts +++ b/websocket/getPageMetadataFromLines.test.ts @@ -1,6 +1,8 @@ -import { findMetadata, getHelpfeels } from "./findMetadata.ts"; -import { assertEquals } from "@std/assert"; -import { assertSnapshot } from "@std/testing/snapshot"; +import { + getHelpfeels, + getPageMetadataFromLines, +} from "./getPageMetadataFromLines.ts"; +import { assertEquals } from "@std/assert/equals"; // Test data for metadata extraction from a Scrapbox page // This sample includes various Scrapbox syntax elements: @@ -38,8 +40,47 @@ Prepare thumbnail [https://scrapbox.io/files/65e7f4413bc95600258481fb.svg https://scrapbox.io/files/65e7f82e03949c0024a367d0.svg]`; -// Test findMetadata function's ability to extract various metadata from a page -Deno.test("findMetadata()", (t) => assertSnapshot(t, findMetadata(text))); +Deno.test("getPageMetadataFromLines()", () => { + assertEquals(getPageMetadataFromLines(text), [ + "test page", + [ + "normal", + "link2", + "hashtag", + ], + [ + "/help-en/external-link", + ], + [ + "scrapbox", + "takker", + ], + "https://scrapbox.io/files/65f29c24974fd8002333b160.svg", + [ + "[normal]link", + "but `this [link]` is not a link", + "`Links [link] and images [https://scrapbox.io/files/65f29c0c9045b5002522c8bb.svg] in code blocks should be ignored`", + "`? Need help with setup!!`", + "#hashtag is recommended", + ], + [ + "65f29c24974fd8002333b160", + "65e7f82e03949c0024a367d0", + "65e7f4413bc95600258481fb", + ], + [ + "Need help with setup!!", + ], + [ + "Name\t[scrapbox.icon]", + "Address\tAdd [link2] here", + "Phone\tAdding # won't create a link", + "Strengths\tList about 3 items", + ], + 26, + 659, + ]); +}); // Test Helpfeel extraction (lines starting with "?") // These are used for collecting questions and help requests in Scrapbox diff --git a/websocket/findMetadata.ts b/websocket/getPageMetadataFromLines.ts similarity index 81% rename from websocket/findMetadata.ts rename to websocket/getPageMetadataFromLines.ts index fe0b6c1..898ba04 100644 --- a/websocket/findMetadata.ts +++ b/websocket/getPageMetadataFromLines.ts @@ -18,27 +18,22 @@ import { parseYoutube } from "../parser/youtube.ts"; * @returns A tuple containing [links, projectLinks, icons, image, files, helpfeels, infoboxDefinition] * where image can be null if no suitable preview image is found */ -export const findMetadata = ( +export const getPageMetadataFromLines = ( text: string, ): [ - string[], - string[], - string[], - string | null, - string[], - string[], - string[], + title: string, + links: string[], + projectLinks: string[], + icons: string[], + image: string | null, + descriptions: string[], + files: string[], + helpfeels: string[], + infoboxDefinition: string[], + linesCount: number, + charsCount: number, ] => { - const blocks = parse(text, { hasTitle: true }).flatMap((block) => { - switch (block.type) { - case "codeBlock": - case "title": - return []; - case "line": - case "table": - return block; - } - }); + const blocks = parse(text, { hasTitle: true }); /** Map for detecting duplicate links while preserving link type information * @@ -49,6 +44,7 @@ export const findMetadata = ( * When the same page is referenced by both formats, * we prioritize the bracket link format in the final output */ + let title = ""; const linksLc = new Map(); const links = [] as string[]; const projectLinksLc = new Set(); @@ -56,6 +52,7 @@ export const findMetadata = ( const iconsLc = new Set(); const icons = [] as string[]; let image: string | null = null; + const descriptions = [] as string[]; const files = new Set(); const helpfeels = new Set(); @@ -150,11 +147,31 @@ export const findMetadata = ( for (const block of blocks) { switch (block.type) { + case "title": { + title = block.text; + continue; + } case "line": + if (descriptions.length < 5 && block.nodes.length > 0) { + descriptions.push( + block.nodes[0].type === "helpfeel" || + block.nodes[0].type === "commandLine" + ? makeInlineCodeForDescription(block.nodes[0].raw) + : block.nodes.map((node) => node.raw).join("").trim().slice( + 0, + 200, + ), + ); + } for (const node of block.nodes) { lookup(node); } continue; + case "codeBlock": + if (descriptions.length < 5) { + descriptions.push(makeInlineCodeForDescription(block.content)); + } + continue; case "table": { for (const row of block.cells) { for (const nodes of row) { @@ -175,17 +192,25 @@ export const findMetadata = ( } } + const lines = text.split("\n"); return [ + title, links, projectLinks, icons, image, + descriptions, [...files], [...helpfeels], infoboxDefinition, + lines.length, + lines.reduce((acc, line) => acc + [...line].length, 0), ]; }; +const makeInlineCodeForDescription = (text: string): `\`${string}\`` => + `\`${text.trim().replaceAll("`", "\\`").slice(0, 198)}\``; + const cutId = (link: string): string => link.replace(/#[a-f\d]{24,32}$/, ""); /** Extract Helpfeel entries from text diff --git a/websocket/makeChanges.ts b/websocket/makeChanges.ts index a738ca6..6627ec5 100644 --- a/websocket/makeChanges.ts +++ b/websocket/makeChanges.ts @@ -1,7 +1,10 @@ import { diffToChanges } from "./diffToChanges.ts"; import type { Page } from "@cosense/types/rest"; import type { Change } from "./change.ts"; -import { findMetadata, getHelpfeels } from "./findMetadata.ts"; +import { + getHelpfeels, + getPageMetadataFromLines, +} from "./getPageMetadataFromLines.ts"; import { isSameArray } from "./isSameArray.ts"; import { isString } from "@core/unknownutil/is/string"; @@ -22,22 +25,6 @@ export function* makeChanges( yield change; } - // Handle title changes - // Note: We always include title change commits for new pages (`persistent === false`) - // to ensure proper page initialization - if (before.lines[0].text !== after_[0] || !before.persistent) { - yield { title: after_[0] }; - } - - // Process changes in page descriptions - // Descriptions are the first 5 lines after the title (lines 1-5) - // These lines provide a summary or additional context for the page - const leftDescriptions = before.lines.slice(1, 6).map((line) => line.text); - const rightDescriptions = after_.slice(1, 6); - if (leftDescriptions.join("") !== rightDescriptions.join("")) { - yield { descriptions: rightDescriptions }; - } - // Process changes in various metadata // Metadata includes: // - links: References to other pages @@ -48,21 +35,32 @@ export function* makeChanges( // - helpfeels: Questions or help requests (lines starting with "?") // - infoboxDefinition: Structured data definitions const [ + title, links, projectLinks, icons, image, + descriptions, files, helpfeels, infoboxDefinition, - ] = findMetadata(after_.join("\n")); + linesCount, + charsCount, + ] = getPageMetadataFromLines(after_.join("\n")); + // Handle title changes + // Note: We always include title change commits for new pages (`persistent === false`) + // to ensure proper page initialization + if (before.title !== title || !before.persistent) yield { title }; if (!isSameArray(before.links, links)) yield { links }; if (!isSameArray(before.projectLinks, projectLinks)) yield { projectLinks }; if (!isSameArray(before.icons, icons)) yield { icons }; if (before.image !== image) yield { image }; + if (!isSameArray(before.descriptions, descriptions)) yield { descriptions }; if (!isSameArray(before.files, files)) yield { files }; if (!isSameArray(getHelpfeels(before.lines), helpfeels)) yield { helpfeels }; if (!isSameArray(before.infoboxDefinition, infoboxDefinition)) { yield { infoboxDefinition }; } + yield { linesCount }; + yield { charsCount }; }