Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 0 additions & 33 deletions websocket/__snapshots__/findMetadata.test.ts.snap

This file was deleted.

9 changes: 9 additions & 0 deletions websocket/change.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ export type Change =
| HelpFeelsChange
| infoboxDefinitionChange
| TitleChange
| LinesCountChange
| charsCountChange
| PinChange;
export interface InsertChange {
_insert: string;
Expand Down Expand Up @@ -72,6 +74,13 @@ export interface infoboxDefinitionChange {
*/
infoboxDefinition: string[];
}
/** Change entry that reports the page's line count. */
export interface LinesCountChange {
/** Total number of lines in the page text. */
linesCount: number;
}
/** Change entry that reports the page's character count. */
export interface charsCountChange {
/** Total number of characters in the page text.
*
* NOTE(review): the producer visible in this change sums per-line lengths
* (counted by iterating each line's string, line breaks excluded) — confirm
* that this matches what the receiving side expects.
*/
charsCount: number;
}

/** Change entry that carries the page's pin value. */
export interface PinChange {
/** Pin value for the page.
*
* NOTE(review): the meaning of the number (flag, ordering key, timestamp, …)
* is not visible here — confirm against the API before relying on it.
*/
pin: number;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import { findMetadata, getHelpfeels } from "./findMetadata.ts";
import { assertEquals } from "@std/assert";
import { assertSnapshot } from "@std/testing/snapshot";
import {
getHelpfeels,
getPageMetadataFromLines,
} from "./getPageMetadataFromLines.ts";
import { assertEquals } from "@std/assert/equals";

// Test data for metadata extraction from a Scrapbox page
// This sample includes various Scrapbox syntax elements:
Expand Down Expand Up @@ -38,8 +40,47 @@ Prepare thumbnail

[https://scrapbox.io/files/65e7f4413bc95600258481fb.svg https://scrapbox.io/files/65e7f82e03949c0024a367d0.svg]`;

// Test findMetadata function's ability to extract various metadata from a page
Deno.test("findMetadata()", (t) => assertSnapshot(t, findMetadata(text)));
// Checks every position of the tuple returned by getPageMetadataFromLines()
// against the values expected for the sample page defined above.
Deno.test("getPageMetadataFromLines()", () => {
  // Annotated with the function's return type so the literal is checked as a
  // tuple rather than widened to a plain array.
  const expected: ReturnType<typeof getPageMetadataFromLines> = [
    // title
    "test page",
    // links
    ["normal", "link2", "hashtag"],
    // projectLinks
    ["/help-en/external-link"],
    // icons
    ["scrapbox", "takker"],
    // image
    "https://scrapbox.io/files/65f29c24974fd8002333b160.svg",
    // descriptions
    [
      "[normal]link",
      "but `this [link]` is not a link",
      "`Links [link] and images [https://scrapbox.io/files/65f29c0c9045b5002522c8bb.svg] in code blocks should be ignored`",
      "`? Need help with setup!!`",
      "#hashtag is recommended",
    ],
    // files
    [
      "65f29c24974fd8002333b160",
      "65e7f82e03949c0024a367d0",
      "65e7f4413bc95600258481fb",
    ],
    // helpfeels
    ["Need help with setup!!"],
    // infoboxDefinition
    [
      "Name\t[scrapbox.icon]",
      "Address\tAdd [link2] here",
      "Phone\tAdding # won't create a link",
      "Strengths\tList about 3 items",
    ],
    // linesCount
    26,
    // charsCount
    659,
  ];

  const actual = getPageMetadataFromLines(text);
  assertEquals(actual, expected);
});

// Test Helpfeel extraction (lines starting with "?")
// These are used for collecting questions and help requests in Scrapbox
Expand Down
61 changes: 43 additions & 18 deletions websocket/findMetadata.ts → websocket/getPageMetadataFromLines.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,27 +18,22 @@ import { parseYoutube } from "../parser/youtube.ts";
* @returns A tuple containing [title, links, projectLinks, icons, image, descriptions, files, helpfeels, infoboxDefinition, linesCount, charsCount]
* where image can be null if no suitable preview image is found
*/
export const findMetadata = (
export const getPageMetadataFromLines = (
text: string,
): [
string[],
string[],
string[],
string | null,
string[],
string[],
string[],
title: string,
links: string[],
projectLinks: string[],
icons: string[],
image: string | null,
descriptions: string[],
files: string[],
helpfeels: string[],
infoboxDefinition: string[],
linesCount: number,
charsCount: number,
] => {
const blocks = parse(text, { hasTitle: true }).flatMap((block) => {
switch (block.type) {
case "codeBlock":
case "title":
return [];
case "line":
case "table":
return block;
}
});
const blocks = parse(text, { hasTitle: true });

/** Map for detecting duplicate links while preserving link type information
*
Expand All @@ -49,13 +44,15 @@ export const findMetadata = (
* When the same page is referenced by both formats,
* we prioritize the bracket link format in the final output
*/
let title = "";
const linksLc = new Map<string, boolean>();
const links = [] as string[];
const projectLinksLc = new Set<string>();
const projectLinks = [] as string[];
const iconsLc = new Set<string>();
const icons = [] as string[];
let image: string | null = null;
const descriptions = [] as string[];
const files = new Set<string>();
const helpfeels = new Set<string>();

Expand Down Expand Up @@ -150,11 +147,31 @@ export const findMetadata = (

for (const block of blocks) {
switch (block.type) {
case "title": {
title = block.text;
continue;
}
case "line":
if (descriptions.length < 5 && block.nodes.length > 0) {
descriptions.push(
block.nodes[0].type === "helpfeel" ||
block.nodes[0].type === "commandLine"
? makeInlineCodeForDescription(block.nodes[0].raw)
: block.nodes.map((node) => node.raw).join("").trim().slice(
0,
200,
),
);
}
for (const node of block.nodes) {
lookup(node);
}
continue;
case "codeBlock":
if (descriptions.length < 5) {
descriptions.push(makeInlineCodeForDescription(block.content));
}
continue;
case "table": {
for (const row of block.cells) {
for (const nodes of row) {
Expand All @@ -175,17 +192,25 @@ export const findMetadata = (
}
}

const lines = text.split("\n");
return [
title,
links,
projectLinks,
icons,
image,
descriptions,
[...files],
[...helpfeels],
infoboxDefinition,
lines.length,
lines.reduce((acc, line) => acc + [...line].length, 0),
];
};

/** Wrap text in backticks so it can be used as an inline-code description.
 *
 * The text is trimmed, every backtick in it is escaped with a backslash,
 * and the escaped result is cut to at most 198 UTF-16 code units before the
 * surrounding backticks are added (so the result is at most 200 units long).
 *
 * NOTE(review): truncation happens after escaping and by code unit, so a cut
 * can land between a backslash and its backtick or inside a surrogate pair —
 * confirm whether that matches the format the consumer expects.
 *
 * @param text Raw text to convert
 * @returns The escaped, truncated text surrounded by backticks
 */
const makeInlineCodeForDescription = (text: string): `\`${string}\`` => {
  const escaped = text.trim().split("`").join("\\`");
  const clipped = escaped.slice(0, 198);
  return `\`${clipped}\``;
};

/** Remove a trailing ID-like fragment from a link.
 *
 * A fragment is removed only when the link ends with `#` followed by
 * 24 to 32 characters drawn from `a`-`f` and decimal digits; any other link
 * is returned unchanged.
 *
 * @param link Link text that may end with such a fragment
 * @returns The link without the trailing fragment
 */
const cutId = (link: string): string => {
  const trailingId = /#[a-f\d]{24,32}$/.exec(link);
  if (trailingId === null) return link;
  return link.slice(0, trailingId.index);
};

/** Extract Helpfeel entries from text
Expand Down
34 changes: 16 additions & 18 deletions websocket/makeChanges.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
import { diffToChanges } from "./diffToChanges.ts";
import type { Page } from "@cosense/types/rest";
import type { Change } from "./change.ts";
import { findMetadata, getHelpfeels } from "./findMetadata.ts";
import {
getHelpfeels,
getPageMetadataFromLines,
} from "./getPageMetadataFromLines.ts";
import { isSameArray } from "./isSameArray.ts";
import { isString } from "@core/unknownutil/is/string";

Expand All @@ -22,22 +25,6 @@ export function* makeChanges(
yield change;
}

// Handle title changes
// Note: We always include title change commits for new pages (`persistent === false`)
// to ensure proper page initialization
if (before.lines[0].text !== after_[0] || !before.persistent) {
yield { title: after_[0] };
}

// Process changes in page descriptions
// Descriptions are the first 5 lines after the title (lines 1-5)
// These lines provide a summary or additional context for the page
const leftDescriptions = before.lines.slice(1, 6).map((line) => line.text);
const rightDescriptions = after_.slice(1, 6);
if (leftDescriptions.join("") !== rightDescriptions.join("")) {
yield { descriptions: rightDescriptions };
}

// Process changes in various metadata
// Metadata includes:
// - links: References to other pages
Expand All @@ -48,21 +35,32 @@ export function* makeChanges(
// - helpfeels: Questions or help requests (lines starting with "?")
// - infoboxDefinition: Structured data definitions
const [
title,
links,
projectLinks,
icons,
image,
descriptions,
files,
helpfeels,
infoboxDefinition,
] = findMetadata(after_.join("\n"));
linesCount,
charsCount,
] = getPageMetadataFromLines(after_.join("\n"));
// Handle title changes
// Note: We always include title change commits for new pages (`persistent === false`)
// to ensure proper page initialization
if (before.title !== title || !before.persistent) yield { title };
if (!isSameArray(before.links, links)) yield { links };
if (!isSameArray(before.projectLinks, projectLinks)) yield { projectLinks };
if (!isSameArray(before.icons, icons)) yield { icons };
if (before.image !== image) yield { image };
if (!isSameArray(before.descriptions, descriptions)) yield { descriptions };
if (!isSameArray(before.files, files)) yield { files };
if (!isSameArray(getHelpfeels(before.lines), helpfeels)) yield { helpfeels };
if (!isSameArray(before.infoboxDefinition, infoboxDefinition)) {
yield { infoboxDefinition };
}
yield { linesCount };
yield { charsCount };
}
Loading