Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 59b5368

Browse files
Merge pull request #224 from takker99:page-metadata
fix(websocket) Follow Cosense's metadata generation
2 parents 830354d + eab1c7c commit 59b5368

File tree

5 files changed

+114
-74
lines changed

5 files changed

+114
-74
lines changed

‎websocket/__snapshots__/findMetadata.test.ts.snap

Lines changed: 0 additions & 33 deletions
This file was deleted.

‎websocket/change.ts

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@ export type Change =
1111
| HelpFeelsChange
1212
| infoboxDefinitionChange
1313
| TitleChange
14+
| LinesCountChange
15+
| charsCountChange
1416
| PinChange;
1517
export interface InsertChange {
1618
_insert: string;
@@ -72,6 +74,13 @@ export interface infoboxDefinitionChange {
7274
*/
7375
infoboxDefinition: string[];
7476
}
77+
export interface LinesCountChange {
78+
linesCount: number;
79+
}
80+
export interface charsCountChange {
81+
charsCount: number;
82+
}
83+
7584
export interface PinChange {
7685
pin: number;
7786
}

‎websocket/findMetadata.test.ts renamed to ‎websocket/getPageMetadataFromLines.test.ts

Lines changed: 46 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
1-
import { findMetadata, getHelpfeels } from "./findMetadata.ts";
2-
import { assertEquals } from "@std/assert";
3-
import { assertSnapshot } from "@std/testing/snapshot";
1+
import {
2+
getHelpfeels,
3+
getPageMetadataFromLines,
4+
} from "./getPageMetadataFromLines.ts";
5+
import { assertEquals } from "@std/assert/equals";
46

57
// Test data for metadata extraction from a Scrapbox page
68
// This sample includes various Scrapbox syntax elements:
@@ -38,8 +40,47 @@ Prepare thumbnail
3840
3941
[https://scrapbox.io/files/65e7f4413bc95600258481fb.svg https://scrapbox.io/files/65e7f82e03949c0024a367d0.svg]`;
4042

41-
// Test findMetadata function's ability to extract various metadata from a page
42-
Deno.test("findMetadata()", (t) => assertSnapshot(t, findMetadata(text)));
43+
Deno.test("getPageMetadataFromLines()", () => {
44+
assertEquals(getPageMetadataFromLines(text), [
45+
"test page",
46+
[
47+
"normal",
48+
"link2",
49+
"hashtag",
50+
],
51+
[
52+
"/help-en/external-link",
53+
],
54+
[
55+
"scrapbox",
56+
"takker",
57+
],
58+
"https://scrapbox.io/files/65f29c24974fd8002333b160.svg",
59+
[
60+
"[normal]link",
61+
"but `this [link]` is not a link",
62+
"`Links [link] and images [https://scrapbox.io/files/65f29c0c9045b5002522c8bb.svg] in code blocks should be ignored`",
63+
"`? Need help with setup!!`",
64+
"#hashtag is recommended",
65+
],
66+
[
67+
"65f29c24974fd8002333b160",
68+
"65e7f82e03949c0024a367d0",
69+
"65e7f4413bc95600258481fb",
70+
],
71+
[
72+
"Need help with setup!!",
73+
],
74+
[
75+
"Name\t[scrapbox.icon]",
76+
"Address\tAdd [link2] here",
77+
"Phone\tAdding # won't create a link",
78+
"Strengths\tList about 3 items",
79+
],
80+
26,
81+
659,
82+
]);
83+
});
4384

4485
// Test Helpfeel extraction (lines starting with "?")
4586
// These are used for collecting questions and help requests in Scrapbox

‎websocket/findMetadata.ts renamed to ‎websocket/getPageMetadataFromLines.ts

Lines changed: 43 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -18,27 +18,22 @@ import { parseYoutube } from "../parser/youtube.ts";
1818
* @returns A tuple containing [title, links, projectLinks, icons, image, descriptions, files, helpfeels, infoboxDefinition, linesCount, charsCount]
1919
* where image can be null if no suitable preview image is found
2020
*/
21-
export const findMetadata = (
21+
export const getPageMetadataFromLines = (
2222
text: string,
2323
): [
24-
string[],
25-
string[],
26-
string[],
27-
string | null,
28-
string[],
29-
string[],
30-
string[],
24+
title: string,
25+
links: string[],
26+
projectLinks: string[],
27+
icons: string[],
28+
image: string | null,
29+
descriptions: string[],
30+
files: string[],
31+
helpfeels: string[],
32+
infoboxDefinition: string[],
33+
linesCount: number,
34+
charsCount: number,
3135
] => {
32-
const blocks = parse(text, { hasTitle: true }).flatMap((block) => {
33-
switch (block.type) {
34-
case "codeBlock":
35-
case "title":
36-
return [];
37-
case "line":
38-
case "table":
39-
return block;
40-
}
41-
});
36+
const blocks = parse(text, { hasTitle: true });
4237

4338
/** Map for detecting duplicate links while preserving link type information
4439
*
@@ -49,13 +44,15 @@ export const findMetadata = (
4944
* When the same page is referenced by both formats,
5045
* we prioritize the bracket link format in the final output
5146
*/
47+
let title = "";
5248
const linksLc = new Map<string, boolean>();
5349
const links = [] as string[];
5450
const projectLinksLc = new Set<string>();
5551
const projectLinks = [] as string[];
5652
const iconsLc = new Set<string>();
5753
const icons = [] as string[];
5854
let image: string | null = null;
55+
const descriptions = [] as string[];
5956
const files = new Set<string>();
6057
const helpfeels = new Set<string>();
6158

@@ -150,11 +147,31 @@ export const findMetadata = (
150147

151148
for (const block of blocks) {
152149
switch (block.type) {
150+
case "title": {
151+
title = block.text;
152+
continue;
153+
}
153154
case "line":
155+
if (descriptions.length < 5 && block.nodes.length > 0) {
156+
descriptions.push(
157+
block.nodes[0].type === "helpfeel" ||
158+
block.nodes[0].type === "commandLine"
159+
? makeInlineCodeForDescription(block.nodes[0].raw)
160+
: block.nodes.map((node) => node.raw).join("").trim().slice(
161+
0,
162+
200,
163+
),
164+
);
165+
}
154166
for (const node of block.nodes) {
155167
lookup(node);
156168
}
157169
continue;
170+
case "codeBlock":
171+
if (descriptions.length < 5) {
172+
descriptions.push(makeInlineCodeForDescription(block.content));
173+
}
174+
continue;
158175
case "table": {
159176
for (const row of block.cells) {
160177
for (const nodes of row) {
@@ -175,17 +192,25 @@ export const findMetadata = (
175192
}
176193
}
177194

195+
const lines = text.split("\n");
178196
return [
197+
title,
179198
links,
180199
projectLinks,
181200
icons,
182201
image,
202+
descriptions,
183203
[...files],
184204
[...helpfeels],
185205
infoboxDefinition,
206+
lines.length,
207+
lines.reduce((acc, line) => acc + [...line].length, 0),
186208
];
187209
};
188210

211+
const makeInlineCodeForDescription = (text: string): `\`${string}\`` =>
212+
`\`${text.trim().replaceAll("`", "\\`").slice(0, 198)}\``;
213+
189214
const cutId = (link: string): string => link.replace(/#[a-f\d]{24,32}$/, "");
190215

191216
/** Extract Helpfeel entries from text

‎websocket/makeChanges.ts

Lines changed: 16 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
import { diffToChanges } from "./diffToChanges.ts";
22
import type { Page } from "@cosense/types/rest";
33
import type { Change } from "./change.ts";
4-
import { findMetadata, getHelpfeels } from "./findMetadata.ts";
4+
import {
5+
getHelpfeels,
6+
getPageMetadataFromLines,
7+
} from "./getPageMetadataFromLines.ts";
58
import { isSameArray } from "./isSameArray.ts";
69
import { isString } from "@core/unknownutil/is/string";
710

@@ -22,22 +25,6 @@ export function* makeChanges(
2225
yield change;
2326
}
2427

25-
// Handle title changes
26-
// Note: We always include title change commits for new pages (`persistent === false`)
27-
// to ensure proper page initialization
28-
if (before.lines[0].text !== after_[0] || !before.persistent) {
29-
yield { title: after_[0] };
30-
}
31-
32-
// Process changes in page descriptions
33-
// Descriptions are the first 5 lines after the title (lines 1-5)
34-
// These lines provide a summary or additional context for the page
35-
const leftDescriptions = before.lines.slice(1, 6).map((line) => line.text);
36-
const rightDescriptions = after_.slice(1, 6);
37-
if (leftDescriptions.join("") !== rightDescriptions.join("")) {
38-
yield { descriptions: rightDescriptions };
39-
}
40-
4128
// Process changes in various metadata
4229
// Metadata includes:
4330
// - links: References to other pages
@@ -48,21 +35,32 @@ export function* makeChanges(
4835
// - helpfeels: Questions or help requests (lines starting with "?")
4936
// - infoboxDefinition: Structured data definitions
5037
const [
38+
title,
5139
links,
5240
projectLinks,
5341
icons,
5442
image,
43+
descriptions,
5544
files,
5645
helpfeels,
5746
infoboxDefinition,
58-
] = findMetadata(after_.join("\n"));
47+
linesCount,
48+
charsCount,
49+
] = getPageMetadataFromLines(after_.join("\n"));
50+
// Handle title changes
51+
// Note: We always include title change commits for new pages (`persistent === false`)
52+
// to ensure proper page initialization
53+
if (before.title !== title || !before.persistent) yield { title };
5954
if (!isSameArray(before.links, links)) yield { links };
6055
if (!isSameArray(before.projectLinks, projectLinks)) yield { projectLinks };
6156
if (!isSameArray(before.icons, icons)) yield { icons };
6257
if (before.image !== image) yield { image };
58+
if (!isSameArray(before.descriptions, descriptions)) yield { descriptions };
6359
if (!isSameArray(before.files, files)) yield { files };
6460
if (!isSameArray(getHelpfeels(before.lines), helpfeels)) yield { helpfeels };
6561
if (!isSameArray(before.infoboxDefinition, infoboxDefinition)) {
6662
yield { infoboxDefinition };
6763
}
64+
yield { linesCount };
65+
yield { charsCount };
6866
}

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /