Escape markdown sequences (#2208)

* escape inline markdown character

* fix typo

* improve document around custom markdown plugin and add escape sequence utils

* recover inline escape sequences on edit

* remove escape sequences from plain text body

* use `s` for strike-through instead of del

* escape block markdown sequences

* fix escape sequence removal not stripping all backslashes from plain text

* recover block sequences on edit
This commit is contained in:
Ajay Bura 2025-02-21 19:19:24 +11:00 committed by GitHub
parent b63868bbb5
commit 7456c152b7
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
19 changed files with 764 additions and 476 deletions

View file

@ -26,48 +26,75 @@ import {
testMatrixTo,
} from '../../plugins/matrix-to';
import { tryDecodeURIComponent } from '../../utils/dom';
import {
escapeMarkdownInlineSequences,
escapeMarkdownBlockSequences,
} from '../../plugins/markdown';
// Lookup table from an HTML tag name to the text mark that tag represents.
// Several tags share one mark: <b>/<strong> are bold, <i>/<em> are italic
// and <s>/<del> are strike-through.
const markNodeToType: Record<string, MarkType> = {
b: MarkType.Bold,
strong: MarkType.Bold,
i: MarkType.Italic,
em: MarkType.Italic,
u: MarkType.Underline,
s: MarkType.StrikeThrough,
del: MarkType.StrikeThrough,
code: MarkType.Code,
span: MarkType.Spoiler,
};
type ProcessTextCallback = (text: string) => string;
/**
 * Resolves the text mark represented by an inline HTML element.
 *
 * @param node - Element whose tag name is looked up in `markNodeToType`.
 * @returns The matching mark type, or `undefined` when the tag is not a mark,
 * is a `<span>` without the `data-mx-spoiler` attribute, or is a `<code>`
 * whose parent is a `<pre>`.
 */
const elementToTextMark = (node: Element): MarkType | undefined => {
  // Consult own keys only: on a plain object, a tag name such as `constructor`
  // or `toString` would otherwise resolve to an inherited Object.prototype
  // member and be returned as if it were a mark type.
  if (!Object.prototype.hasOwnProperty.call(markNodeToType, node.name)) {
    return undefined;
  }
  const markType = markNodeToType[node.name];
  if (!markType) return undefined;

  // A span only counts as a spoiler when it carries the spoiler attribute.
  if (markType === MarkType.Spoiler && node.attribs['data-mx-spoiler'] === undefined) {
    return undefined;
  }
  // A <code> directly inside <pre> is not treated as an inline code mark.
  if (
    markType === MarkType.Code &&
    node.parent &&
    'name' in node.parent &&
    node.parent.name === 'pre'
  ) {
    return undefined;
  }
  return markType;
};
const parseNodeText = (node: ChildNode): string => {
const getText = (node: ChildNode): string => {
if (isText(node)) {
return node.data;
}
if (isTag(node)) {
return node.children.map((child) => parseNodeText(child)).join('');
return node.children.map((child) => getText(child)).join('');
}
return '';
};
const elementToInlineNode = (node: Element): MentionElement | EmoticonElement | undefined => {
/**
 * Maps an inline HTML element to the text mark it stands for.
 *
 * @param node - The element to classify by tag name.
 * @returns The mark type, or `undefined` for tags that are not marks, for a
 * `<code>` whose parent is a `<pre>`, and for a `<span>` without the
 * `data-mx-spoiler` attribute.
 */
const getInlineNodeMarkType = (node: Element): MarkType | undefined => {
  switch (node.name) {
    case 'b':
    case 'strong':
      return MarkType.Bold;
    case 'i':
    case 'em':
      return MarkType.Italic;
    case 'u':
      return MarkType.Underline;
    case 's':
    case 'del':
      return MarkType.StrikeThrough;
    case 'code': {
      const { parent } = node;
      // Don't apply `Code` mark inside a <pre> tag
      const insidePre = !!parent && 'name' in parent && parent.name === 'pre';
      return insidePre ? undefined : MarkType.Code;
    }
    case 'span':
      return node.attribs['data-mx-spoiler'] !== undefined ? MarkType.Spoiler : undefined;
    default:
      return undefined;
  }
};
/**
 * Converts a mark element (bold, italic, ...) into editor inline elements.
 *
 * @param markType - The mark to set on text children.
 * @param node - The mark element whose children are converted.
 * @param getChild - Converter applied to each child node.
 * @returns The converted children. When the element carries a `data-md`
 * attribute its value is emitted as plain text before and after the children
 * and no mark is applied; otherwise every text child gets the mark set.
 */
const getInlineMarkElement = (
  markType: MarkType,
  node: Element,
  getChild: (child: ChildNode) => InlineElement[]
): InlineElement[] => {
  const inlineChildren = node.children.flatMap(getChild);
  const mdSequence = node.attribs['data-md'];

  if (mdSequence === undefined) {
    for (const inlineChild of inlineChildren) {
      if (Text.isText(inlineChild)) {
        inlineChild[markType] = true;
      }
    }
    return inlineChildren;
  }

  // Wrap the children in the original markdown sequence instead of marking them.
  return [{ text: mdSequence }, ...inlineChildren, { text: mdSequence }];
};
const getInlineNonMarkElement = (node: Element): MentionElement | EmoticonElement | undefined => {
if (node.name === 'img' && node.attribs['data-mx-emoticon'] !== undefined) {
const { src, alt } = node.attribs;
if (!src) return undefined;
@ -79,13 +106,13 @@ const elementToInlineNode = (node: Element): MentionElement | EmoticonElement |
if (testMatrixTo(href)) {
const userMention = parseMatrixToUser(href);
if (userMention) {
return createMentionElement(userMention, parseNodeText(node) || userMention, false);
return createMentionElement(userMention, getText(node) || userMention, false);
}
const roomMention = parseMatrixToRoom(href);
if (roomMention) {
return createMentionElement(
roomMention.roomIdOrAlias,
parseNodeText(node) || roomMention.roomIdOrAlias,
getText(node) || roomMention.roomIdOrAlias,
false,
undefined,
roomMention.viaServers
@ -95,7 +122,7 @@ const elementToInlineNode = (node: Element): MentionElement | EmoticonElement |
if (eventMention) {
return createMentionElement(
eventMention.roomIdOrAlias,
parseNodeText(node) || eventMention.roomIdOrAlias,
getText(node) || eventMention.roomIdOrAlias,
false,
eventMention.eventId,
eventMention.viaServers
@ -106,44 +133,40 @@ const elementToInlineNode = (node: Element): MentionElement | EmoticonElement |
return undefined;
};
const parseInlineNodes = (node: ChildNode): InlineElement[] => {
const getInlineElement = (node: ChildNode, processText: ProcessTextCallback): InlineElement[] => {
if (isText(node)) {
return [{ text: node.data }];
}
if (isTag(node)) {
const markType = elementToTextMark(node);
if (markType) {
const children = node.children.flatMap(parseInlineNodes);
if (node.attribs['data-md'] !== undefined) {
children.unshift({ text: node.attribs['data-md'] });
children.push({ text: node.attribs['data-md'] });
} else {
children.forEach((child) => {
if (Text.isText(child)) {
child[markType] = true;
}
});
}
return children;
return [{ text: processText(node.data) }];
}
const inlineNode = elementToInlineNode(node);
if (isTag(node)) {
const markType = getInlineNodeMarkType(node);
if (markType) {
return getInlineMarkElement(markType, node, (child) => {
if (markType === MarkType.Code) return [{ text: getText(child) }];
return getInlineElement(child, processText);
});
}
const inlineNode = getInlineNonMarkElement(node);
if (inlineNode) return [inlineNode];
if (node.name === 'a') {
const children = node.childNodes.flatMap(parseInlineNodes);
const children = node.childNodes.flatMap((child) => getInlineElement(child, processText));
children.unshift({ text: '[' });
children.push({ text: `](${node.attribs.href})` });
return children;
}
return node.childNodes.flatMap(parseInlineNodes);
return node.childNodes.flatMap((child) => getInlineElement(child, processText));
}
return [];
};
const parseBlockquoteNode = (node: Element): BlockQuoteElement[] | ParagraphElement[] => {
const parseBlockquoteNode = (
node: Element,
processText: ProcessTextCallback
): BlockQuoteElement[] | ParagraphElement[] => {
const quoteLines: Array<InlineElement[]> = [];
let lineHolder: InlineElement[] = [];
@ -156,7 +179,7 @@ const parseBlockquoteNode = (node: Element): BlockQuoteElement[] | ParagraphElem
node.children.forEach((child) => {
if (isText(child)) {
lineHolder.push({ text: child.data });
lineHolder.push({ text: processText(child.data) });
return;
}
if (isTag(child)) {
@ -168,19 +191,20 @@ const parseBlockquoteNode = (node: Element): BlockQuoteElement[] | ParagraphElem
if (child.name === 'p') {
appendLine();
quoteLines.push(child.children.flatMap((c) => parseInlineNodes(c)));
quoteLines.push(child.children.flatMap((c) => getInlineElement(c, processText)));
return;
}
parseInlineNodes(child).forEach((inlineNode) => lineHolder.push(inlineNode));
lineHolder.push(...getInlineElement(child, processText));
}
});
appendLine();
if (node.attribs['data-md'] !== undefined) {
const mdSequence = node.attribs['data-md'];
if (mdSequence !== undefined) {
return quoteLines.map((lineChildren) => ({
type: BlockType.Paragraph,
children: [{ text: `${node.attribs['data-md']} ` }, ...lineChildren],
children: [{ text: `${mdSequence} ` }, ...lineChildren],
}));
}
@ -195,22 +219,19 @@ const parseBlockquoteNode = (node: Element): BlockQuoteElement[] | ParagraphElem
];
};
const parseCodeBlockNode = (node: Element): CodeBlockElement[] | ParagraphElement[] => {
const codeLines = parseNodeText(node).trim().split('\n');
const codeLines = getText(node).trim().split('\n');
if (node.attribs['data-md'] !== undefined) {
const pLines = codeLines.map<ParagraphElement>((lineText) => ({
const mdSequence = node.attribs['data-md'];
if (mdSequence !== undefined) {
const pLines = codeLines.map<ParagraphElement>((text) => ({
type: BlockType.Paragraph,
children: [
{
text: lineText,
},
],
children: [{ text }],
}));
const childCode = node.children[0];
const className =
isTag(childCode) && childCode.tagName === 'code' ? childCode.attribs.class ?? '' : '';
const prefix = { text: `${node.attribs['data-md']}${className.replace('language-', '')}` };
const suffix = { text: node.attribs['data-md'] };
const prefix = { text: `${mdSequence}${className.replace('language-', '')}` };
const suffix = { text: mdSequence };
return [
{ type: BlockType.Paragraph, children: [prefix] },
...pLines,
@ -221,19 +242,16 @@ const parseCodeBlockNode = (node: Element): CodeBlockElement[] | ParagraphElemen
return [
{
type: BlockType.CodeBlock,
children: codeLines.map<CodeLineElement>((lineTxt) => ({
children: codeLines.map<CodeLineElement>((text) => ({
type: BlockType.CodeLine,
children: [
{
text: lineTxt,
},
],
children: [{ text }],
})),
},
];
};
const parseListNode = (
node: Element
node: Element,
processText: ProcessTextCallback
): OrderedListElement[] | UnorderedListElement[] | ParagraphElement[] => {
const listLines: Array<InlineElement[]> = [];
let lineHolder: InlineElement[] = [];
@ -247,7 +265,7 @@ const parseListNode = (
node.children.forEach((child) => {
if (isText(child)) {
lineHolder.push({ text: child.data });
lineHolder.push({ text: processText(child.data) });
return;
}
if (isTag(child)) {
@ -259,17 +277,18 @@ const parseListNode = (
if (child.name === 'li') {
appendLine();
listLines.push(child.children.flatMap((c) => parseInlineNodes(c)));
listLines.push(child.children.flatMap((c) => getInlineElement(c, processText)));
return;
}
parseInlineNodes(child).forEach((inlineNode) => lineHolder.push(inlineNode));
lineHolder.push(...getInlineElement(child, processText));
}
});
appendLine();
if (node.attribs['data-md'] !== undefined) {
const prefix = node.attribs['data-md'] || '-';
const mdSequence = node.attribs['data-md'];
if (mdSequence !== undefined) {
const prefix = mdSequence || '-';
const [starOrHyphen] = prefix.match(/^\*|-$/) ?? [];
return listLines.map((lineChildren) => ({
type: BlockType.Paragraph,
@ -302,17 +321,21 @@ const parseListNode = (
},
];
};
const parseHeadingNode = (node: Element): HeadingElement | ParagraphElement => {
const children = node.children.flatMap((child) => parseInlineNodes(child));
const parseHeadingNode = (
node: Element,
processText: ProcessTextCallback
): HeadingElement | ParagraphElement => {
const children = node.children.flatMap((child) => getInlineElement(child, processText));
const headingMatch = node.name.match(/^h([123456])$/);
const [, g1AsLevel] = headingMatch ?? ['h3', '3'];
const level = parseInt(g1AsLevel, 10);
if (node.attribs['data-md'] !== undefined) {
const mdSequence = node.attribs['data-md'];
if (mdSequence !== undefined) {
return {
type: BlockType.Paragraph,
children: [{ text: `${node.attribs['data-md']} ` }, ...children],
children: [{ text: `${mdSequence} ` }, ...children],
};
}
@ -323,7 +346,11 @@ const parseHeadingNode = (node: Element): HeadingElement | ParagraphElement => {
};
};
export const domToEditorInput = (domNodes: ChildNode[]): Descendant[] => {
export const domToEditorInput = (
domNodes: ChildNode[],
processText: ProcessTextCallback,
processLineStartText: ProcessTextCallback
): Descendant[] => {
const children: Descendant[] = [];
let lineHolder: InlineElement[] = [];
@ -340,7 +367,14 @@ export const domToEditorInput = (domNodes: ChildNode[]): Descendant[] => {
domNodes.forEach((node) => {
if (isText(node)) {
lineHolder.push({ text: node.data });
if (lineHolder.length === 0) {
// we are inserting first part of line
// it may contain block markdown starting data
// that we may need to escape.
lineHolder.push({ text: processLineStartText(node.data) });
return;
}
lineHolder.push({ text: processText(node.data) });
return;
}
if (isTag(node)) {
@ -354,14 +388,14 @@ export const domToEditorInput = (domNodes: ChildNode[]): Descendant[] => {
appendLine();
children.push({
type: BlockType.Paragraph,
children: node.children.flatMap((child) => parseInlineNodes(child)),
children: node.children.flatMap((child) => getInlineElement(child, processText)),
});
return;
}
if (node.name === 'blockquote') {
appendLine();
children.push(...parseBlockquoteNode(node));
children.push(...parseBlockquoteNode(node, processText));
return;
}
if (node.name === 'pre') {
@ -371,17 +405,17 @@ export const domToEditorInput = (domNodes: ChildNode[]): Descendant[] => {
}
if (node.name === 'ol' || node.name === 'ul') {
appendLine();
children.push(...parseListNode(node));
children.push(...parseListNode(node, processText));
return;
}
if (node.name.match(/^h[123456]$/)) {
appendLine();
children.push(parseHeadingNode(node));
children.push(parseHeadingNode(node, processText));
return;
}
parseInlineNodes(node).forEach((inlineNode) => lineHolder.push(inlineNode));
lineHolder.push(...getInlineElement(node, processText));
}
});
appendLine();
@ -389,21 +423,31 @@ export const domToEditorInput = (domNodes: ChildNode[]): Descendant[] => {
return children;
};
export const htmlToEditorInput = (unsafeHtml: string): Descendant[] => {
export const htmlToEditorInput = (unsafeHtml: string, markdown?: boolean): Descendant[] => {
const sanitizedHtml = sanitizeCustomHtml(unsafeHtml);
const processText = (partText: string) => {
if (!markdown) return partText;
return escapeMarkdownInlineSequences(partText);
};
const domNodes = parse(sanitizedHtml);
const editorNodes = domToEditorInput(domNodes);
const editorNodes = domToEditorInput(domNodes, processText, (lineStartText: string) => {
if (!markdown) return lineStartText;
return escapeMarkdownBlockSequences(lineStartText, processText);
});
return editorNodes;
};
export const plainToEditorInput = (text: string): Descendant[] => {
export const plainToEditorInput = (text: string, markdown?: boolean): Descendant[] => {
const editorNodes: Descendant[] = text.split('\n').map((lineText) => {
const paragraphNode: ParagraphElement = {
type: BlockType.Paragraph,
children: [
{
text: lineText,
text: markdown
? escapeMarkdownBlockSequences(lineText, escapeMarkdownInlineSequences)
: lineText,
},
],
};

View file

@ -3,7 +3,12 @@ import { Descendant, Text } from 'slate';
import { sanitizeText } from '../../utils/sanitize';
import { BlockType } from './types';
import { CustomElement } from './slate';
import { parseBlockMD, parseInlineMD } from '../../plugins/markdown';
import {
parseBlockMD,
parseInlineMD,
unescapeMarkdownBlockSequences,
unescapeMarkdownInlineSequences,
} from '../../plugins/markdown';
import { findAndReplace } from '../../utils/findAndReplace';
import { sanitizeForRegex } from '../../utils/regex';
@ -19,7 +24,7 @@ const textToCustomHtml = (node: Text, opts: OutputOptions): string => {
if (node.bold) string = `<strong>${string}</strong>`;
if (node.italic) string = `<i>${string}</i>`;
if (node.underline) string = `<u>${string}</u>`;
if (node.strikeThrough) string = `<del>${string}</del>`;
if (node.strikeThrough) string = `<s>${string}</s>`;
if (node.code) string = `<code>${string}</code>`;
if (node.spoiler) string = `<span data-mx-spoiler>${string}</span>`;
}
@ -102,7 +107,8 @@ export const toMatrixCustomHTML = (
allowBlockMarkdown: false,
})
.replace(/<br\/>$/, '\n')
.replace(/^&gt;/, '>');
.replace(/^(\\*)&gt;/, '$1>');
markdownLines += line;
if (index === targetNodes.length - 1) {
return parseBlockMD(markdownLines, ignoreHTMLParseInlineMD);
@ -157,11 +163,14 @@ const elementToPlainText = (node: CustomElement, children: string): string => {
}
};
export const toPlainText = (node: Descendant | Descendant[]): string => {
if (Array.isArray(node)) return node.map((n) => toPlainText(n)).join('');
if (Text.isText(node)) return node.text;
export const toPlainText = (node: Descendant | Descendant[], isMarkdown: boolean): string => {
if (Array.isArray(node)) return node.map((n) => toPlainText(n, isMarkdown)).join('');
if (Text.isText(node))
return isMarkdown
? unescapeMarkdownBlockSequences(node.text, unescapeMarkdownInlineSequences)
: node.text;
const children = node.children.map((n) => toPlainText(n)).join('');
const children = node.children.map((n) => toPlainText(n, isMarkdown)).join('');
return elementToPlainText(node, children);
};

View file

@ -255,7 +255,7 @@ export const RoomInput = forwardRef<HTMLDivElement, RoomInputProps>(
const commandName = getBeginCommand(editor);
let plainText = toPlainText(editor.children).trim();
let plainText = toPlainText(editor.children, isMarkdown).trim();
let customHtml = trimCustomHtml(
toMatrixCustomHTML(editor.children, {
allowTextFormatting: true,

View file

@ -92,7 +92,7 @@ export const MessageEditor = as<'div', MessageEditorProps>(
const [saveState, save] = useAsyncCallback(
useCallback(async () => {
const plainText = toPlainText(editor.children).trim();
const plainText = toPlainText(editor.children, isMarkdown).trim();
const customHtml = trimCustomHtml(
toMatrixCustomHTML(editor.children, {
allowTextFormatting: true,
@ -192,8 +192,8 @@ export const MessageEditor = as<'div', MessageEditorProps>(
const initialValue =
typeof customHtml === 'string'
? htmlToEditorInput(customHtml)
: plainToEditorInput(typeof body === 'string' ? body : '');
? htmlToEditorInput(customHtml, isMarkdown)
: plainToEditorInput(typeof body === 'string' ? body : '', isMarkdown);
Transforms.select(editor, {
anchor: Editor.start(editor, []),
@ -202,7 +202,7 @@ export const MessageEditor = as<'div', MessageEditorProps>(
editor.insertFragment(initialValue);
if (!mobileOrTablet()) ReactEditor.focus(editor);
}, [editor, getPrevBodyAndFormattedBody]);
}, [editor, getPrevBodyAndFormattedBody, isMarkdown]);
useEffect(() => {
if (saveState.status === AsyncStatus.Success) {

View file

@ -1,368 +0,0 @@
// A regular-expression match, as returned by either `String.prototype.match`
// or `RegExp.prototype.exec`.
export type MatchResult = RegExpMatchArray | RegExpExecArray;
// A matcher for one markdown rule: returns the match found in `text`, or
// `null` when there is none.
export type RuleMatch = (text: string) => MatchResult | null;
/**
 * Returns the part of `text` located before the matched substring.
 *
 * @param text - The string the match was produced from.
 * @param match - The match whose `index` marks where the slice ends.
 * @returns `text` from its start up to `match.index`.
 */
export const beforeMatch = (text: string, match: RegExpMatchArray | RegExpExecArray): string => {
  const matchStart = match.index;
  return text.slice(0, matchStart);
};
/**
 * Returns the part of `text` located after the matched substring.
 *
 * @param text - The string the match was produced from.
 * @param match - The match; a missing `index` is treated as 0.
 * @returns `text` from the end of the matched substring onwards.
 */
export const afterMatch = (text: string, match: RegExpMatchArray | RegExpExecArray): string => {
  const matchStart = match.index ?? 0;
  const matchEnd = matchStart + match[0].length;
  return text.slice(matchEnd);
};
/**
 * Replaces the matched substring of `text` with `content`, converting the
 * text on either side of the match with `convertPart`.
 *
 * @param convertPart - Converter applied to the text before and after the match.
 * @param text - The string the match was produced from.
 * @param match - The match to replace.
 * @param content - The replacement placed where the match was.
 * @returns The converted leading parts, then `content`, then the converted trailing parts.
 */
export const replaceMatch = <C>(
  convertPart: (txt: string) => Array<string | C>,
  text: string,
  match: MatchResult,
  content: C
): Array<string | C> => {
  const leading = convertPart(beforeMatch(text, match));
  const trailing = convertPart(afterMatch(text, match));
  return [...leading, content, ...trailing];
};
/*
*****************
* INLINE PARSER *
*****************
*/
// Parses inline markdown in `text` and returns the resulting string.
export type InlineMDParser = (text: string) => string;
// Builds the replacement string for a rule match; `parse` is available for
// parsing nested content.
export type InlineMatchConverter = (parse: InlineMDParser, match: MatchResult) => string;
// An inline markdown rule: a matcher plus the converter for what it matched.
export type InlineMDRule = {
match: RuleMatch;
html: InlineMatchConverter;
};
// Runs a single rule against `text`; yields `undefined` when nothing was produced.
export type InlineRuleRunner = (
parse: InlineMDParser,
text: string,
rule: InlineMDRule
) => string | undefined;
// Runs a list of rules against `text`; yields `undefined` when nothing was produced.
export type InlineRulesRunner = (
parse: InlineMDParser,
text: string,
rules: InlineMDRule[]
) => string | undefined;
// Lazily captures one or more characters (the content between two delimiters).
const MIN_ANY = '(.+?)';
// Negative look-behind that rejects a delimiter preceded, without whitespace
// in between, by an http(s)/ftp/mailto/magnet `://` prefix.
// It contains the first capture group, so rule content lands in group 2.
const URL_NEG_LB = '(?<!(https?|ftp|mailto|magnet):\\/\\/\\S*)';

const BOLD_MD_1 = '**';
const BOLD_PREFIX_1 = '\\*{2}';
const BOLD_NEG_LA_1 = '(?!\\*)';
const BOLD_REG_1 = new RegExp(
  [URL_NEG_LB, BOLD_PREFIX_1, MIN_ANY, BOLD_PREFIX_1, BOLD_NEG_LA_1].join('')
);
/** Inline rule turning `**text**` into a `<strong>` element tagged with its markdown sequence. */
const BoldRule: InlineMDRule = {
  match: (text) => BOLD_REG_1.exec(text),
  html: (parse, match) => {
    const inner = match[2];
    return `<strong data-md="${BOLD_MD_1}">${parse(inner)}</strong>`;
  },
};
const ITALIC_MD_1 = '*';
const ITALIC_PREFIX_1 = '\\*';
const ITALIC_NEG_LA_1 = '(?!\\*)';
const ITALIC_REG_1 = new RegExp(
  [URL_NEG_LB, ITALIC_PREFIX_1, MIN_ANY, ITALIC_PREFIX_1, ITALIC_NEG_LA_1].join('')
);
/** Inline rule turning `*text*` into an `<i>` element tagged with its markdown sequence. */
const ItalicRule1: InlineMDRule = {
  match: (text) => ITALIC_REG_1.exec(text),
  html: (parse, match) => {
    const inner = match[2];
    return `<i data-md="${ITALIC_MD_1}">${parse(inner)}</i>`;
  },
};
const ITALIC_MD_2 = '_';
const ITALIC_PREFIX_2 = '_';
const ITALIC_NEG_LA_2 = '(?!_)';
const ITALIC_REG_2 = new RegExp(
  [URL_NEG_LB, ITALIC_PREFIX_2, MIN_ANY, ITALIC_PREFIX_2, ITALIC_NEG_LA_2].join('')
);
/** Inline rule turning `_text_` into an `<i>` element tagged with its markdown sequence. */
const ItalicRule2: InlineMDRule = {
  match: (text) => ITALIC_REG_2.exec(text),
  html: (parse, match) => {
    const inner = match[2];
    return `<i data-md="${ITALIC_MD_2}">${parse(inner)}</i>`;
  },
};
const UNDERLINE_MD_1 = '__';
const UNDERLINE_PREFIX_1 = '_{2}';
const UNDERLINE_NEG_LA_1 = '(?!_)';
const UNDERLINE_REG_1 = new RegExp(
  [URL_NEG_LB, UNDERLINE_PREFIX_1, MIN_ANY, UNDERLINE_PREFIX_1, UNDERLINE_NEG_LA_1].join('')
);
/** Inline rule turning `__text__` into a `<u>` element tagged with its markdown sequence. */
const UnderlineRule: InlineMDRule = {
  match: (text) => UNDERLINE_REG_1.exec(text),
  html: (parse, match) => {
    const inner = match[2];
    return `<u data-md="${UNDERLINE_MD_1}">${parse(inner)}</u>`;
  },
};
const STRIKE_MD_1 = '~~';
const STRIKE_PREFIX_1 = '~{2}';
const STRIKE_NEG_LA_1 = '(?!~)';
const STRIKE_REG_1 = new RegExp(
  [URL_NEG_LB, STRIKE_PREFIX_1, MIN_ANY, STRIKE_PREFIX_1, STRIKE_NEG_LA_1].join('')
);
/** Inline rule turning `~~text~~` into a `<del>` element tagged with its markdown sequence. */
const StrikeRule: InlineMDRule = {
  match: (text) => STRIKE_REG_1.exec(text),
  html: (parse, match) => {
    const inner = match[2];
    return `<del data-md="${STRIKE_MD_1}">${parse(inner)}</del>`;
  },
};
const CODE_MD_1 = '`';
const CODE_PREFIX_1 = '`';
const CODE_NEG_LA_1 = '(?!`)';
const CODE_REG_1 = new RegExp(
  [URL_NEG_LB, CODE_PREFIX_1, '(.+?)', CODE_PREFIX_1, CODE_NEG_LA_1].join('')
);
/**
 * Inline rule turning a backtick-delimited span into a `<code>` element.
 * The captured content is inserted as is; it is not parsed for nested markdown.
 */
const CodeRule: InlineMDRule = {
  match: (text) => CODE_REG_1.exec(text),
  html: (_parse, match) => {
    const inner = match[2];
    return `<code data-md="${CODE_MD_1}">${inner}</code>`;
  },
};
const SPOILER_MD_1 = '||';
const SPOILER_PREFIX_1 = '\\|{2}';
const SPOILER_NEG_LA_1 = '(?!\\|)';
const SPOILER_REG_1 = new RegExp(
  [URL_NEG_LB, SPOILER_PREFIX_1, MIN_ANY, SPOILER_PREFIX_1, SPOILER_NEG_LA_1].join('')
);
/** Inline rule turning `||text||` into a `<span data-mx-spoiler>` element. */
const SpoilerRule: InlineMDRule = {
  match: (text) => SPOILER_REG_1.exec(text),
  html: (parse, match) => {
    const inner = match[2];
    return `<span data-md="${SPOILER_MD_1}" data-mx-spoiler>${parse(inner)}</span>`;
  },
};
const LINK_ALT = `\\[${MIN_ANY}\\]`;
const LINK_URL = `\\((https?:\\/\\/.+?)\\)`;
const LINK_REG_1 = new RegExp(LINK_ALT + LINK_URL);
/**
 * Inline rule turning `[label](http(s)://url)` into an `<a>` element.
 * The label is parsed for nested inline markdown; the URL is used as the href.
 */
const LinkRule: InlineMDRule = {
  match: (text) => LINK_REG_1.exec(text),
  html: (parse, match) => {
    const label = match[1];
    const href = match[2];
    return `<a data-md href="${href}">${parse(label)}</a>`;
  },
};
/**
 * Applies one inline rule to `text`.
 * The matched part is replaced by the rule's HTML and the text on both sides
 * of the match is handed to `parse`.
 * Returns `undefined` when the rule does not match.
 */
const runInlineRule: InlineRuleRunner = (parse, text, rule) => {
  const found = rule.match(text);
  if (!found) return undefined;

  const html = rule.html(parse, found);
  return replaceMatch((part) => [parse(part)], text, found, html).join('');
};
/**
 * Runs multiple rules at the same time to better handle nested rules.
 * Every rule is matched against the text and the match that starts earliest
 * wins; when two matches start at the same index, the rule listed first wins.
 * Returns `undefined` when no rule produces a match with a numeric index.
 */
const runInlineRules: InlineRulesRunner = (parse, text, rules) => {
  let bestRule: InlineMDRule | undefined;
  let bestMatch: MatchResult | undefined;
  let bestIndex = Number.POSITIVE_INFINITY;

  for (const rule of rules) {
    const candidate = rule.match(text);
    const startsAt = candidate?.index;
    // Strict comparison keeps the earlier rule on ties.
    if (candidate && typeof startsAt === 'number' && startsAt < bestIndex) {
      bestRule = rule;
      bestMatch = candidate;
      bestIndex = startsAt;
    }
  }

  if (!bestRule || !bestMatch) return undefined;

  const html = bestRule.html(parse, bestMatch);
  return replaceMatch((part) => [parse(part)], text, bestMatch, html).join('');
};
// Inline rules that are matched together in a single pass.
// Order matters: when two rules match at the same index, the one listed
// first is the one applied.
const LeveledRules = [
BoldRule,
ItalicRule1,
UnderlineRule,
ItalicRule2,
StrikeRule,
SpoilerRule,
LinkRule,
];
/**
 * Parses inline markdown in `text` into HTML.
 * The code rule is tried on its own first; only if it yields nothing are the
 * remaining inline rules tried together. Text without any markdown is
 * returned unchanged.
 */
export const parseInlineMD: InlineMDParser = (text) => {
  if (text === '') return text;

  const codeHtml = runInlineRule(parseInlineMD, text, CodeRule);
  if (codeHtml) return codeHtml;

  return runInlineRules(parseInlineMD, text, LeveledRules) ?? text;
};
/*
****************
* BLOCK PARSER *
****************
*/
// Parses block-level markdown and returns the resulting string; the optional
// `parseInline` is used for inline content.
export type BlockMDParser = (test: string, parseInline?: (txt: string) => string) => string;
// Builds the replacement string for a block rule match.
export type BlockMatchConverter = (
match: MatchResult,
parseInline?: (txt: string) => string
) => string;
// A block markdown rule: a matcher plus the converter for what it matched.
export type BlockMDRule = {
match: RuleMatch;
html: BlockMatchConverter;
};
// Runs a single block rule against `text`; yields `undefined` when nothing was produced.
export type BlockRuleRunner = (
parse: BlockMDParser,
text: string,
rule: BlockMDRule,
parseInline?: (txt: string) => string
) => string | undefined;
const HEADING_REG_1 = /^(#{1,6}) +(.+)\n?/m;
/**
 * Block rule turning a `#`-prefixed line (1 to 6 hashes followed by at least
 * one space) into the heading element of the matching level.
 */
const HeadingRule: BlockMDRule = {
  match: (text) => HEADING_REG_1.exec(text),
  html: (match, parseInline) => {
    const hashes = match[1];
    const title = match[2];
    const level = hashes.length;
    const inner = parseInline ? parseInline(title) : title;
    return `<h${level} data-md="${hashes}">${inner}</h${level}>`;
  },
};
const CODEBLOCK_MD_1 = '```';
const CODEBLOCK_REG_1 = /^`{3}(\S*)\n((?:.*\n)+?)`{3} *(?!.)\n?/m;
/**
 * Block rule turning a fenced code block into `<pre><code>`.
 * Text directly after the opening fence becomes a `language-*` class.
 * The code itself is inserted as is, without inline parsing.
 */
const CodeBlockRule: BlockMDRule = {
  match: (text) => CODEBLOCK_REG_1.exec(text),
  html: (match) => {
    const language = match[1];
    const code = match[2];
    const classAttr = language ? ` class="language-${language}"` : '';
    return `<pre data-md="${CODEBLOCK_MD_1}"><code${classAttr}>${code}</code></pre>`;
  },
};
const BLOCKQUOTE_MD_1 = '>';
const QUOTE_LINE_PREFIX = /^> */;
const BLOCKQUOTE_TRAILING_NEWLINE = /\n$/;
const BLOCKQUOTE_REG_1 = /(^>.*\n?)+/m;
/**
 * Block rule turning consecutive `>`-prefixed lines into one `<blockquote>`.
 * Each line loses its `>` prefix and is terminated with `<br/>`.
 */
const BlockQuoteRule: BlockMDRule = {
  match: (text) => BLOCKQUOTE_REG_1.exec(text),
  html: (match, parseInline) => {
    const quoted = match[0].replace(BLOCKQUOTE_TRAILING_NEWLINE, '');
    let body = '';
    for (const rawLine of quoted.split('\n')) {
      const line = rawLine.replace(QUOTE_LINE_PREFIX, '');
      body += `${parseInline ? parseInline(line) : line}<br/>`;
    }
    return `<blockquote data-md="${BLOCKQUOTE_MD_1}">${body}</blockquote>`;
  },
};
const ORDERED_LIST_MD_1 = '-';
const O_LIST_ITEM_PREFIX = /^(-|[\da-zA-Z]\.) */;
const O_LIST_START = /^([\d])\./;
const O_LIST_TYPE = /^([aAiI])\./;
const O_LIST_TRAILING_NEWLINE = /\n$/;
const ORDERED_LIST_REG_1 = /(^(?:-|[\da-zA-Z]\.) +.+\n?)+/m;
/**
 * Block rule turning consecutive `-`, `1.` or `a.` style lines into an `<ol>`.
 * A leading digit on the first line becomes the `start` attribute and a
 * leading `a`/`A`/`i`/`I` becomes the `type` attribute.
 */
const OrderedListRule: BlockMDRule = {
  match: (text) => ORDERED_LIST_REG_1.exec(text),
  html: (match, parseInline) => {
    const listText = match[0];
    const listStart = O_LIST_START.exec(listText)?.[1];
    const listType = O_LIST_TYPE.exec(listText)?.[1];

    let items = '';
    for (const rawLine of listText.replace(O_LIST_TRAILING_NEWLINE, '').split('\n')) {
      const line = rawLine.replace(O_LIST_ITEM_PREFIX, '');
      items += `<li><p>${parseInline ? parseInline(line) : line}</p></li>`;
    }

    const attrs = [
      `data-md="${listType || listStart || ORDERED_LIST_MD_1}"`,
      listStart ? ` start="${listStart}"` : '',
      listType ? ` type="${listType}"` : '',
    ].join('');
    return `<ol ${attrs}>${items}</ol>`;
  },
};
const UNORDERED_LIST_MD_1 = '*';
const U_LIST_ITEM_PREFIX = /^\* */;
const U_LIST_TRAILING_NEWLINE = /\n$/;
const UNORDERED_LIST_REG_1 = /(^\* +.+\n?)+/m;
/** Block rule turning consecutive `* item` lines into a `<ul>`. */
const UnorderedListRule: BlockMDRule = {
  match: (text) => UNORDERED_LIST_REG_1.exec(text),
  html: (match, parseInline) => {
    const listText = match[0];
    let items = '';
    for (const rawLine of listText.replace(U_LIST_TRAILING_NEWLINE, '').split('\n')) {
      const line = rawLine.replace(U_LIST_ITEM_PREFIX, '');
      items += `<li><p>${parseInline ? parseInline(line) : line}</p></li>`;
    }
    return `<ul data-md="${UNORDERED_LIST_MD_1}">${items}</ul>`;
  },
};
/**
 * Applies one block rule to `text`.
 * The matched part is replaced by the rule's HTML and the text on both sides
 * of the match is handed back to `parse`.
 * Returns `undefined` when the rule does not match.
 */
const runBlockRule: BlockRuleRunner = (parse, text, rule, parseInline) => {
  const found = rule.match(text);
  if (!found) return undefined;

  const html = rule.html(found, parseInline);
  return replaceMatch((part) => [parse(part, parseInline)], text, found, html).join('');
};
/**
 * Parses block-level markdown in `text` into HTML.
 * Rules are tried in a fixed order (code block, blockquote, ordered list,
 * unordered list, heading) and the first one producing output wins.
 * When none applies, the text is parsed line by line with `parseInline`
 * (if given) and the lines are joined with `<br/>`.
 */
export const parseBlockMD: BlockMDParser = (text, parseInline) => {
  if (text === '') return text;

  const rulesInOrder = [
    CodeBlockRule,
    BlockQuoteRule,
    OrderedListRule,
    UnorderedListRule,
    HeadingRule,
  ];
  for (const rule of rulesInOrder) {
    const html = runBlockRule(parseBlockMD, text, rule, parseInline);
    if (html) return html;
  }

  // replace \n with <br/> because want to preserve empty lines
  if (parseInline) {
    return text
      .split('\n')
      .map((line) => parseInline(line))
      .join('<br/>');
  }
  return text.replace(/\n/g, '<br/>');
};

View file

@ -0,0 +1 @@
export * from './parser';

View file

@ -0,0 +1,47 @@
import { replaceMatch } from '../internal';
import {
BlockQuoteRule,
CodeBlockRule,
ESC_BLOCK_SEQ,
HeadingRule,
OrderedListRule,
UnorderedListRule,
} from './rules';
import { runBlockRule } from './runner';
import { BlockMDParser } from './type';
/**
 * Parses block-level markdown text into HTML using the defined block rules.
 *
 * Rules are tried in a fixed order (code block, blockquote, ordered list,
 * unordered list, heading) and the first one producing output wins. When none
 * applies, the text is handled line by line: a line starting with an escaped
 * block sequence has its leading backslash dropped and the rest parsed
 * inline; any other line is parsed inline as a whole. Lines are then joined
 * with `<br/>`.
 *
 * @param text - The markdown text to be parsed.
 * @param parseInline - Optional function to parse inline elements.
 * @returns The parsed HTML; an empty string is returned unchanged.
 */
export const parseBlockMD: BlockMDParser = (text, parseInline) => {
  if (text === '') return text;

  const rulesInOrder = [
    CodeBlockRule,
    BlockQuoteRule,
    OrderedListRule,
    UnorderedListRule,
    HeadingRule,
  ];
  for (const rule of rulesInOrder) {
    const html = runBlockRule(text, rule, parseBlockMD, parseInline);
    if (html) return html;
  }

  const toInline = (part: string): string => parseInline?.(part) ?? part;

  // replace \n with <br/> because want to preserve empty lines
  return text
    .split('\n')
    .map((line) => {
      const escaped = line.match(ESC_BLOCK_SEQ);
      if (!escaped) return toInline(line);

      // Group 1 is the sequence without the escaping backslash.
      const [, unescapedSequence] = escaped;
      return replaceMatch(line, escaped, unescapedSequence, (part) => [toInline(part)]).join('');
    })
    .join('<br/>');
};

View file

@ -0,0 +1,100 @@
import { BlockMDRule } from './type';
const HEADING_REG_1 = /^(#{1,6}) +(.+)\n?/m;
/**
 * Block rule turning a `#`-prefixed line (1 to 6 hashes followed by at least
 * one space) into the heading element of the matching level.
 */
export const HeadingRule: BlockMDRule = {
  match: (text) => HEADING_REG_1.exec(text),
  html: (match, parseInline) => {
    const hashes = match[1];
    const title = match[2];
    const level = hashes.length;
    const inner = parseInline ? parseInline(title) : title;
    return `<h${level} data-md="${hashes}">${inner}</h${level}>`;
  },
};
const CODEBLOCK_MD_1 = '```';
const CODEBLOCK_REG_1 = /^`{3}(\S*)\n((?:.*\n)+?)`{3} *(?!.)\n?/m;
/**
 * Block rule turning a fenced code block into `<pre><code>`.
 * Text directly after the opening fence becomes a `language-*` class.
 * The code itself is inserted as is, without inline parsing.
 */
export const CodeBlockRule: BlockMDRule = {
  match: (text) => CODEBLOCK_REG_1.exec(text),
  html: (match) => {
    const language = match[1];
    const code = match[2];
    const classAttr = language ? ` class="language-${language}"` : '';
    return `<pre data-md="${CODEBLOCK_MD_1}"><code${classAttr}>${code}</code></pre>`;
  },
};
const BLOCKQUOTE_MD_1 = '>';
const QUOTE_LINE_PREFIX = /^> */;
const BLOCKQUOTE_TRAILING_NEWLINE = /\n$/;
const BLOCKQUOTE_REG_1 = /(^>.*\n?)+/m;
/**
 * Block rule turning consecutive `>`-prefixed lines into one `<blockquote>`.
 * Each line loses its `>` prefix and is terminated with `<br/>`.
 */
export const BlockQuoteRule: BlockMDRule = {
  match: (text) => BLOCKQUOTE_REG_1.exec(text),
  html: (match, parseInline) => {
    const quoted = match[0].replace(BLOCKQUOTE_TRAILING_NEWLINE, '');
    let body = '';
    for (const rawLine of quoted.split('\n')) {
      const line = rawLine.replace(QUOTE_LINE_PREFIX, '');
      body += `${parseInline ? parseInline(line) : line}<br/>`;
    }
    return `<blockquote data-md="${BLOCKQUOTE_MD_1}">${body}</blockquote>`;
  },
};
const ORDERED_LIST_MD_1 = '-';
const O_LIST_ITEM_PREFIX = /^(-|[\da-zA-Z]\.) */;
const O_LIST_START = /^([\d])\./;
const O_LIST_TYPE = /^([aAiI])\./;
const O_LIST_TRAILING_NEWLINE = /\n$/;
const ORDERED_LIST_REG_1 = /(^(?:-|[\da-zA-Z]\.) +.+\n?)+/m;
/**
 * Block rule turning consecutive `-`, `1.` or `a.` style lines into an `<ol>`.
 * A leading digit on the first line becomes the `start` attribute and a
 * leading `a`/`A`/`i`/`I` becomes the `type` attribute.
 */
export const OrderedListRule: BlockMDRule = {
  match: (text) => ORDERED_LIST_REG_1.exec(text),
  html: (match, parseInline) => {
    const listText = match[0];
    const listStart = O_LIST_START.exec(listText)?.[1];
    const listType = O_LIST_TYPE.exec(listText)?.[1];

    let items = '';
    for (const rawLine of listText.replace(O_LIST_TRAILING_NEWLINE, '').split('\n')) {
      const line = rawLine.replace(O_LIST_ITEM_PREFIX, '');
      items += `<li><p>${parseInline ? parseInline(line) : line}</p></li>`;
    }

    const attrs = [
      `data-md="${listType || listStart || ORDERED_LIST_MD_1}"`,
      listStart ? ` start="${listStart}"` : '',
      listType ? ` type="${listType}"` : '',
    ].join('');
    return `<ol ${attrs}>${items}</ol>`;
  },
};
const UNORDERED_LIST_MD_1 = '*';
const U_LIST_ITEM_PREFIX = /^\* */;
const U_LIST_TRAILING_NEWLINE = /\n$/;
const UNORDERED_LIST_REG_1 = /(^\* +.+\n?)+/m;
/** Block rule turning consecutive `* item` lines into a `<ul>`. */
export const UnorderedListRule: BlockMDRule = {
  match: (text) => UNORDERED_LIST_REG_1.exec(text),
  html: (match, parseInline) => {
    const listText = match[0];
    let items = '';
    for (const rawLine of listText.replace(U_LIST_TRAILING_NEWLINE, '').split('\n')) {
      const line = rawLine.replace(U_LIST_ITEM_PREFIX, '');
      items += `<li><p>${parseInline ? parseInline(line) : line}</p></li>`;
    }
    return `<ul data-md="${UNORDERED_LIST_MD_1}">${items}</ul>`;
  },
};
/**
 * Matches a block markdown sequence at the very start of the text, optionally
 * preceded by any number of backslashes. The recognised sequences are: one to
 * six `#` followed by spaces, a triple backtick, `>`, a list marker (`-` or a
 * single digit/letter followed by `.`) followed by spaces, or `*` followed by
 * spaces.
 *
 * Capture group 1 holds only the sequence; leading backslashes are part of the
 * overall match but not of group 1.
 */
export const UN_ESC_BLOCK_SEQ = /^\\*(#{1,6} +|```|>|(-|[\da-zA-Z]\.) +|\* +)/;
/**
 * Matches an escaped block markdown sequence at the very start of the text:
 * one backslash followed by the same sequences as `UN_ESC_BLOCK_SEQ`.
 *
 * Capture group 1 holds everything after the first backslash, i.e. any further
 * backslashes plus the sequence itself.
 */
export const ESC_BLOCK_SEQ = /^\\(\\*(#{1,6} +|```|>|(-|[\da-zA-Z]\.) +|\* +))/;

View file

@ -0,0 +1,25 @@
import { replaceMatch } from '../internal';
import { BlockMDParser, BlockMDRule } from './type';
/**
 * Applies a single block-level markdown rule to the text.
 *
 * When the rule matches, the matched span is replaced by the rule's HTML and
 * the text before and after the match is handed to `parse`.
 *
 * @param text - The text to parse.
 * @param rule - The markdown rule to run.
 * @param parse - A function that runs the parser on the remaining parts.
 * @param parseInline - Optional function to parse inline elements.
 * @returns The text with the markdown rule applied or `undefined` if no match is found.
 */
export const runBlockRule = (
  text: string,
  rule: BlockMDRule,
  parse: BlockMDParser,
  parseInline?: (txt: string) => string
): string | undefined => {
  const found = rule.match(text);
  if (!found) return undefined;

  const html = rule.html(found, parseInline);
  const parts = replaceMatch(text, found, html, (rest) => [parse(rest, parseInline)]);
  return parts.join('');
};

View file

@ -0,0 +1,30 @@
import { MatchResult, MatchRule } from '../internal';
/**
 * Signature of a function that parses block-level markdown into HTML.
 *
 * @param text - The markdown text to be parsed.
 * @param parseInline - Optional function used to render inline markdown inside blocks.
 * @returns The parsed HTML.
 */
export type BlockMDParser = (text: string, parseInline?: (txt: string) => string) => string;
/**
 * Signature of a function that converts a block match into its output.
 *
 * @param match - The match result produced by the rule's `match` function.
 * @param parseInline - Optional function used to render inline markdown inside the block.
 * @returns The output string after processing the match.
 */
export type BlockMatchConverter = (
match: MatchResult,
parseInline?: (txt: string) => string
) => string;
/**
 * A block-level markdown rule: a matcher paired with the converter that turns
 * its match into HTML.
 */
export type BlockMDRule = {
match: MatchRule; // Finds the rule's markdown pattern in a text; returns null when absent.
html: BlockMatchConverter; // Converts the match result to HTML.
};

View file

@ -0,0 +1,3 @@
export * from './utils';
export * from './block';
export * from './inline';

View file

@ -0,0 +1 @@
export * from './parser';

View file

@ -0,0 +1,40 @@
import {
BoldRule,
CodeRule,
EscapeRule,
ItalicRule1,
ItalicRule2,
LinkRule,
SpoilerRule,
StrikeRule,
UnderlineRule,
} from './rules';
import { runInlineRule, runInlineRules } from './runner';
import { InlineMDParser } from './type';
// Rules that compete with each other on every pass; they are handed to
// `runInlineRules` together, in this order.
const LeveledRules = [
  BoldRule,
  ItalicRule1,
  UnderlineRule,
  ItalicRule2,
  StrikeRule,
  SpoilerRule,
  LinkRule,
  EscapeRule,
];

/**
 * Parses inline markdown text into HTML using defined rules.
 *
 * The code rule is tried on its own first; only when it yields nothing are the
 * leveled rules run.
 *
 * @param text - The markdown text to be parsed.
 * @returns The parsed HTML or the original text if no markdown was found.
 */
export const parseInlineMD: InlineMDParser = (text) => {
  if (text === '') return text;

  const codeResult = runInlineRule(text, CodeRule, parseInlineMD);
  if (codeResult) return codeResult;

  return runInlineRules(text, LeveledRules, parseInlineMD) ?? text;
};

View file

@ -0,0 +1,123 @@
import { InlineMDRule } from './type';
// Lazily captures one or more characters (the text between two delimiters).
const MIN_ANY = '(.+?)';
// Negative lookbehind: rejects a position that is preceded by a URL scheme
// (`http(s)`, `ftp`, `mailto`, `magnet`), `://` and any non-whitespace run.
// NOTE: the scheme alternation is a capturing group, so every regex that
// starts with this fragment has it as group 1 and its own groups start at 2.
const URL_NEG_LB = '(?<!(https?|ftp|mailto|magnet):\\/\\/\\S*)';
// Negative lookbehind: rejects a position that is directly preceded by a backslash.
const ESC_NEG_LB = '(?<!\\\\)';
const BOLD_MD_1 = '**';
const BOLD_PREFIX_1 = `${ESC_NEG_LB}\\*{2}`;
const BOLD_NEG_LA_1 = '(?!\\*)';
const BOLD_REG_1 = new RegExp(
  [URL_NEG_LB, BOLD_PREFIX_1, MIN_ANY, BOLD_PREFIX_1, BOLD_NEG_LA_1].join('')
);

/**
 * Inline rule for bold text: `**text**` is rendered as `<strong>`.
 * Delimiters preceded by a backslash are ignored.
 */
export const BoldRule: InlineMDRule = {
  match: (text) => text.match(BOLD_REG_1),
  html: (parse, match) => {
    // Group 1 belongs to the URL lookbehind; the delimited text is group 2.
    const inner = match[2];
    return `<strong data-md="${BOLD_MD_1}">${parse(inner)}</strong>`;
  },
};
const ITALIC_MD_1 = '*';
const ITALIC_PREFIX_1 = `${ESC_NEG_LB}\\*`;
const ITALIC_NEG_LA_1 = '(?!\\*)';
const ITALIC_REG_1 = new RegExp(
  [URL_NEG_LB, ITALIC_PREFIX_1, MIN_ANY, ITALIC_PREFIX_1, ITALIC_NEG_LA_1].join('')
);

/**
 * Inline rule for italic text written with asterisks: `*text*` is rendered as `<i>`.
 * Delimiters preceded by a backslash are ignored.
 */
export const ItalicRule1: InlineMDRule = {
  match: (text) => text.match(ITALIC_REG_1),
  html: (parse, match) => {
    // Group 1 belongs to the URL lookbehind; the delimited text is group 2.
    const inner = match[2];
    return `<i data-md="${ITALIC_MD_1}">${parse(inner)}</i>`;
  },
};
const ITALIC_MD_2 = '_';
const ITALIC_PREFIX_2 = `${ESC_NEG_LB}_`;
const ITALIC_NEG_LA_2 = '(?!_)';
const ITALIC_REG_2 = new RegExp(
  [URL_NEG_LB, ITALIC_PREFIX_2, MIN_ANY, ITALIC_PREFIX_2, ITALIC_NEG_LA_2].join('')
);

/**
 * Inline rule for italic text written with underscores: `_text_` is rendered as `<i>`.
 * Delimiters preceded by a backslash are ignored.
 */
export const ItalicRule2: InlineMDRule = {
  match: (text) => text.match(ITALIC_REG_2),
  html: (parse, match) => {
    // Group 1 belongs to the URL lookbehind; the delimited text is group 2.
    const inner = match[2];
    return `<i data-md="${ITALIC_MD_2}">${parse(inner)}</i>`;
  },
};
const UNDERLINE_MD_1 = '__';
const UNDERLINE_PREFIX_1 = `${ESC_NEG_LB}_{2}`;
const UNDERLINE_NEG_LA_1 = '(?!_)';
const UNDERLINE_REG_1 = new RegExp(
  [URL_NEG_LB, UNDERLINE_PREFIX_1, MIN_ANY, UNDERLINE_PREFIX_1, UNDERLINE_NEG_LA_1].join('')
);

/**
 * Inline rule for underlined text: `__text__` is rendered as `<u>`.
 * Delimiters preceded by a backslash are ignored.
 */
export const UnderlineRule: InlineMDRule = {
  match: (text) => text.match(UNDERLINE_REG_1),
  html: (parse, match) => {
    // Group 1 belongs to the URL lookbehind; the delimited text is group 2.
    const inner = match[2];
    return `<u data-md="${UNDERLINE_MD_1}">${parse(inner)}</u>`;
  },
};
const STRIKE_MD_1 = '~~';
const STRIKE_PREFIX_1 = `${ESC_NEG_LB}~{2}`;
const STRIKE_NEG_LA_1 = '(?!~)';
const STRIKE_REG_1 = new RegExp(
  [URL_NEG_LB, STRIKE_PREFIX_1, MIN_ANY, STRIKE_PREFIX_1, STRIKE_NEG_LA_1].join('')
);

/**
 * Inline rule for strike-through text: `~~text~~` is rendered as `<s>`.
 * Delimiters preceded by a backslash are ignored.
 */
export const StrikeRule: InlineMDRule = {
  match: (text) => text.match(STRIKE_REG_1),
  html: (parse, match) => {
    // Group 1 belongs to the URL lookbehind; the delimited text is group 2.
    const inner = match[2];
    return `<s data-md="${STRIKE_MD_1}">${parse(inner)}</s>`;
  },
};
const CODE_MD_1 = '`';
const CODE_PREFIX_1 = `${ESC_NEG_LB}\``;
const CODE_NEG_LA_1 = '(?!`)';
const CODE_REG_1 = new RegExp(
  [URL_NEG_LB, CODE_PREFIX_1, '(.+?)', CODE_PREFIX_1, CODE_NEG_LA_1].join('')
);

/**
 * Inline rule for code spans: text between single backticks is rendered as `<code>`.
 * Unlike the other inline rules, the content is emitted as-is and is not
 * passed back to the parser.
 */
export const CodeRule: InlineMDRule = {
  match: (text) => text.match(CODE_REG_1),
  html: (parse, match) => {
    // Group 1 belongs to the URL lookbehind; the code content is group 2.
    const code = match[2];
    return `<code data-md="${CODE_MD_1}">${code}</code>`;
  },
};
const SPOILER_MD_1 = '||';
const SPOILER_PREFIX_1 = `${ESC_NEG_LB}\\|{2}`;
const SPOILER_NEG_LA_1 = '(?!\\|)';
const SPOILER_REG_1 = new RegExp(
  [URL_NEG_LB, SPOILER_PREFIX_1, MIN_ANY, SPOILER_PREFIX_1, SPOILER_NEG_LA_1].join('')
);

/**
 * Inline rule for spoilers: `||text||` is rendered as a `<span>` carrying the
 * `data-mx-spoiler` attribute. Delimiters preceded by a backslash are ignored.
 */
export const SpoilerRule: InlineMDRule = {
  match: (text) => text.match(SPOILER_REG_1),
  html: (parse, match) => {
    // Group 1 belongs to the URL lookbehind; the delimited text is group 2.
    const inner = match[2];
    return `<span data-md="${SPOILER_MD_1}" data-mx-spoiler>${parse(inner)}</span>`;
  },
};
const LINK_ALT = `\\[${MIN_ANY}\\]`;
const LINK_URL = '\\((https?:\\/\\/.+?)\\)';
const LINK_REG_1 = new RegExp(LINK_ALT + LINK_URL);

/**
 * Inline rule for links: `[label](http(s)://url)` is rendered as an anchor.
 * The label is passed back to the parser; the URL is used verbatim as `href`.
 */
export const LinkRule: InlineMDRule = {
  match: (text) => text.match(LINK_REG_1),
  html: (parse, match) => {
    const label = match[1];
    const url = match[2];
    return `<a data-md href="${url}">${parse(label)}</a>`;
  },
};
// Character class of the characters that start an inline markdown sequence.
export const INLINE_SEQUENCE_SET = '[*_~`|]';
const ESC_SEQ_1 = `\\\\(${INLINE_SEQUENCE_SET})`;
const ESC_REG_1 = new RegExp(URL_NEG_LB + ESC_SEQ_1);

/**
 * Inline rule for escape sequences: a backslash followed by one of the inline
 * markdown characters is replaced by that character alone.
 */
export const EscapeRule: InlineMDRule = {
  match: (text) => text.match(ESC_REG_1),
  html: (parse, match) => {
    // Group 1 belongs to the URL lookbehind; the escaped character is group 2.
    const escapedChar = match[2];
    return escapedChar;
  },
};

View file

@ -0,0 +1,62 @@
import { MatchResult, replaceMatch } from '../internal';
import { InlineMDParser, InlineMDRule } from './type';
/**
 * Applies a single inline markdown rule to the text.
 *
 * When the rule matches, the matched span is replaced by the rule's output and
 * the text before and after the match is handed to `parse`.
 *
 * @param text - The text to parse.
 * @param rule - The markdown rule to run.
 * @param parse - A function that runs the parser on the remaining parts.
 * @returns The text with the markdown rule applied or `undefined` if no match is found.
 */
export const runInlineRule = (
  text: string,
  rule: InlineMDRule,
  parse: InlineMDParser
): string | undefined => {
  const found = rule.match(text);
  if (!found) return undefined;

  const html = rule.html(parse, found);
  const parts = replaceMatch(text, found, html, (rest) => [parse(rest)]);
  return parts.join('');
};
/**
 * Runs several rules against the same text and applies only the one whose
 * match starts earliest, which lets nested markup resolve from the outside in.
 * When two rules match at the same position, the rule listed first wins.
 *
 * @param text - The text to parse.
 * @param rules - The markdown rules to run, in priority order.
 * @param parse - A function that runs the parser on the remaining parts.
 * @returns The text with the winning rule applied or `undefined` if no rule matches.
 */
export const runInlineRules = (
  text: string,
  rules: InlineMDRule[],
  parse: InlineMDParser
): string | undefined => {
  type Candidate = { rule: InlineMDRule; result: MatchResult; index: number };

  const earliest = rules.reduce<Candidate | undefined>((best, rule) => {
    const result = rule.match(text);
    // Matches without a numeric index cannot be positioned, so they never win.
    if (!result || typeof result.index !== 'number') return best;
    // Strict comparison keeps the earlier-listed rule on ties.
    if (best && result.index >= best.index) return best;
    return { rule, result, index: result.index };
  }, undefined);

  if (!earliest) return undefined;

  const html = earliest.rule.html(parse, earliest.result);
  const parts = replaceMatch(text, earliest.result, html, (rest) => [parse(rest)]);
  return parts.join('');
};

View file

@ -0,0 +1,26 @@
import { MatchResult, MatchRule } from '../internal';
/**
 * Signature of a function that parses inline markdown into HTML.
 *
 * @param text - The markdown text to be parsed.
 * @returns The parsed HTML.
 */
export type InlineMDParser = (text: string) => string;
/**
 * Signature of a function that converts an inline match into its output.
 *
 * @param parse - The inline markdown parser, available for rendering nested content.
 * @param match - The match result produced by the rule's `match` function.
 * @returns The output string after processing the match.
 */
export type InlineMatchConverter = (parse: InlineMDParser, match: MatchResult) => string;
/**
 * An inline markdown rule: a matcher paired with the converter that turns its
 * match into HTML.
 */
export type InlineMDRule = {
match: MatchRule; // Finds the rule's markdown pattern in a text; returns null when absent.
html: InlineMatchConverter; // Converts the match result to HTML.
};

View file

@ -0,0 +1 @@
export * from './utils';

View file

@ -0,0 +1,61 @@
/**
 * The result of a regular expression match.
 *
 * Either a `RegExpMatchArray` (from `String.prototype.match`) or a
 * `RegExpExecArray` (from `RegExp.prototype.exec`).
 *
 * @see {@link https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/exec}
 * @see {@link https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/match}
 */
export type MatchResult = RegExpMatchArray | RegExpExecArray;
/**
 * A function that looks for a pattern in a string.
 *
 * @param text - The string to match against.
 * @returns The match result, or `null` if no match is found.
 */
export type MatchRule = (text: string) => MatchResult | null;
/**
 * Returns the part of the text before a match.
 *
 * A match without an `index` (e.g. the array returned by `String.prototype.match`
 * with a global regex) is treated as starting at position 0, the same
 * convention `afterMatch` uses, so the result is an empty string rather than
 * the whole text.
 *
 * @param text - The input text string.
 * @param match - The match result (e.g., `RegExpMatchArray` or `RegExpExecArray`).
 * @returns A string containing the part of the text before the match.
 */
export const beforeMatch = (text: string, match: RegExpMatchArray | RegExpExecArray): string =>
  // `slice(0, undefined)` would return the entire text, so default the index explicitly.
  text.slice(0, match.index ?? 0);
/**
 * Returns the part of the text that follows a match.
 *
 * A match without an `index` is treated as starting at position 0.
 *
 * @param text - The input text string.
 * @param match - The match result (e.g., `RegExpMatchArray` or `RegExpExecArray`).
 * @returns A string containing the part of the text after the match.
 */
export const afterMatch = (text: string, match: RegExpMatchArray | RegExpExecArray): string => {
  const matchStart = match.index ?? 0;
  const matchEnd = matchStart + match[0].length;
  return text.slice(matchEnd);
};
/**
 * Replaces a match in the text with the given content and processes the text
 * on either side of it.
 *
 * @param text - The input text string.
 * @param match - The match result (e.g., `RegExpMatchArray` or `RegExpExecArray`).
 * @param content - The content to put in place of the match.
 * @param processPart - A function to further process the text before and after the match.
 * @returns The processed leading parts, the content, then the processed trailing parts.
 */
export const replaceMatch = <C>(
  text: string,
  match: MatchResult,
  content: C,
  processPart: (txt: string) => Array<string | C>
): Array<string | C> => {
  const leadingParts = processPart(beforeMatch(text, match));
  const trailingParts = processPart(afterMatch(text, match));
  return [...leadingParts, content, ...trailingParts];
};

View file

@ -0,0 +1,83 @@
import { findAndReplace } from '../../utils/findAndReplace';
import { ESC_BLOCK_SEQ, UN_ESC_BLOCK_SEQ } from './block/rules';
import { EscapeRule, INLINE_SEQUENCE_SET } from './inline/rules';
import { runInlineRule } from './inline/runner';
import { replaceMatch } from './internal';
/**
 * Removes the backslashes that escape inline markdown characters in a
 * plain-text string (e.g. `\*`, `\_`), leaving the bare characters behind.
 * Every escape sequence in the text is handled, not only the first one.
 *
 * @param text - The input markdown plain-text containing escape characters (e.g., `"some \*italic\*"`)
 * @returns The plain-text with markdown escape sequences removed (e.g., `"some *italic*"`)
 */
export const unescapeMarkdownInlineSequences = (text: string): string => {
  const unescaped = runInlineRule(text, EscapeRule, (rest) =>
    // Stop recursing on empty remainders.
    rest === '' ? rest : unescapeMarkdownInlineSequences(rest)
  );
  return unescaped ?? text;
};
/**
 * Adds a backslash (`\`) in front of every inline markdown character
 * (e.g., `*`, `_`) in a plain-text string so the characters are treated as
 * literals rather than as markdown formatting.
 *
 * @param text - The input plain-text that may contain markdown sequences (e.g., `"some *italic*"`)
 * @returns The plain-text with markdown escape sequences added (e.g., `"some \*italic\*"`)
 */
export const escapeMarkdownInlineSequences = (text: string): string => {
  // Built per call: a global regex keeps state between uses.
  const sequencePattern = new RegExp(`(${INLINE_SEQUENCE_SET})`, 'g');
  return findAndReplace(
    text,
    sequencePattern,
    (found) => `\\${found[1]}`,
    (plain) => plain
  ).join('');
};
/**
 * Removes one escaping backslash from a block markdown sequence at the start
 * of the given plain-text (e.g., `\>`, `\#`) and runs `processPart` on the
 * text around it. Text that does not start with an escaped block sequence is
 * passed to `processPart` unchanged.
 *
 * @param text - The input markdown plain-text containing escape characters (e.g., `\> block quote`).
 * @param processPart - Takes plain-text as input and returns a modified version of it.
 * @returns The plain-text with the block escape sequence removed and the rest processed.
 */
export const unescapeMarkdownBlockSequences = (
  text: string,
  processPart: (text: string) => string
): string => {
  const escaped = text.match(ESC_BLOCK_SEQ);
  if (escaped === null) return processPart(text);

  // Group 1 is everything after the first backslash.
  const sequence = escaped[1];
  const parts = replaceMatch(text, escaped, sequence, (part) => [processPart(part)]);
  return parts.join('');
};
/**
 * Escapes a block markdown sequence at the start of the given plain-text by
 * adding one backslash in front of it (e.g., `> quote` becomes `\> quote`),
 * and runs `processPart` on the text around it. Text that does not start with
 * a block sequence is passed to `processPart` unchanged.
 *
 * Backslashes that already precede the sequence are kept, so the result is the
 * exact inverse of `unescapeMarkdownBlockSequences` (`\> quote` becomes
 * `\\> quote`).
 *
 * @param text - The input markdown plain-text that may contain markdown elements (e.g., `> block quote`).
 * @param processPart - Takes plain-text as input and returns a modified version of it.
 * @returns The plain-text with the block escape sequence added and the rest processed.
 */
export const escapeMarkdownBlockSequences = (
  text: string,
  processPart: (text: string) => string
): string => {
  const match = text.match(UN_ESC_BLOCK_SEQ);
  if (!match) return processPart(text);

  // Prefix the whole match, not just capture group 1: group 1 excludes any
  // leading backslashes, and dropping them would lose one level of escaping.
  const [sequence] = match;
  return replaceMatch(text, match, `\\${sequence}`, (t) => [processPart(t)]).join('');
};