Files
WooNooW/admin-spa/src/lib/html-to-markdown.ts

132 lines
4.5 KiB
TypeScript

/**
* Convert HTML to Markdown
* Simple converter for rich text editor output
*/
export function htmlToMarkdown(html: string): string {
if (!html) return '';
let markdown = html;
// Store aligned headings for preservation
const alignedHeadings: { [key: string]: string } = {};
let headingIndex = 0;
// Process headings with potential style attributes
for (let level = 1; level <= 4; level++) {
const hashes = '#'.repeat(level);
markdown = markdown.replace(new RegExp(`<h${level}([^>]*)>(.*?)</h${level}>`, 'gis'), (match, attrs, content) => {
// Check for text-align in style attribute
const alignMatch = attrs.match(/text-align:\s*(center|right)/i);
if (alignMatch) {
const align = alignMatch[1].toLowerCase();
const placeholder = `[[HEADING${headingIndex}]]`;
alignedHeadings[placeholder] = `<h${level} style="text-align: ${align};">${content}</h${level}>`;
headingIndex++;
return placeholder + '\n\n';
}
// No alignment, convert to markdown
return `${hashes} ${content}\n\n`;
});
}
// Bold
markdown = markdown.replace(/<strong>(.*?)<\/strong>/gi, '**$1**');
markdown = markdown.replace(/<b>(.*?)<\/b>/gi, '**$1**');
// Italic
markdown = markdown.replace(/<em>(.*?)<\/em>/gi, '*$1*');
markdown = markdown.replace(/<i>(.*?)<\/i>/gi, '*$1*');
// TipTap buttons - detect by data-button attribute, BEFORE generic links
// Format: <a data-button data-style="solid" data-href="..." data-text="...">text</a>
// or: <a href="..." class="button..." data-button ...>text</a>
markdown = markdown.replace(/<a[^>]*data-button[^>]*>(.*?)<\/a>/gi, (match, text) => {
// Extract style from data-style or class
let style = 'solid';
const styleMatch = match.match(/data-style=["'](\w+)["']/);
if (styleMatch) {
style = styleMatch[1];
} else if (match.includes('button-outline') || match.includes('outline')) {
style = 'outline';
}
// Extract href from data-href or href attribute
let url = '#';
const dataHrefMatch = match.match(/data-href=["']([^"']+)["']/);
const hrefMatch = match.match(/href=["']([^"']+)["']/);
if (dataHrefMatch) {
url = dataHrefMatch[1];
} else if (hrefMatch) {
url = hrefMatch[1];
}
return `[button:${style}](${url})${text.trim()}[/button]`;
});
// Regular links (not buttons)
markdown = markdown.replace(/<a\s+href="([^"]+)"[^>]*>(.*?)<\/a>/gi, '[$2]($1)');
// Lists
markdown = markdown.replace(/<ul[^>]*>(.*?)<\/ul>/gis, (match, content) => {
const items = content.match(/<li[^>]*>(.*?)<\/li>/gis) || [];
return items.map((item: string) => {
const text = item.replace(/<li[^>]*>(.*?)<\/li>/is, '$1').trim();
return `- ${text}`;
}).join('\n') + '\n\n';
});
markdown = markdown.replace(/<ol[^>]*>(.*?)<\/ol>/gis, (match, content) => {
const items = content.match(/<li[^>]*>(.*?)<\/li>/gis) || [];
return items.map((item: string, index: number) => {
const text = item.replace(/<li[^>]*>(.*?)<\/li>/is, '$1').trim();
return `${index + 1}. ${text}`;
}).join('\n') + '\n\n';
});
// Paragraphs - preserve text-align by using placeholders
const alignedParagraphs: { [key: string]: string } = {};
let alignIndex = 0;
markdown = markdown.replace(/<p([^>]*)>(.*?)<\/p>/gis, (match, attrs, content) => {
// Check for text-align in style attribute
const alignMatch = attrs.match(/text-align:\s*(center|right)/i);
if (alignMatch) {
const align = alignMatch[1].toLowerCase();
// Use double-bracket placeholder that won't be matched by HTML regex
const placeholder = `[[ALIGN${alignIndex}]]`;
alignedParagraphs[placeholder] = `<p style="text-align: ${align};">${content}</p>`;
alignIndex++;
return placeholder + '\n\n';
}
// No alignment, convert to plain text
return `${content}\n\n`;
});
// Line breaks
markdown = markdown.replace(/<br\s*\/?>/gi, '\n');
// Horizontal rules
markdown = markdown.replace(/<hr\s*\/?>/gi, '\n---\n\n');
// Remove remaining HTML tags
markdown = markdown.replace(/<[^>]+>/g, '');
// Restore aligned paragraphs
Object.entries(alignedParagraphs).forEach(([placeholder, html]) => {
markdown = markdown.replace(placeholder, html);
});
// Restore aligned headings
Object.entries(alignedHeadings).forEach(([placeholder, html]) => {
markdown = markdown.replace(placeholder, html);
});
// Clean up excessive newlines
markdown = markdown.replace(/\n{3,}/g, '\n\n');
// Trim
markdown = markdown.trim();
return markdown;
}