132 lines
4.5 KiB
TypeScript
132 lines
4.5 KiB
TypeScript
/**
 * Convert HTML to Markdown.
 *
 * Simple regex-based converter for rich text editor output. It handles
 * `<h1>`–`<h4>`, bold, italic, TipTap-style buttons (`<a data-button …>`),
 * plain links, unordered/ordered lists, paragraphs, `<br>` and `<hr>`.
 * Any other tag is stripped, keeping its text content.
 *
 * Paragraphs and headings that carry `text-align: center|right` have no
 * Markdown equivalent, so they are emitted as raw HTML with a normalized
 * `style="text-align: …;"` attribute.
 *
 * @param html - HTML fragment to convert; an empty string yields `''`.
 * @returns The Markdown text, trimmed, with runs of 3+ newlines collapsed to 2.
 */
export function htmlToMarkdown(html: string): string {
  if (!html) return '';

  let markdown = html;

  // Aligned headings are parked behind placeholders so the later
  // "strip remaining tags" pass does not destroy them.
  const alignedHeadings = new Map<string, string>();

  // Process headings with potential style attributes
  for (let level = 1; level <= 4; level++) {
    const hashes = '#'.repeat(level);
    const headingPattern = new RegExp(`<h${level}([^>]*)>(.*?)</h${level}>`, 'gis');
    markdown = markdown.replace(headingPattern, (_match: string, attrs: string, content: string) => {
      // Check for text-align in style attribute
      const alignMatch = attrs.match(/text-align:\s*(center|right)/i);
      if (alignMatch) {
        const align = alignMatch[1].toLowerCase();
        const placeholder = `[[HEADING${alignedHeadings.size}]]`;
        alignedHeadings.set(placeholder, `<h${level} style="text-align: ${align};">${content}</h${level}>`);
        return placeholder + '\n\n';
      }
      // No alignment, convert to markdown
      return `${hashes} ${content}\n\n`;
    });
  }

  // Bold
  markdown = markdown.replace(/<strong>(.*?)<\/strong>/gi, '**$1**');
  markdown = markdown.replace(/<b>(.*?)<\/b>/gi, '**$1**');

  // Italic
  markdown = markdown.replace(/<em>(.*?)<\/em>/gi, '*$1*');
  markdown = markdown.replace(/<i>(.*?)<\/i>/gi, '*$1*');

  // TipTap buttons - detect by data-button attribute, BEFORE generic links
  // Format: <a data-button data-style="solid" data-href="..." data-text="...">text</a>
  // or: <a href="..." class="button..." data-button ...>text</a>
  markdown = markdown.replace(
    /(<a[^>]*data-button[^>]*>)(.*?)<\/a>/gi,
    (_match: string, openTag: string, text: string) => {
      // Only the opening tag is inspected: the label is free text and must
      // not influence the detected style or URL (e.g. a label "View outline").
      let style = 'solid';
      const styleMatch = openTag.match(/data-style=["'](\w+)["']/);
      if (styleMatch) {
        style = styleMatch[1];
      } else if (openTag.includes('outline')) {
        // Covers class names such as "button-outline".
        style = 'outline';
      }

      // Prefer data-href; fall back to href (this pattern also matches the
      // tail of a data-href attribute, which yields the same value).
      const dataHrefMatch = openTag.match(/data-href=["']([^"']+)["']/);
      const hrefMatch = openTag.match(/href=["']([^"']+)["']/);
      const url = dataHrefMatch?.[1] ?? hrefMatch?.[1] ?? '#';

      return `[button:${style}](${url})${text.trim()}[/button]`;
    },
  );

  // Regular links (not buttons)
  markdown = markdown.replace(/<a\s+href="([^"]+)"[^>]*>(.*?)<\/a>/gi, '[$2]($1)');

  // Lists
  markdown = markdown.replace(/<ul[^>]*>(.*?)<\/ul>/gis, (_match: string, content: string) => {
    const items: string[] = content.match(/<li[^>]*>(.*?)<\/li>/gis) ?? [];
    return (
      items
        .map((item) => `- ${item.replace(/<li[^>]*>(.*?)<\/li>/is, '$1').trim()}`)
        .join('\n') + '\n\n'
    );
  });

  markdown = markdown.replace(/<ol[^>]*>(.*?)<\/ol>/gis, (_match: string, content: string) => {
    const items: string[] = content.match(/<li[^>]*>(.*?)<\/li>/gis) ?? [];
    return (
      items
        .map((item, index) => `${index + 1}. ${item.replace(/<li[^>]*>(.*?)<\/li>/is, '$1').trim()}`)
        .join('\n') + '\n\n'
    );
  });

  // Paragraphs - preserve text-align by using placeholders
  const alignedParagraphs = new Map<string, string>();
  markdown = markdown.replace(/<p([^>]*)>(.*?)<\/p>/gis, (_match: string, attrs: string, content: string) => {
    // Check for text-align in style attribute
    const alignMatch = attrs.match(/text-align:\s*(center|right)/i);
    if (alignMatch) {
      const align = alignMatch[1].toLowerCase();
      // Use double-bracket placeholder that won't be matched by HTML regex
      const placeholder = `[[ALIGN${alignedParagraphs.size}]]`;
      alignedParagraphs.set(placeholder, `<p style="text-align: ${align};">${content}</p>`);
      return placeholder + '\n\n';
    }
    // No alignment, convert to plain text
    return `${content}\n\n`;
  });

  // Line breaks
  markdown = markdown.replace(/<br\s*\/?>/gi, '\n');

  // Horizontal rules
  markdown = markdown.replace(/<hr\s*\/?>/gi, '\n---\n\n');

  // Remove remaining HTML tags
  markdown = markdown.replace(/<[^>]+>/g, '');

  // Restore aligned paragraphs first, then headings: a paragraph's saved
  // content may itself contain a heading placeholder.
  // A replacer function is used so `$` sequences in the saved HTML
  // (`$$`, `$&`, `$1`, …) are inserted literally instead of being
  // interpreted as special replacement patterns.
  for (const [placeholder, saved] of alignedParagraphs) {
    markdown = markdown.replace(placeholder, () => saved);
  }
  for (const [placeholder, saved] of alignedHeadings) {
    markdown = markdown.replace(placeholder, () => saved);
  }

  // Clean up excessive newlines
  markdown = markdown.replace(/\n{3,}/g, '\n\n');

  return markdown.trim();
}
|