/**
 * Convert HTML to Markdown.
 *
 * A deliberately simple, regex-based converter for rich text editor output.
 * It is not a general HTML parser: it handles headings (h1-h4), bold, italic,
 * TipTap buttons, links, flat lists, paragraphs, line breaks and horizontal
 * rules, then strips every tag that is left over.
 *
 * Headings and paragraphs carrying `text-align: center|right` cannot be
 * expressed in Markdown, so they are kept as inline HTML. They are swapped for
 * `[[HEADINGn]]` / `[[ALIGNn]]` placeholders while tags are stripped and put
 * back afterwards.
 *
 * NOTE(review): the exact wrapper markup restored for aligned paragraphs
 * (`<p style="text-align: ...">`) mirrors the heading wrapper — confirm it
 * matches what the Markdown-to-HTML counterpart expects.
 *
 * @param html - HTML fragment produced by the editor.
 * @returns The Markdown text, trimmed, with runs of 3+ newlines collapsed to
 *   two. Returns an empty string for empty input.
 */
export function htmlToMarkdown(html: string): string {
  if (!html) return '';

  let markdown = html;

  // Aligned headings are parked here, keyed by placeholder, until tag stripping is done.
  const alignedHeadings: Record<string, string> = {};
  let headingIndex = 0;

  // Headings h1-h4, with optional attributes on the opening tag.
  for (let level = 1; level <= 4; level++) {
    const hashes = '#'.repeat(level);
    const headingPattern = new RegExp(`<h${level}([^>]*)>(.*?)</h${level}>`, 'gis');
    markdown = markdown.replace(
      headingPattern,
      (_match: string, attrs: string, content: string): string => {
        const align = extractTextAlign(attrs);
        if (align !== undefined) {
          const placeholder = `[[HEADING${headingIndex}]]`;
          alignedHeadings[placeholder] =
            `<h${level} style="text-align: ${align}">${content}</h${level}>`;
          headingIndex++;
          return placeholder + '\n\n';
        }
        // No alignment: plain Markdown heading.
        return `${hashes} ${content}\n\n`;
      },
    );
  }

  // Bold
  markdown = markdown.replace(/<strong>(.*?)<\/strong>/gi, '**$1**');
  markdown = markdown.replace(/<b>(.*?)<\/b>/gi, '**$1**');

  // Italic
  markdown = markdown.replace(/<em>(.*?)<\/em>/gi, '*$1*');
  markdown = markdown.replace(/<i>(.*?)<\/i>/gi, '*$1*');

  // TipTap buttons - detected by the data-button attribute, BEFORE generic links.
  // Format: <a data-button data-style="solid" data-href="url">text</a>
  //     or: <a class="button button-outline" href="url" data-button>text</a>
  markdown = markdown.replace(
    /<a\b([^>]*data-button[^>]*)>(.*?)<\/a>/gi,
    (_match: string, attrs: string, text: string): string => {
      // Only the opening tag's attributes are inspected, so a label that
      // happens to contain "outline" or "href=" cannot change the result.
      const style =
        captureFirstGroup(attrs, /data-style=["'](\w+)["']/) ||
        (attrs.includes('outline') ? 'outline' : 'solid');
      // data-href wins over href; '#' is the fallback when neither is present.
      const url =
        captureFirstGroup(attrs, /data-href=["']([^"']+)["']/) ||
        captureFirstGroup(attrs, /href=["']([^"']+)["']/) ||
        '#';
      return `[button:${style}](${url})${text.trim()}[/button]`;
    },
  );

  // Regular links (not buttons). href may appear after other attributes, but
  // must be preceded by whitespace so that data-href is not mistaken for it.
  markdown = markdown.replace(
    /<a\s+(?:[^>]*?\s)?href=["']([^"']+)["'][^>]*>(.*?)<\/a>/gi,
    '[$2]($1)',
  );

  // Lists
  markdown = markdown.replace(
    /<ul\b[^>]*>(.*?)<\/ul>/gis,
    (_match: string, content: string): string => convertListItems(content, () => '- '),
  );
  markdown = markdown.replace(
    /<ol\b[^>]*>(.*?)<\/ol>/gis,
    (_match: string, content: string): string =>
      convertListItems(content, (index) => `${index + 1}. `),
  );

  // Paragraphs - aligned ones are preserved as HTML via placeholders.
  const alignedParagraphs: Record<string, string> = {};
  let alignIndex = 0;
  markdown = markdown.replace(
    /<p\b([^>]*)>(.*?)<\/p>/gis,
    (_match: string, attrs: string, content: string): string => {
      const align = extractTextAlign(attrs);
      if (align !== undefined) {
        // Double-bracket placeholder: contains no '<', so tag stripping ignores it.
        const placeholder = `[[ALIGN${alignIndex}]]`;
        alignedParagraphs[placeholder] = `<p style="text-align: ${align}">${content}</p>`;
        alignIndex++;
        return placeholder + '\n\n';
      }
      // No alignment: plain text followed by a blank line.
      return `${content}\n\n`;
    },
  );

  // Line breaks
  markdown = markdown.replace(/<br\s*\/?>/gi, '\n');

  // Horizontal rules
  markdown = markdown.replace(/<hr\s*\/?>/gi, '\n---\n\n');

  // Remove remaining HTML tags
  markdown = markdown.replace(/<[^>]+>/g, '');

  // Restore aligned paragraphs, then aligned headings. A replacer function is
  // used so that '$' sequences in the restored HTML (e.g. "$$", "$&") are
  // inserted literally instead of being treated as replacement patterns.
  for (const [placeholder, restored] of Object.entries(alignedParagraphs)) {
    markdown = markdown.replace(placeholder, () => restored);
  }
  for (const [placeholder, restored] of Object.entries(alignedHeadings)) {
    markdown = markdown.replace(placeholder, () => restored);
  }

  // Clean up excessive newlines
  markdown = markdown.replace(/\n{3,}/g, '\n\n');

  return markdown.trim();
}

/** Return capture group 1 of the first match of `pattern` in `source`, if any. */
function captureFirstGroup(source: string, pattern: RegExp): string | undefined {
  const found = pattern.exec(source);
  return found === null ? undefined : found[1];
}

/** Return 'center' or 'right' when the attribute text declares that text-align, else undefined. */
function extractTextAlign(attrs: string): string | undefined {
  const value = captureFirstGroup(attrs, /text-align:\s*(center|right)/i);
  return value === undefined ? undefined : value.toLowerCase();
}

/** Render each <li> in `listBody` on its own line, prefixed by `marker(index)`, plus a trailing blank line. */
function convertListItems(listBody: string, marker: (index: number) => string): string {
  const items: string[] = listBody.match(/<li\b[^>]*>(.*?)<\/li>/gis) || [];
  const lines = items.map((item, index) => {
    const text = item.replace(/<li\b[^>]*>(.*?)<\/li>/is, '$1').trim();
    return marker(index) + text;
  });
  return lines.join('\n') + '\n\n';
}