first commit all files

This commit is contained in:
dwindown
2026-01-28 00:26:00 +07:00
parent 65dd207a74
commit 97426d5ab1
72 changed files with 91484 additions and 0 deletions

View File

@@ -0,0 +1,788 @@
<?php
/**
* Markdown to Gutenberg Blocks Parser
*
* Converts Markdown content to WordPress Gutenberg blocks.
*
* @package WP_Agentic_Writer
*/
// Bail out when ABSPATH is not defined, i.e. the file was requested directly.
defined( 'ABSPATH' ) || exit;
/**
 * Class WP_Agentic_Writer_Markdown_Parser
 *
 * Line-oriented converter that turns Markdown (or lightly HTML-flavoured text)
 * into arrays shaped like parsed Gutenberg blocks. Every method is static and
 * no state is kept between calls.
 *
 * @since 0.1.0
 */
class WP_Agentic_Writer_Markdown_Parser {

	/**
	 * Parse Markdown content and convert to Gutenberg blocks.
	 *
	 * Each line is classified in this order: continuation of an auto-detected
	 * code block, fenced code, [IMAGE:] / [CTA:] placeholders, start of an
	 * auto-detected code block, heading, table, horizontal rule, unordered
	 * list, ordered list, blockquote, blank line, paragraph text.
	 *
	 * Afterwards adjacent ordered lists are merged and adjacent duplicate
	 * headings are dropped.
	 *
	 * @since 0.1.0
	 * @param string $markdown Markdown content.
	 * @return array Array of Gutenberg blocks.
	 */
	public static function parse( $markdown ) {
		$lines      = explode( "\n", self::normalize_markdown( $markdown ) );
		$line_count = count( $lines );

		$blocks            = array();
		$current_paragraph = '';

		$in_list    = false;
		$list_items = array(); // Raw Markdown; rendered exactly once by create_list_block().
		$list_type  = 'ul';    // 'ul' or 'ol'.

		$in_code_block = false;
		$code_lines    = array();
		$code_language = '';

		$in_auto_code_block = false;
		$auto_code_lines    = array();
		$auto_code_language = 'text';

		// Strict '' comparisons are used on purpose: empty() would treat the text "0" as blank.
		$flush_paragraph = static function () use ( &$blocks, &$current_paragraph ) {
			if ( '' !== $current_paragraph ) {
				$blocks[]          = self::create_paragraph_block( $current_paragraph );
				$current_paragraph = '';
			}
		};

		$flush_list = static function () use ( &$blocks, &$in_list, &$list_items, &$list_type ) {
			if ( $in_list ) {
				$blocks[]   = self::create_list_block( $list_type, $list_items );
				$list_items = array();
				$in_list    = false;
			}
		};

		$flush_pending = static function () use ( $flush_paragraph, $flush_list ) {
			$flush_paragraph();
			$flush_list();
		};

		$flush_auto_code = static function () use ( &$blocks, &$in_auto_code_block, &$auto_code_lines, &$auto_code_language ) {
			if ( $in_auto_code_block && ! empty( $auto_code_lines ) ) {
				$blocks[] = self::create_code_block( $auto_code_language, implode( "\n", $auto_code_lines ) );
			}
			$auto_code_lines    = array();
			$auto_code_language = 'text';
			$in_auto_code_block = false;
		};

		for ( $i = 0; $i < $line_count; $i++ ) {
			$line    = $lines[ $i ];
			$trimmed = trim( $line );

			// Inside an auto-detected (unfenced) code block.
			if ( $in_auto_code_block ) {
				$continues = '' !== $trimmed
					&& (
						self::is_code_like_line( $trimmed )
						|| preg_match( '/^\s+/', $line )
						// Unindented closers such as "}" or "});" still belong to the code.
						|| preg_match( '/^[\])}]/', $trimmed )
					);

				if ( $continues ) {
					$auto_code_lines[] = $line;
					continue;
				}

				$flush_auto_code();

				if ( '' === $trimmed ) {
					continue;
				}
				// Otherwise fall through and classify the current line normally.
			}

			// Fenced code blocks. Handled before placeholders so that fenced
			// content is never reinterpreted as an image or button.
			if ( preg_match( '/^```(\w*)/', $trimmed, $matches ) ) {
				if ( $in_code_block ) {
					$blocks[]      = self::create_code_block( $code_language, implode( "\n", $code_lines ) );
					$code_lines    = array();
					$code_language = '';
					$in_code_block = false;
				} else {
					$flush_pending();
					$in_code_block = true;
					$code_language = $matches[1];
				}
				continue;
			}

			if ( $in_code_block ) {
				$code_lines[] = $line;
				continue;
			}

			// Image placeholders: [IMAGE: description].
			if ( preg_match( '/^\[IMAGE:\s*(.+)\]$/i', $trimmed, $matches ) ) {
				$flush_pending();
				$blocks[] = self::create_image_placeholder_block( $matches[1] );
				continue;
			}

			// CTA/Button placeholders: [CTA: text] or [CTA: text (url)].
			if ( preg_match( '/^\[CTA:\s*(.+)\]$/i', $trimmed, $matches ) ) {
				$flush_pending();
				$blocks[] = self::create_button_block( $matches[1] );
				continue;
			}

			// Unfenced code: open an auto-detected code block.
			if ( self::is_code_like_line( $trimmed ) ) {
				$flush_pending();
				$auto_code_language = preg_match( '/^(<\?php|define\s*\(|\$[A-Za-z_])/', $trimmed ) ? 'php' : 'text';
				$in_auto_code_block = true;
				$auto_code_lines[]  = $line;
				continue;
			}

			// Headings.
			if ( preg_match( '/^(#{1,6})\s+(.+)$/', $trimmed, $matches ) ) {
				$flush_pending();
				$blocks[] = self::create_heading_block( $matches[2], strlen( $matches[1] ) );
				continue;
			}

			// Tables: a row containing "|" immediately followed by a separator row.
			if (
				! $in_list
				&& $i + 1 < $line_count
				&& self::is_table_row( $trimmed )
				&& self::is_table_separator( trim( $lines[ $i + 1 ] ) )
			) {
				$flush_paragraph();

				$headers = self::split_table_row( $trimmed );
				$rows    = array();

				for ( $i += 2; $i < $line_count; $i++ ) {
					$row_line = trim( $lines[ $i ] );
					// is_table_row() is false for blank lines too.
					if ( ! self::is_table_row( $row_line ) ) {
						break;
					}
					$rows[] = self::split_table_row( $row_line );
				}
				// Step back so the outer loop's $i++ lands on the first non-table line.
				$i--;

				$blocks[] = self::create_table_block( $headers, $rows );
				continue;
			}

			// Horizontal rules: three or more of the same -, * or _ and nothing
			// else on the line, so "***bold italic***" or "---text" stay text.
			if ( preg_match( '/^([-*_])(?:[ \t]*\1){2,}$/', $trimmed ) ) {
				$flush_pending();
				// NOTE(review): Gutenberg does provide core/separator; the spacer
				// is kept here so existing consumers see the same block as before.
				$blocks[] = array(
					'blockName'    => 'core/spacer',
					'attrs'        => array(
						'height' => '20px',
					),
					'innerBlocks'  => array(),
					'innerContent' => array(),
				);
				continue;
			}

			// Unordered lists (supports common dash/bullet variants).
			if ( preg_match( '/^[\*\-+\x{2022}\x{2023}\x{2013}\x{2014}]\s+(.+)$/u', $trimmed, $matches ) ) {
				$flush_paragraph();
				if ( 'ul' !== $list_type ) {
					$flush_list();
				}
				$in_list      = true;
				$list_type    = 'ul';
				$list_items[] = $matches[1];
				continue;
			}

			// Ordered lists.
			if ( preg_match( '/^\d+\.\s+(.+)$/', $trimmed, $matches ) ) {
				$flush_paragraph();
				if ( 'ol' !== $list_type ) {
					$flush_list();
				}
				$in_list      = true;
				$list_type    = 'ol';
				$list_items[] = $matches[1];
				continue;
			}

			// Blockquotes.
			if ( preg_match( '/^>\s+(.+)$/', $trimmed, $matches ) ) {
				$flush_pending();
				$blocks[] = self::create_quote_block( $matches[1] );
				continue;
			}

			// Blank lines end the pending paragraph and list.
			if ( '' === $trimmed ) {
				$flush_pending();
				continue;
			}

			// Plain text directly under a list item continues that item
			// (lazy continuation). Emitting it as a paragraph here would place
			// it before the still-pending list in the output.
			if ( $in_list && ! empty( $list_items ) ) {
				$list_items[ count( $list_items ) - 1 ] .= ' ' . $trimmed;
				continue;
			}

			// Accumulate paragraph text.
			$current_paragraph = '' === $current_paragraph ? $trimmed : $current_paragraph . ' ' . $trimmed;

			// A line ending in sentence punctuation closes the paragraph.
			if ( preg_match( '/[.!?]$/', $trimmed ) ) {
				$flush_paragraph();
			}
		}

		// End of input: emit whatever is still pending.
		$flush_auto_code();
		if ( $in_code_block ) {
			// Unterminated fence: keep the collected code instead of dropping it.
			$blocks[] = self::create_code_block( $code_language, implode( "\n", $code_lines ) );
		}
		$flush_pending();

		$blocks = self::merge_consecutive_ordered_lists( $blocks );

		return self::remove_adjacent_duplicate_headings( $blocks );
	}

	/**
	 * Decide whether a trimmed line looks like source code rather than prose.
	 *
	 * Keywords that are also everyday English words (return, case, class,
	 * public, ...) only count when written in lower case and when the line
	 * carries a syntactic signal as well, so that a sentence such as
	 * "Return policies vary by store." is not turned into a code block.
	 *
	 * @since 0.1.0
	 * @param string $trimmed Line content without surrounding whitespace.
	 * @return bool
	 */
	private static function is_code_like_line( $trimmed ) {
		if ( '' === $trimmed ) {
			return false;
		}

		// Openers that are sufficient on their own.
		if ( preg_match( '/^(<\?php|define\s*\(|\$[A-Za-z_]|if\s*\(|elseif\s*\(|foreach\s*\(|for\s*\(|while\s*\(|switch\s*\()/i', $trimmed ) ) {
			return true;
		}

		// Ambiguous keywords: require a code signal on the same line.
		if (
			preg_match( '/^(function\s+\w+|class\s+\w+|else\b|case\s+|echo\s+|return\s+|const\s+|public\s+|private\s+|protected\s+)/', $trimmed )
			&& preg_match( '/[;{}:]$|\$[A-Za-z_]|->|::|=|\w\(|\b(?:extends|implements)\b/', $trimmed )
		) {
			return true;
		}

		// Statement-shaped line: ends like a statement and contains an operator or call.
		return (bool) ( preg_match( '/[;{}]$/', $trimmed ) && preg_match( '/[()$=]|->|::/', $trimmed ) );
	}

	/**
	 * Drop a heading that repeats the heading block directly before it.
	 *
	 * The comparison is case-insensitive and ignores surrounding whitespace.
	 * Any non-heading block resets the comparison.
	 *
	 * @since 0.1.0
	 * @param array $blocks Array of blocks.
	 * @return array Blocks without adjacent duplicate headings.
	 */
	private static function remove_adjacent_duplicate_headings( $blocks ) {
		$cleaned      = array();
		$last_heading = null;

		foreach ( $blocks as $block ) {
			$is_heading = isset( $block['blockName'] ) && 'core/heading' === $block['blockName'];

			if ( ! $is_heading ) {
				$last_heading = null;
				$cleaned[]    = $block;
				continue;
			}

			if ( isset( $block['innerHTML'] ) && preg_match( '/<h[1-6]>(.+?)<\/h[1-6]>/i', $block['innerHTML'], $matches ) ) {
				$heading = trim( $matches[1] );
				if ( null !== $last_heading && strtolower( $heading ) === strtolower( $last_heading ) ) {
					continue;
				}
				$last_heading = $heading;
			}

			$cleaned[] = $block;
		}

		return $cleaned;
	}

	/**
	 * Parse inline Markdown elements (code, links, bold, italic).
	 *
	 * Code spans and link targets are swapped for opaque tokens while the
	 * emphasis rules run, so underscores or asterisks inside them are left
	 * alone. Code span content is HTML-escaped. Underscore emphasis is only
	 * recognised at word boundaries, and single-asterisk emphasis must hug
	 * its content, so "snake_case_name" and "2 * 3 * 4" stay literal.
	 *
	 * @since 0.1.0
	 * @param string $text Text with inline Markdown.
	 * @return string HTML content with inline formatting.
	 */
	private static function parse_inline_markdown( $text ) {
		$text = (string) $text;
		if ( '' === $text ) {
			return '';
		}

		$stash   = array();
		$protect = static function ( $html ) use ( &$stash ) {
			$token           = "\x1A" . count( $stash ) . "\x1A";
			$stash[ $token ] = $html;
			return $token;
		};

		// Inline code.
		$text = preg_replace_callback(
			'/`([^`]+)`/',
			static function ( $m ) use ( $protect ) {
				return $protect( '<code>' . htmlspecialchars( $m[1], ENT_NOQUOTES | ENT_SUBSTITUTE, 'UTF-8', false ) . '</code>' );
			},
			$text
		);

		// Links: only the opening tag is protected so the label can still be emphasised.
		$text = preg_replace_callback(
			'/\[(.+?)\]\((.+?)\)/',
			static function ( $m ) use ( $protect ) {
				$href = htmlspecialchars( trim( $m[2] ), ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8', false );
				return $protect( '<a href="' . $href . '">' ) . $m[1] . '</a>';
			},
			$text
		);

		// Bold + italic, then bold, then italic.
		$text = preg_replace( '/\*\*\*(?!\s)(.+?)(?<!\s)\*\*\*/', '<strong><em>$1</em></strong>', $text );
		$text = preg_replace( '/\*\*(.+?)\*\*/', '<strong>$1</strong>', $text );
		$text = preg_replace( '/(?<!\w)__(?!\s)(.+?)(?<!\s)__(?!\w)/', '<strong>$1</strong>', $text );
		$text = preg_replace( '/\*(?!\s)(.+?)(?<!\s)\*/', '<em>$1</em>', $text );
		$text = preg_replace( '/(?<!\w)_(?!\s)(.+?)(?<!\s)_(?!\w)/', '<em>$1</em>', $text );

		// Restore newest-first: a link token may wrap an older code token.
		foreach ( array_reverse( $stash, true ) as $token => $html ) {
			$text = str_replace( $token, $html, $text );
		}

		return $text;
	}

	/**
	 * Normalize markdown input to improve block parsing.
	 *
	 * Unifies line endings, replaces dash/bullet entities with "-", converts
	 * <br> and <p> tags to newlines, rewrites Markdown images to [IMAGE: alt]
	 * and puts every [IMAGE: ...] placeholder on a line of its own.
	 *
	 * @since 0.1.0
	 * @param string $markdown Raw markdown or HTML-ish content.
	 * @return string
	 */
	private static function normalize_markdown( $markdown ) {
		if ( null === $markdown ) {
			return '';
		}

		$markdown = (string) $markdown;

		// Lines are split on "\n" only; without this a "\r" would stay inside code lines.
		$markdown = str_replace( array( "\r\n", "\r" ), "\n", $markdown );

		$markdown = str_replace(
			array( '&#8211;', '&#8212;', '&ndash;', '&mdash;', '&bull;' ),
			'-',
			$markdown
		);

		$markdown = preg_replace( '/<br\\s*\\/?>/i', "\n", $markdown );
		$markdown = preg_replace( '/<\\/p>\\s*<p>/i', "\n\n", $markdown );
		$markdown = preg_replace( '/<\\/?p>/i', '', $markdown );
		$markdown = preg_replace( '/!\\[([^\\]]*)\\]\\([^\\)]*\\)/', '[IMAGE: $1]', $markdown );
		$markdown = preg_replace( '/\\[IMAGE:\\s*([^\\]]+)\\]/i', "\n[IMAGE: $1]\n", $markdown );

		return $markdown;
	}

	/**
	 * Check if a line looks like a markdown table row.
	 *
	 * @since 0.1.0
	 * @param string $line Line content.
	 * @return bool True when the line is non-empty and contains a pipe.
	 */
	private static function is_table_row( $line ) {
		return '' !== $line && false !== strpos( $line, '|' );
	}

	/**
	 * Check if a line is a markdown table separator.
	 *
	 * Requires at least two columns of three or more dashes, each with
	 * optional alignment colons, with optional outer pipes.
	 *
	 * @since 0.1.0
	 * @param string $line Line content.
	 * @return bool
	 */
	private static function is_table_separator( $line ) {
		return (bool) preg_match( '/^\|?\s*:?-{3,}:?\s*(\|\s*:?-{3,}:?\s*)+\|?$/', $line );
	}

	/**
	 * Split a markdown table row into cells.
	 *
	 * Only one leading and one trailing pipe are removed, so an empty first
	 * or last cell keeps its position instead of shifting the columns.
	 *
	 * @since 0.1.0
	 * @param string $line Row line.
	 * @return array Trimmed cell strings; empty array when the row has no content.
	 */
	private static function split_table_row( $line ) {
		$row = trim( $line );
		if ( '' === $row ) {
			return array();
		}

		if ( '|' === $row[0] ) {
			$row = (string) substr( $row, 1 );
		}
		if ( '' !== $row && '|' === substr( $row, -1 ) ) {
			$row = (string) substr( $row, 0, -1 );
		}

		$cells = array_map( 'trim', explode( '|', $row ) );

		// A row made only of pipes and whitespace carries nothing to render.
		if ( '' === implode( '', $cells ) ) {
			return array();
		}

		return $cells;
	}

	/**
	 * Create a heading block.
	 *
	 * @since 0.1.0
	 * @param string $content Heading content.
	 * @param int    $level   Heading level (1-6).
	 * @return array Gutenberg block.
	 */
	private static function create_heading_block( $content, $level ) {
		$level          = min( max( $level, 1 ), 6 ); // Ensure level is between 1-6.
		$parsed_content = self::parse_inline_markdown( $content );
		$html           = '<h' . $level . '>' . $parsed_content . '</h' . $level . '>';

		return array(
			'blockName'    => 'core/heading',
			'attrs'        => array(
				'level'   => $level,
				'content' => $parsed_content,
			),
			'innerBlocks'  => array(),
			'innerContent' => array( $html ),
			'innerHTML'    => $html,
		);
	}

	/**
	 * Create a paragraph block.
	 *
	 * @since 0.1.0
	 * @param string $content Paragraph content.
	 * @return array Gutenberg block.
	 */
	private static function create_paragraph_block( $content ) {
		$parsed_content = self::parse_inline_markdown( $content );
		$html           = '<p>' . $parsed_content . '</p>';

		return array(
			'blockName'    => 'core/paragraph',
			'attrs'        => array(
				'content' => $parsed_content,
			),
			'innerBlocks'  => array(),
			'innerContent' => array( $html ),
			'innerHTML'    => $html,
		);
	}

	/**
	 * Create a table block.
	 *
	 * Rows without any cell are skipped. Rows shorter than the header are
	 * padded with empty cells so every row spans the same columns.
	 *
	 * @since 0.1.0
	 * @param array $headers Table headers.
	 * @param array $rows    Table rows.
	 * @return array Gutenberg block.
	 */
	private static function create_table_block( $headers, $rows ) {
		$column_count = count( $headers );

		$header_cells = array();
		foreach ( $headers as $header ) {
			$header_cells[] = '<th>' . self::parse_inline_markdown( $header ) . '</th>';
		}

		$tbody_rows = array();
		foreach ( $rows as $row ) {
			if ( empty( $row ) ) {
				continue;
			}

			$cells = array();
			foreach ( array_pad( $row, $column_count, '' ) as $cell ) {
				$cells[] = '<td>' . self::parse_inline_markdown( $cell ) . '</td>';
			}
			$tbody_rows[] = '<tr>' . implode( '', $cells ) . '</tr>';
		}

		$thead_html = '<thead><tr>' . implode( '', $header_cells ) . '</tr></thead>';
		$tbody_html = '<tbody>' . implode( '', $tbody_rows ) . '</tbody>';
		$table_html = '<figure class="wp-block-table"><table>' . $thead_html . $tbody_html . '</table></figure>';

		return array(
			'blockName'    => 'core/table',
			'attrs'        => array(
				'hasFixedLayout' => true,
			),
			'innerBlocks'  => array(),
			'innerContent' => array( $table_html ),
			'innerHTML'    => $table_html,
		);
	}

	/**
	 * Create a code block.
	 *
	 * The raw code is kept in attrs; the HTML copy is entity-escaped.
	 *
	 * @since 0.1.0
	 * @param string $language Programming language; falls back to 'text' when empty.
	 * @param string $code     Code content.
	 * @return array Gutenberg block.
	 */
	private static function create_code_block( $language, $code ) {
		// ENT_SUBSTITUTE keeps the code when it contains invalid UTF-8
		// (htmlspecialchars would otherwise return an empty string).
		$escaped_code = htmlspecialchars( $code, ENT_NOQUOTES | ENT_SUBSTITUTE, 'UTF-8' );
		$html         = '<pre class="wp-block-code"><code>' . $escaped_code . '</code></pre>';

		return array(
			'blockName'    => 'core/code',
			'attrs'        => array(
				'content'  => $code,
				'language' => $language ?: 'text',
			),
			'innerBlocks'  => array(),
			// Same shape as the other leaf blocks built by this class.
			'innerContent' => array( $html ),
			'innerHTML'    => $html,
		);
	}

	/**
	 * Create a list block.
	 *
	 * @since 0.1.0
	 * @param string $type  List type ('ul' or 'ol').
	 * @param array  $items List items as raw Markdown strings.
	 * @return array Gutenberg block.
	 */
	private static function create_list_block( $type, $items ) {
		$tag           = 'ol' === $type ? 'ol' : 'ul';
		$inner_blocks  = array();
		$inner_content = array();

		foreach ( $items as $item ) {
			$item_content = self::parse_inline_markdown( $item );
			$li_html      = '<li>' . $item_content . '</li>';

			$inner_content[] = $li_html;
			$inner_blocks[]  = array(
				'blockName'    => 'core/list-item',
				'attrs'        => array(
					'content' => $item_content,
				),
				'innerBlocks'  => array(),
				'innerContent' => array( $li_html ),
				'innerHTML'    => $li_html,
			);
		}

		$html = '<' . $tag . '>' . implode( '', $inner_content ) . '</' . $tag . '>';

		// NOTE(review): innerContent holds the <li> fragments only (no list
		// wrapper, no null markers for inner blocks) - confirm that is what
		// the consumer of these arrays expects.
		return array(
			'blockName'    => 'core/list',
			'attrs'        => array(
				'ordered' => 'ol' === $type,
			),
			'innerBlocks'  => $inner_blocks,
			'innerContent' => $inner_content,
			'innerHTML'    => $html,
		);
	}

	/**
	 * Create a quote block.
	 *
	 * @since 0.1.0
	 * @param string $content Quote content.
	 * @return array Gutenberg block.
	 */
	private static function create_quote_block( $content ) {
		$parsed_content = self::parse_inline_markdown( $content );
		$html           = '<blockquote class="wp-block-quote"><p>' . $parsed_content . '</p></blockquote>';

		return array(
			'blockName'    => 'core/quote',
			'attrs'        => array(
				'value' => $parsed_content,
			),
			'innerBlocks'  => array(),
			'innerContent' => array( $html ),
			'innerHTML'    => $html,
		);
	}

	/**
	 * Create an image placeholder block.
	 *
	 * The block has no URL or attachment id; only the alt text is filled in.
	 *
	 * @since 0.1.0
	 * @param string $description Image description/alt text.
	 * @return array Gutenberg block.
	 */
	private static function create_image_placeholder_block( $description ) {
		$alt  = trim( $description );
		$html = '<figure class="wp-block-image size-large"><img alt="' . esc_attr( $alt ) . '" /></figure>';

		return array(
			'blockName'    => 'core/image',
			'attrs'        => array(
				'id'              => 0,
				'url'             => '',
				'alt'             => $alt,
				'caption'         => '',
				'sizeSlug'        => 'large',
				'linkDestination' => 'none',
			),
			'innerBlocks'  => array(),
			// Same shape as the other leaf blocks built by this class.
			'innerContent' => array( $html ),
			'innerHTML'    => $html,
		);
	}

	/**
	 * Merge directly adjacent ordered lists into one continuous list.
	 *
	 * Numbered items separated only by blank lines are parsed as several
	 * one-item lists, each of which would restart at "1."; joining them
	 * restores continuous numbering. Lists separated by any other block are
	 * left apart. Ordered lists without items are dropped.
	 *
	 * @since 0.1.0
	 * @param array $blocks Array of blocks.
	 * @return array Merged blocks.
	 */
	private static function merge_consecutive_ordered_lists( $blocks ) {
		$result        = array();
		$pending_items = array();

		foreach ( $blocks as $block ) {
			$is_ordered_list = isset( $block['blockName'] )
				&& 'core/list' === $block['blockName']
				&& ! empty( $block['attrs']['ordered'] );

			if ( $is_ordered_list ) {
				if ( ! empty( $block['innerBlocks'] ) ) {
					foreach ( $block['innerBlocks'] as $item ) {
						$pending_items[] = $item;
					}
				}
				continue;
			}

			if ( ! empty( $pending_items ) ) {
				$result[]      = self::rebuild_ordered_list( $pending_items );
				$pending_items = array();
			}
			$result[] = $block;
		}

		if ( ! empty( $pending_items ) ) {
			$result[] = self::rebuild_ordered_list( $pending_items );
		}

		return $result;
	}

	/**
	 * Rebuild an ordered list from accumulated items.
	 *
	 * @since 0.1.0
	 * @param array $items List item blocks.
	 * @return array Ordered list block.
	 */
	private static function rebuild_ordered_list( $items ) {
		$inner_content = array();
		foreach ( $items as $item ) {
			$inner_content[] = isset( $item['innerHTML'] ) ? $item['innerHTML'] : '<li></li>';
		}

		$html = '<ol>' . implode( '', $inner_content ) . '</ol>';

		return array(
			'blockName'    => 'core/list',
			'attrs'        => array(
				'ordered' => true,
			),
			'innerBlocks'  => $items,
			'innerContent' => $inner_content,
			'innerHTML'    => $html,
		);
	}

	/**
	 * Create a button block from CTA syntax.
	 *
	 * A trailing parenthetical is used as the link target only when it looks
	 * like a URL (scheme, protocol-relative, root-relative, anchor, query or
	 * bare domain). Anything else, e.g. "(Link ke halaman produk)", stays in
	 * the label where the clean-up patterns below can strip it, and the
	 * button points at "#".
	 *
	 * @since 0.1.0
	 * @param string $content CTA content (may include URL in parentheses).
	 * @return array Gutenberg block.
	 */
	private static function create_button_block( $content ) {
		$text = trim( $content );
		$url  = '#';

		// "Button Text (https://example.com)".
		if ( preg_match( '/^(.+?)\s*\(([^)]+)\)\s*$/', $text, $matches ) ) {
			$candidate = trim( $matches[2] );
			if ( preg_match( '~^(?:(?:https?:)?//|mailto:|tel:|[/#?]|[a-z0-9-]+(?:\.[a-z0-9-]+)*\.[a-z]{2,}(?:[/?#:]|$))\S*$~i', $candidate ) ) {
				$text = trim( $matches[1] );
				$url  = $candidate;
			}
		}

		// Clean up common patterns like "Link ke..." or "(Link...".
		$text = preg_replace( '/\s*\(Link\s+ke\s+.*$/i', '', $text );
		$text = preg_replace( '/\s*\(Link\s+.*$/i', '', $text );

		// esc_url() returns an empty string for URLs it rejects.
		$href = esc_url( $url );
		if ( '' === $href ) {
			$href = '#';
			$url  = '#';
		}

		$button_html  = '<div class="wp-block-button"><a class="wp-block-button__link wp-element-button" href="' . $href . '">' . esc_html( $text ) . '</a></div>';
		$button_block = array(
			'blockName'    => 'core/button',
			'attrs'        => array(
				'text' => $text,
				'url'  => $url,
			),
			'innerBlocks'  => array(),
			'innerContent' => array( $button_html ),
			'innerHTML'    => $button_html,
		);

		// Wrap in buttons container.
		$wrapper_html = '<div class="wp-block-buttons">' . $button_html . '</div>';

		// NOTE(review): innerContent carries the rendered wrapper rather than
		// null markers for the inner button - confirm the consumer expects this.
		return array(
			'blockName'    => 'core/buttons',
			'attrs'        => array(),
			'innerBlocks'  => array( $button_block ),
			'innerContent' => array( $wrapper_html ),
			'innerHTML'    => $wrapper_html,
		);
	}
}