Improve ObjectEditor and Add TableEditor

2025-09-23 14:17:13 +07:00
parent cf750114f7
commit 977e784df2
15 changed files with 5329 additions and 345 deletions
--- a/src/utils/browserCompat.js
+++ b/src/utils/browserCompat.js
@@ -0,0 +1,147 @@
+// Browser compatibility utilities for handling different browser environments
+
+/**
+ * Detect whether the app is running inside Telegram's built-in browser.
+ *
+ * Checks run in order and stop at the first hit:
+ *   1. the lowercased user agent contains `telegram`, `tgios` or `tgandroid`;
+ *   2. `window.TelegramWebviewProxy` is defined;
+ *   3. the user agent contains `mobile` and `webkit` but not `chrome`.
+ *
+ * NOTE(review): rule 3 is a broad heuristic. It is also true for any other
+ * user agent with that token combination (Safari on iPhone, for example,
+ * sends "Mobile" and "AppleWebKit" without "Chrome"), so a `true` result does
+ * not prove the host is Telegram.
+ *
+ * @returns {boolean} `true` when any of the rules above matches.
+ */
+export const isTelegramBrowser = () => {
+  const ua = navigator.userAgent.toLowerCase();
+  const hasToken = (token) => ua.includes(token);
+
+  // Explicit Telegram markers in the user agent string.
+  if (['telegram', 'tgios', 'tgandroid'].some(hasToken)) {
+    return true;
+  }
+
+  // Telegram-specific window property.
+  if (window.TelegramWebviewProxy !== undefined) {
+    return true;
+  }
+
+  // Fallback heuristic: mobile WebKit that does not identify as Chrome.
+  return hasToken('mobile') && hasToken('webkit') && !hasToken('chrome');
+};
+
+/**
+ * Detect whether the app is running in a mobile in-app browser (WebView).
+ *
+ * The lowercased user agent is searched for a fixed list of substrings. These
+ * are plain substring matches, not word matches, so short tokens such as `wv`
+ * or `line` match wherever they appear in the string.
+ *
+ * @returns {boolean} `true` when at least one token is found.
+ */
+export const isInAppBrowser = () => {
+  const inAppTokens = [
+    'wv', // WebView
+    'telegram',
+    'fbav', // Facebook
+    'fban', // Facebook
+    'instagram',
+    'twitter',
+    'line',
+    'whatsapp',
+  ];
+  const ua = navigator.userAgent.toLowerCase();
+
+  return inAppTokens.some((token) => ua.includes(token));
+};
+
+/**
+ * Collect a snapshot of browser/environment flags.
+ *
+ * @returns {{userAgent: string, isTelegram: boolean, isInApp: boolean,
+ *   isMobile: boolean, isIOS: boolean, isAndroid: boolean}} The raw user agent
+ *   plus flags derived from it (and from the Telegram / in-app detectors).
+ */
+export const getBrowserInfo = () => {
+  const { userAgent } = navigator;
+  const isTelegram = isTelegramBrowser();
+  const isInApp = isInAppBrowser();
+  const uaMatches = (pattern) => pattern.test(userAgent);
+
+  return {
+    userAgent,
+    isTelegram,
+    isInApp,
+    isMobile: uaMatches(/Android|webOS|iPhone|iPad|iPod|BlackBerry|IEMobile|Opera Mini/i),
+    isIOS: uaMatches(/iPad|iPhone|iPod/),
+    isAndroid: uaMatches(/Android/)
+  };
+};
+
+/**
+ * Build a minimal in-memory stand-in for the Web Storage interface.
+ * Values live only as long as the page; nothing is persisted.
+ */
+const createMemoryStorage = () => {
+  const entries = new Map();
+  return {
+    getItem: (key) => (entries.has(String(key)) ? entries.get(String(key)) : null),
+    setItem: (key, value) => {
+      entries.set(String(key), String(value));
+    },
+    removeItem: (key) => {
+      entries.delete(String(key));
+    },
+    clear: () => {
+      entries.clear();
+    },
+    key: (index) => [...entries.keys()][index] ?? null,
+    get length() {
+      return entries.size;
+    }
+  };
+};
+
+/**
+ * Probe `window[name]` with a write/remove round trip and, when that fails,
+ * replace it with an in-memory storage object.
+ */
+const ensureWorkingStorage = (name) => {
+  // Namespaced probe key so an application entry (e.g. one called "test")
+  // is never overwritten or removed by the probe.
+  const probeKey = '__browser_compat_probe__';
+
+  try {
+    const storage = window[name];
+    storage.setItem(probeKey, probeKey);
+    storage.removeItem(probeKey);
+    return;
+  } catch (probeError) {
+    // Storage is missing, blocked or full: fall through to the replacement.
+  }
+
+  try {
+    // `localStorage` / `sessionStorage` are getter-only accessors, so a plain
+    // assignment throws a TypeError in strict-mode (module) code. Redefining
+    // the property is what actually installs the fallback.
+    Object.defineProperty(window, name, {
+      value: createMemoryStorage(),
+      configurable: true,
+      enumerable: true,
+      writable: true
+    });
+  } catch (defineError) {
+    console.warn(`Unable to install in-memory fallback for ${name}:`, defineError);
+  }
+};
+
+/**
+ * Add polyfills and compatibility fixes for problematic browsers.
+ *
+ * Installs, only where missing or broken:
+ *   - a no-op `console` and no-op `log`/`error`/`warn`/`info`/`debug` methods;
+ *   - `requestAnimationFrame` / `cancelAnimationFrame` backed by timers;
+ *   - in-memory replacements for `localStorage` and `sessionStorage`.
+ *
+ * @returns {void}
+ */
+export const addCompatibilityFixes = () => {
+  const consoleMethods = ['log', 'error', 'warn', 'info', 'debug'];
+
+  // Fix for missing or problematic console methods in some browsers
+  if (!window.console) {
+    window.console = Object.fromEntries(consoleMethods.map((method) => [method, () => {}]));
+  }
+
+  // Ensure console methods exist and are functions
+  consoleMethods.forEach((method) => {
+    if (typeof console[method] !== 'function') {
+      console[method] = () => {};
+    }
+  });
+
+  // Add requestAnimationFrame polyfill if missing (~60 callbacks per second)
+  if (!window.requestAnimationFrame) {
+    window.requestAnimationFrame = (callback) => setTimeout(callback, 1000 / 60);
+  }
+
+  // Add cancelAnimationFrame polyfill if missing
+  if (!window.cancelAnimationFrame) {
+    window.cancelAnimationFrame = (id) => {
+      clearTimeout(id);
+    };
+  }
+
+  // Fix for missing or problematic localStorage / sessionStorage
+  ensureWorkingStorage('localStorage');
+  ensureWorkingStorage('sessionStorage');
+};
+
+/**
+ * Initialize compatibility fixes.
+ *
+ * Applies the generic polyfills first, then logs the detected browser info
+ * and, when the Telegram detector reports a match, registers global `error`
+ * and `unhandledrejection` listeners that log the problem and call
+ * `preventDefault()` on the event.
+ *
+ * @returns {object} The object produced by `getBrowserInfo()`.
+ */
+export const initBrowserCompat = () => {
+  // Polyfills go first: the logging below relies on `console`, which is one
+  // of the things `addCompatibilityFixes` repairs.
+  addCompatibilityFixes();
+
+  const browserInfo = getBrowserInfo();
+
+  // Log browser info for debugging
+  console.log('Browser Info:', browserInfo);
+
+  // Add specific fixes for Telegram browser
+  if (browserInfo.isTelegram) {
+    console.log('Telegram browser detected - applying compatibility fixes');
+
+    // Add Telegram-specific error handling
+    window.addEventListener('error', (event) => {
+      console.log('Global error caught in Telegram browser:', event.error);
+      // Suppress the default handling so the error overlay is not shown
+      event.preventDefault();
+    });
+
+    window.addEventListener('unhandledrejection', (event) => {
+      console.log('Unhandled promise rejection in Telegram browser:', event.reason);
+      // Suppress the default handling of the rejection
+      event.preventDefault();
+    });
+  }
+
+  return browserInfo;
+};
--- a/src/utils/contentExtractor.js
+++ b/src/utils/contentExtractor.js
@@ -0,0 +1,371 @@
+// Content extraction and article detection utilities
+
+/**
+ * Content classification types.
+ *
+ * Frozen so the shared enum cannot be modified by accident at runtime.
+ */
+export const CONTENT_TYPES = Object.freeze({
+  RICH_ARTICLE: 'rich_article',
+  GENERAL_CONTENT: 'general_content',
+  LIMITED_CONTENT: 'limited_content',
+  NO_CONTENT: 'no_content'
+});
+
+/**
+ * Content type display information, keyed by the values of `CONTENT_TYPES`.
+ *
+ * Each entry carries a `label`, an `emoji`, a `description` and a `color`
+ * (a string of CSS class names). The map and its entries are frozen so the
+ * shared display data cannot be modified by accident at runtime.
+ */
+export const CONTENT_TYPE_INFO = Object.freeze({
+  [CONTENT_TYPES.RICH_ARTICLE]: Object.freeze({
+    label: 'Rich Article Content',
+    emoji: '🟢',
+    description: 'Clear article structure with headings and paragraphs',
+    color: 'text-green-600 dark:text-green-400'
+  }),
+  [CONTENT_TYPES.GENERAL_CONTENT]: Object.freeze({
+    label: 'General Web Content',
+    emoji: '🟡',
+    description: 'Readable text mixed with navigation and UI elements',
+    color: 'text-yellow-600 dark:text-yellow-400'
+  }),
+  [CONTENT_TYPES.LIMITED_CONTENT]: Object.freeze({
+    label: 'Limited Text Content',
+    emoji: '🟠',
+    description: 'Mostly UI/navigation with minimal readable text',
+    color: 'text-orange-600 dark:text-orange-400'
+  }),
+  [CONTENT_TYPES.NO_CONTENT]: Object.freeze({
+    label: 'No Readable Content',
+    emoji: '🔴',
+    description: 'Images, videos, or heavily JavaScript-dependent content',
+    color: 'text-red-600 dark:text-red-400'
+  })
+});
+
+/**
+ * CORS proxy services for fetching external content, tried in this order.
+ *
+ * The `allorigins.win` entry expects the target URL percent-encoded and
+ * answers with JSON; the other entries get the target URL appended as-is.
+ *
+ * NOTE(review): these are public third-party services whose availability and
+ * rate limits are outside this module's control - confirm each one is still
+ * operational before relying on it.
+ */
+const CORS_PROXIES = Object.freeze([
+  'https://api.allorigins.win/get?url=',
+  'https://corsproxy.io/?',
+  'https://cors-anywhere.herokuapp.com/',
+  'https://thingproxy.freeboard.io/fetch/'
+]);
+
+/**
+ * Fetch HTML content from a URL, falling back to CORS proxies.
+ *
+ * A direct `fetch` is attempted first. If it throws, is not OK, or does not
+ * report `text/html`, each entry of `CORS_PROXIES` is tried in order until
+ * one yields content.
+ *
+ * @param {string} url - Absolute `http:` or `https:` URL to fetch.
+ * @returns {Promise<{html: string, url: string, contentType: string}>} The
+ *   markup, the URL it was served from (as far as known) and its content type.
+ * @throws {Error} "Failed to fetch content: ..." when the URL is invalid, uses
+ *   another protocol, or every attempt fails. The underlying error is
+ *   attached as `cause`.
+ */
+export const fetchUrlContent = async (url) => {
+  const acceptHeader = 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8';
+
+  try {
+    // Validate URL
+    const urlObj = new URL(url);
+    if (!['http:', 'https:'].includes(urlObj.protocol)) {
+      throw new Error('Only HTTP and HTTPS URLs are supported');
+    }
+
+    // First try direct fetch (works for same-origin or CORS-enabled sites).
+    // No custom User-Agent is sent: where a browser honours it, the request
+    // stops being a "simple" CORS request and requires a preflight that many
+    // otherwise CORS-enabled sites do not answer.
+    try {
+      const response = await fetch(url, {
+        method: 'GET',
+        headers: { 'Accept': acceptHeader }
+      });
+
+      if (response.ok) {
+        const contentType = response.headers.get('content-type') || '';
+        if (contentType.includes('text/html')) {
+          const html = await response.text();
+          return { html, url: response.url, contentType };
+        }
+      }
+    } catch (directError) {
+      console.log('Direct fetch failed, trying CORS proxy:', directError.message);
+    }
+
+    // Try CORS proxies, remembering why the most recent one was rejected so
+    // the final error message is informative.
+    let lastError = null;
+
+    for (const proxy of CORS_PROXIES) {
+      try {
+        if (proxy.includes('allorigins.win')) {
+          // AllOrigins returns JSON with contents
+          const response = await fetch(`${proxy}${encodeURIComponent(url)}`);
+          if (!response.ok) {
+            lastError = new Error(`Proxy ${proxy} responded with HTTP ${response.status}`);
+            continue;
+          }
+
+          const data = await response.json();
+          if (data?.contents) {
+            return {
+              html: data.contents,
+              // `status` may be absent; fall back to the requested URL.
+              url: data.status?.url || url,
+              contentType: 'text/html'
+            };
+          }
+          lastError = new Error(`Proxy ${proxy} returned no content`);
+          continue;
+        }
+
+        // Other proxies return HTML directly
+        const response = await fetch(`${proxy}${url}`, {
+          headers: { 'Accept': acceptHeader }
+        });
+        if (!response.ok) {
+          lastError = new Error(`Proxy ${proxy} responded with HTTP ${response.status}`);
+          continue;
+        }
+
+        const contentType = response.headers.get('content-type') || 'text/html';
+        if (contentType.includes('text/html') || contentType.includes('text/plain')) {
+          const html = await response.text();
+          return { html, url, contentType };
+        }
+        lastError = new Error(`Proxy ${proxy} returned unsupported content type: ${contentType}`);
+      } catch (proxyError) {
+        lastError = proxyError;
+        console.log(`Proxy ${proxy} failed:`, proxyError.message);
+      }
+    }
+
+    throw new Error(`All fetch methods failed. Last error: ${lastError?.message || 'Unknown error'}`);
+
+  } catch (error) {
+    throw new Error(`Failed to fetch content: ${error.message}`, { cause: error });
+  }
+};
+
+/**
+ * Parse an HTML string into a document using the platform `DOMParser`.
+ *
+ * @param {string} html - Markup to parse.
+ * @returns {Document} The result of parsing `html` as `text/html`.
+ */
+export const parseHtml = (html) => new DOMParser().parseFromString(html, 'text/html');
+
+/**
+ * Detect article elements and structure in a parsed document.
+ *
+ * @param {Document} doc - Document to inspect (only `querySelector` and
+ *   `querySelectorAll` are used).
+ * @returns {{hasArticleTag: boolean, hasMainTag: boolean, headingCount: number,
+ *   paragraphCount: number, hasMetaArticle: boolean, hasJsonLd: boolean,
+ *   wordCount: number, linkDensity: number}} Structural signals. `wordCount`
+ *   and `linkDensity` are not computed here and are always `0`.
+ */
+export const detectArticleStructure = (doc) => {
+  const articleSchemaTypes = ['Article', 'NewsArticle', 'BlogPosting'];
+
+  // Gather every `@type` value from a JSON-LD payload: a single node, an
+  // array of nodes, array-valued `@type`, and nodes nested under `@graph`.
+  const collectJsonLdTypes = (data) => {
+    const nodes = Array.isArray(data) ? data : [data];
+    return nodes.flatMap((node) => {
+      if (node === null || typeof node !== 'object') {
+        return [];
+      }
+      const ownTypes = [].concat(node['@type'] ?? []);
+      const graphTypes = Array.isArray(node['@graph']) ? collectJsonLdTypes(node['@graph']) : [];
+      return [...ownTypes, ...graphTypes];
+    });
+  };
+
+  // Open Graph marks articles either with og:type=article or with tags in
+  // the `article:` namespace, which may use `property` or `name`.
+  const metaTags = doc.querySelectorAll(
+    'meta[property^="og:"], meta[property^="article:"], meta[name^="article:"]'
+  );
+  const hasMetaArticle = Array.from(metaTags).some((meta) => {
+    const property = meta.getAttribute('property');
+    if (property === 'og:type' && meta.getAttribute('content') === 'article') {
+      return true;
+    }
+    return Boolean(
+      property?.startsWith('article:') || meta.getAttribute('name')?.startsWith('article:')
+    );
+  });
+
+  // Check for JSON-LD structured data; malformed JSON counts as "no match".
+  const jsonLdScripts = doc.querySelectorAll('script[type="application/ld+json"]');
+  const hasJsonLd = Array.from(jsonLdScripts).some((script) => {
+    try {
+      const types = collectJsonLdTypes(JSON.parse(script.textContent));
+      return types.some((type) => articleSchemaTypes.includes(type));
+    } catch {
+      return false;
+    }
+  });
+
+  return {
+    // Semantic HTML5 tags
+    hasArticleTag: doc.querySelector('article') !== null,
+    hasMainTag: doc.querySelector('main') !== null,
+    headingCount: doc.querySelectorAll('h1, h2, h3, h4, h5, h6').length,
+    paragraphCount: doc.querySelectorAll('p').length,
+    hasMetaArticle,
+    hasJsonLd,
+    wordCount: 0,
+    linkDensity: 0
+  };
+};
+
+/**
+ * Extract clean text from the article portion of a document.
+ *
+ * The container is the first match among article-style selectors, then
+ * main-content selectors, then `doc.body`. Work happens on a clone of that
+ * container, so `doc` itself is not modified.
+ *
+ * @param {Document} doc - Parsed document.
+ * @returns {{text: string, elements: Array<{tag: string, text: string, length: number}>}}
+ *   `elements` lists the non-empty headings, paragraphs, list items, block
+ *   quotes and preformatted blocks in document order; `text` is their text
+ *   joined by blank lines. Both are empty when no container exists.
+ */
+export const extractArticleText = (doc) => {
+  const articleSelectors = [
+    'article',
+    'main article',
+    '[role="main"] article',
+    '.article-content',
+    '.post-content',
+    '.entry-content',
+    '.content-body'
+  ];
+  const mainSelectors = ['main', '[role="main"]', '#main', '#content', '.main-content'];
+  const unwantedSelectors = [
+    'script', 'style', 'nav', 'header', 'footer', 'aside',
+    '.navigation', '.nav', '.menu', '.sidebar', '.ads', '.advertisement',
+    '.social-share', '.comments', '.related-posts', '.author-bio'
+  ];
+  const meaningfulSelector = 'h1, h2, h3, h4, h5, h6, p, li, blockquote, pre';
+
+  // Return the first element matched by any selector, in list order.
+  const findFirst = (selectors) => {
+    for (const selector of selectors) {
+      const match = doc.querySelector(selector);
+      if (match) return match;
+    }
+    return null;
+  };
+
+  // Prefer an article container, then a main content area, then the body.
+  const container = findFirst(articleSelectors) || findFirst(mainSelectors) || doc.body;
+  if (!container) return { text: '', elements: [] };
+
+  // Remove unwanted elements from a copy
+  const clone = container.cloneNode(true);
+  unwantedSelectors.forEach((selector) => {
+    clone.querySelectorAll(selector).forEach((el) => el.remove());
+  });
+
+  // An element nested inside another meaningful element (e.g. the <p> inside
+  // a <blockquote> or <li>) is already covered by its ancestor's text.
+  // Emitting both would duplicate the text and inflate word counts, so only
+  // the outermost match is kept. The walk stops at the clone root.
+  const hasMeaningfulAncestor = (el) => {
+    for (let node = el.parentElement; node && node !== clone; node = node.parentElement) {
+      if (node.matches(meaningfulSelector)) return true;
+    }
+    return false;
+  };
+
+  const elements = Array.from(clone.querySelectorAll(meaningfulSelector))
+    .filter((el) => !hasMeaningfulAncestor(el))
+    .map((el) => {
+      const text = el.textContent.trim();
+      return { tag: el.tagName.toLowerCase(), text, length: text.length };
+    })
+    .filter((el) => el.length > 0);
+
+  const text = elements.map((el) => el.text).join('\n\n');
+
+  return { text, elements };
+};
+
+/**
+ * Extract all text from the page body.
+ *
+ * `script`, `style` and `noscript` elements are dropped from a clone of the
+ * body (the document itself is left untouched) and runs of whitespace are
+ * collapsed to single spaces.
+ *
+ * @param {Document} doc - Parsed document.
+ * @returns {string} The normalized text, or `''` when there is no body.
+ */
+export const extractAllText = (doc) => {
+  // Mirror extractArticleText: a document without a body has no text.
+  if (!doc?.body) return '';
+
+  const clone = doc.body.cloneNode(true);
+
+  // Remove unwanted elements
+  ['script', 'style', 'noscript'].forEach((selector) => {
+    clone.querySelectorAll(selector).forEach((el) => el.remove());
+  });
+
+  const text = clone.textContent || clone.innerText || '';
+  return text.replace(/\s+/g, ' ').trim();
+};
+
+/**
+ * Calculate content quality metrics.
+ *
+ * @param {Document} doc - Parsed document, used for element counts.
+ * @param {string} articleText - Text extracted from the article container.
+ * @param {string} allText - Text extracted from the whole page.
+ * @returns {{articleWordCount: number, totalWordCount: number, contentRatio: number,
+ *   linkCount: number, imageCount: number, headingCount: number,
+ *   paragraphCount: number, linkDensity: number}} Word and element counts.
+ *   `contentRatio` (article words / total words) and `linkDensity`
+ *   (links / total words) stay `0` when the page has no words.
+ */
+export const calculateContentMetrics = (doc, articleText, allText) => {
+  const countWords = (value) => value.split(/\s+/).filter((word) => word.length > 0).length;
+  const countElements = (selector) => doc.querySelectorAll(selector).length;
+
+  const articleWordCount = countWords(articleText);
+  const totalWordCount = countWords(allText);
+  const linkCount = countElements('a[href]');
+  const imageCount = countElements('img');
+  const headingCount = countElements('h1, h2, h3, h4, h5, h6');
+  const paragraphCount = countElements('p');
+
+  // Ratios are only defined when there is at least one word on the page.
+  const hasWords = totalWordCount > 0;
+
+  return {
+    articleWordCount,
+    totalWordCount,
+    contentRatio: hasWords ? articleWordCount / totalWordCount : 0,
+    linkCount,
+    imageCount,
+    headingCount,
+    paragraphCount,
+    linkDensity: hasWords ? linkCount / totalWordCount : 0
+  };
+};
+
+/**
+ * Classify content type based on structure and metrics.
+ *
+ * Tiers, checked from richest to poorest:
+ *   - rich article: structural signals, at least 300 article words and a
+ *     content ratio above 0.6;
+ *   - general content: at least 100 article words and a ratio above 0.3;
+ *   - limited content: at least 20 article words;
+ *   - no content: everything else.
+ *
+ * @param {object} structure - Result of `detectArticleStructure`.
+ * @param {object} metrics - Result of `calculateContentMetrics`.
+ * @param {string} articleText - Not read by this function.
+ * @returns {string} One of the `CONTENT_TYPES` values.
+ */
+export const classifyContent = (structure, metrics, articleText) => {
+  const { articleWordCount: words, contentRatio: ratio } = metrics;
+
+  // Either semantic/meta markers or a reasonable heading + paragraph layout.
+  const hasSemanticMarkers =
+    structure.hasArticleTag || structure.hasMainTag || structure.hasMetaArticle;
+  const hasHeadingLayout = structure.headingCount >= 2 && structure.paragraphCount >= 3;
+  const looksLikeArticle = hasSemanticMarkers || hasHeadingLayout;
+
+  if (looksLikeArticle && words >= 300 && ratio > 0.6) {
+    return CONTENT_TYPES.RICH_ARTICLE;
+  }
+  if (words >= 100 && ratio > 0.3) {
+    return CONTENT_TYPES.GENERAL_CONTENT;
+  }
+  return words >= 20 ? CONTENT_TYPES.LIMITED_CONTENT : CONTENT_TYPES.NO_CONTENT;
+};
+
+/**
+ * Main entry point: fetch a URL, then extract and analyze its content.
+ *
+ * Never rejects. Any failure along the way is reported through the returned
+ * object instead.
+ *
+ * @param {string} url - Address of the page to analyze.
+ * @returns {Promise<object>} On success: `success: true`, the final `url`,
+ *   `title`, `description`, `contentType` (a `CONTENT_TYPES` value),
+ *   `structure`, `metrics`, `articleText`, `allText`, `elements` and an ISO
+ *   `extractedAt` timestamp. On failure: `success: false`, the `error`
+ *   message and the requested `url`.
+ */
+export const extractContentFromUrl = async (url) => {
+  try {
+    // Download and parse the page
+    const fetched = await fetchUrlContent(url);
+    const doc = parseHtml(fetched.html);
+
+    // Structural signals and text
+    const structure = detectArticleStructure(doc);
+    const article = extractArticleText(doc);
+    const allText = extractAllText(doc);
+
+    // Metrics and classification
+    const metrics = calculateContentMetrics(doc, article.text, allText);
+    const contentType = classifyContent(structure, metrics, article.text);
+
+    // Page metadata
+    const titleNode = doc.querySelector('title');
+    const descriptionNode = doc.querySelector('meta[name="description"]');
+
+    return {
+      success: true,
+      url: fetched.url,
+      title: titleNode?.textContent?.trim() || '',
+      description: descriptionNode?.getAttribute('content') || '',
+      contentType,
+      structure,
+      metrics,
+      articleText: article.text,
+      allText,
+      elements: article.elements,
+      extractedAt: new Date().toISOString()
+    };
+  } catch (error) {
+    return { success: false, error: error.message, url };
+  }
+};