Improve ObjectEditor and Add TableEditor

This commit is contained in:
dwindown
2025-09-23 14:17:13 +07:00
parent cf750114f7
commit 977e784df2
15 changed files with 5329 additions and 345 deletions

147
src/utils/browserCompat.js Normal file
View File

@@ -0,0 +1,147 @@
// Browser compatibility utilities for handling different browser environments
/**
 * Detect whether the app is running inside Telegram's built-in browser.
 *
 * Only Telegram-specific signals are used:
 *  - a Telegram token in the user-agent string, or
 *  - a Telegram web-view bridge object exposed on `window`.
 *
 * A generic "mobile WebKit without Chrome" heuristic is intentionally NOT
 * used: that pattern also matches regular Safari (and other browsers) on iOS,
 * which made every iPhone/iPad visitor look like a Telegram user.
 *
 * @returns {boolean} true when a Telegram-specific signal is present.
 */
export const isTelegramBrowser = () => {
  const userAgent = navigator.userAgent.toLowerCase();
  const telegramTokens = ['telegram', 'tgios', 'tgandroid'];

  if (telegramTokens.some((token) => userAgent.includes(token))) {
    return true;
  }

  // Bridge objects are looked up defensively so a missing `window`
  // (e.g. a non-browser runtime) yields `false` instead of a ReferenceError.
  if (typeof window === 'undefined') {
    return false;
  }

  // NOTE(review): `TelegramWebview` is reported to be the bridge name used by
  // Telegram's in-app browser on Android — confirm on a real device.
  return window.TelegramWebviewProxy !== undefined ||
    window.TelegramWebview !== undefined;
};
/**
 * Detect whether the page is being shown by a mobile in-app browser.
 *
 * The lower-cased user-agent string is scanned for a fixed list of
 * substrings; any single hit is enough.
 *
 * @returns {boolean} true when at least one marker occurs in the user agent.
 */
export const isInAppBrowser = () => {
  const ua = navigator.userAgent.toLowerCase();
  const inAppMarkers = [
    'wv',        // WebView
    'telegram',
    'fbav',      // Facebook
    'fban',      // Facebook
    'instagram',
    'twitter',
    'line',
    'whatsapp'
  ];
  return inAppMarkers.some((marker) => ua.includes(marker));
};
/**
 * Collect a snapshot of what is known about the current browser.
 *
 * @returns {{userAgent: string, isTelegram: boolean, isInApp: boolean,
 *   isMobile: boolean, isIOS: boolean, isAndroid: boolean}} the raw
 *   user-agent string plus the flags derived from it.
 */
export const getBrowserInfo = () => {
  const { userAgent } = navigator;

  const isTelegram = isTelegramBrowser();
  const isInApp = isInAppBrowser();

  const mobilePattern = /Android|webOS|iPhone|iPad|iPod|BlackBerry|IEMobile|Opera Mini/i;
  const iosPattern = /iPad|iPhone|iPod/;
  const androidPattern = /Android/;

  const isMobile = mobilePattern.test(userAgent);
  const isIOS = iosPattern.test(userAgent);
  const isAndroid = androidPattern.test(userAgent);

  return { userAgent, isTelegram, isInApp, isMobile, isIOS, isAndroid };
};
/**
 * Build a minimal in-memory stand-in for the Web Storage API.
 * Values live only for the lifetime of the page, but reads return what was
 * written, so code relying on storage round-trips keeps working.
 */
const createMemoryStorage = () => {
  const entries = new Map();
  return {
    getItem: (key) => (entries.has(String(key)) ? entries.get(String(key)) : null),
    setItem: (key, value) => {
      // Web Storage stores strings only; mirror that coercion.
      entries.set(String(key), String(value));
    },
    removeItem: (key) => {
      entries.delete(String(key));
    },
    clear: () => {
      entries.clear();
    },
    key: (index) => {
      const keys = Array.from(entries.keys());
      return index >= 0 && index < keys.length ? keys[index] : null;
    },
    get length() {
      return entries.size;
    }
  };
};

/**
 * Probe `window[name]` (localStorage / sessionStorage) and replace it with an
 * in-memory fallback when reading or writing it throws.
 */
const ensureUsableStorage = (name) => {
  // A dedicated probe key avoids deleting a real application entry.
  const probeKey = '__browser_compat_probe__';
  try {
    const storage = window[name];
    storage.setItem(probeKey, probeKey);
    storage.removeItem(probeKey);
    return;
  } catch (e) {
    // Storage is missing, blocked or full: fall through to the fallback.
  }
  try {
    // The native property is a getter-only accessor, so plain assignment
    // throws in strict-mode (module) code; redefine the property instead.
    Object.defineProperty(window, name, {
      value: createMemoryStorage(),
      configurable: true,
      enumerable: true,
      writable: true
    });
  } catch (defineError) {
    console.warn(`Unable to install ${name} fallback:`, defineError);
  }
};

/**
 * Add polyfills and compatibility fixes for problematic browsers.
 *
 * Ensures that, after the call:
 *  - `console.log/error/warn/info/debug` are callable,
 *  - `window.requestAnimationFrame` / `cancelAnimationFrame` exist,
 *  - `localStorage` and `sessionStorage` can be used without throwing
 *    (backed by memory when the native storage is unusable).
 */
export const addCompatibilityFixes = () => {
  // Fix for missing or problematic console methods in some browsers
  if (!window.console) {
    window.console = {};
  }
  ['log', 'error', 'warn', 'info', 'debug'].forEach((method) => {
    if (typeof console[method] !== 'function') {
      console[method] = () => {};
    }
  });

  // Add requestAnimationFrame polyfill if missing
  if (!window.requestAnimationFrame) {
    const now = () => (
      typeof performance !== 'undefined' && typeof performance.now === 'function'
        ? performance.now()
        : Date.now()
    );
    // Native callbacks receive a timestamp; pass one so animations that
    // compute deltas from it do not see `undefined`.
    window.requestAnimationFrame = (callback) => setTimeout(() => callback(now()), 1000 / 60);
  }

  // Add cancelAnimationFrame polyfill if missing
  if (!window.cancelAnimationFrame) {
    window.cancelAnimationFrame = (id) => {
      clearTimeout(id);
    };
  }

  // Fix for missing or problematic localStorage / sessionStorage
  ensureUsableStorage('localStorage');
  ensureUsableStorage('sessionStorage');
};
/**
 * Initialize compatibility fixes.
 *
 * Collects browser information, installs the generic polyfills and, when the
 * Telegram in-app browser is detected, registers global `error` and
 * `unhandledrejection` listeners that log the problem and call
 * `preventDefault()` on the event.
 *
 * @returns {object} the browser info object produced by `getBrowserInfo()`.
 */
export const initBrowserCompat = () => {
  const browserInfo = getBrowserInfo();

  // Install the polyfills BEFORE any logging: they are what guarantees that
  // `console.log` exists in the browsers this module is meant to rescue.
  addCompatibilityFixes();

  // Log browser info for debugging
  console.log('Browser Info:', browserInfo);

  if (!browserInfo.isTelegram) {
    return browserInfo;
  }

  console.log('Telegram browser detected - applying compatibility fixes');

  // Listener return values are ignored for addEventListener handlers;
  // preventDefault() is the call that marks the event as handled.
  window.addEventListener('error', (event) => {
    console.log('Global error caught in Telegram browser:', event.error);
    event.preventDefault();
  });
  window.addEventListener('unhandledrejection', (event) => {
    console.log('Unhandled promise rejection in Telegram browser:', event.reason);
    event.preventDefault();
  });

  return browserInfo;
};

View File

@@ -0,0 +1,371 @@
// Content extraction and article detection utilities
/**
 * Content classification types.
 *
 * Frozen so that an importer cannot accidentally reassign or add a member and
 * silently break every lookup keyed by these values.
 */
export const CONTENT_TYPES = Object.freeze({
  RICH_ARTICLE: 'rich_article',
  GENERAL_CONTENT: 'general_content',
  LIMITED_CONTENT: 'limited_content',
  NO_CONTENT: 'no_content'
});
/**
 * Display information for each content type.
 *
 * Keyed by the `CONTENT_TYPES` values; every entry carries a human-readable
 * label, an emoji, a one-line description and the CSS classes used to colour it.
 */
export const CONTENT_TYPE_INFO = Object.fromEntries([
  [CONTENT_TYPES.RICH_ARTICLE, {
    label: 'Rich Article Content',
    emoji: '🟢',
    description: 'Clear article structure with headings and paragraphs',
    color: 'text-green-600 dark:text-green-400'
  }],
  [CONTENT_TYPES.GENERAL_CONTENT, {
    label: 'General Web Content',
    emoji: '🟡',
    description: 'Readable text mixed with navigation and UI elements',
    color: 'text-yellow-600 dark:text-yellow-400'
  }],
  [CONTENT_TYPES.LIMITED_CONTENT, {
    label: 'Limited Text Content',
    emoji: '🟠',
    description: 'Mostly UI/navigation with minimal readable text',
    color: 'text-orange-600 dark:text-orange-400'
  }],
  [CONTENT_TYPES.NO_CONTENT, {
    label: 'No Readable Content',
    emoji: '🔴',
    description: 'Images, videos, or heavily JavaScript-dependent content',
    color: 'text-red-600 dark:text-red-400'
  }]
]);
/**
 * CORS proxy services for fetching external content, tried in this order.
 */
const CORS_PROXIES = [
  'https://api.allorigins.win/get?url=',
  'https://corsproxy.io/?',
  'https://cors-anywhere.herokuapp.com/',
  'https://thingproxy.freeboard.io/fetch/'
];

/**
 * Proxies that take the target as a query-string value. For these the target
 * must be percent-encoded, otherwise its own `?`, `&` and `#` characters are
 * parsed as part of the proxy URL and the request reaches the wrong address.
 */
const PROXIES_EXPECTING_ENCODED_URL = ['allorigins.win', 'corsproxy.io'];

/** Accept header sent with HTML requests. */
const HTML_ACCEPT_HEADER = 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8';

/** Build the request URL for one proxy, encoding the target when required. */
const buildProxyUrl = (proxy, url) => {
  const needsEncoding = PROXIES_EXPECTING_ENCODED_URL.some((host) => proxy.includes(host));
  return `${proxy}${needsEncoding ? encodeURIComponent(url) : url}`;
};

/**
 * Fetch and parse HTML content from URL with CORS proxy fallback.
 *
 * A direct request is attempted first; when it fails or does not return HTML,
 * each entry of `CORS_PROXIES` is tried in order until one yields content.
 *
 * @param {string} url - absolute http(s) URL of the page to download.
 * @returns {Promise<{html: string, url: string, contentType: string}>}
 *   the markup, the URL it was served from (as far as it is known) and the
 *   reported content type.
 * @throws {Error} with a message starting with "Failed to fetch content:" when
 *   the URL is invalid, uses another protocol, or every attempt failed. The
 *   underlying error is attached as `cause`.
 */
export const fetchUrlContent = async (url) => {
  try {
    // Validate URL
    const urlObj = new URL(url);
    if (!['http:', 'https:'].includes(urlObj.protocol)) {
      throw new Error('Only HTTP and HTTPS URLs are supported');
    }

    // First try direct fetch (works for same-origin or CORS-enabled sites).
    // No custom User-Agent is sent: it is not a CORS-safelisted request header,
    // so adding it turns a simple request into one that needs a preflight and
    // makes otherwise readable sites fail.
    try {
      const response = await fetch(url, {
        method: 'GET',
        headers: { 'Accept': HTML_ACCEPT_HEADER }
      });
      if (response.ok) {
        const contentType = response.headers.get('content-type') || '';
        if (contentType.includes('text/html')) {
          const html = await response.text();
          return { html, url: response.url, contentType };
        }
      }
    } catch (directError) {
      console.log('Direct fetch failed, trying CORS proxy:', directError.message);
    }

    // Try CORS proxies
    let lastError = null;
    for (const proxy of CORS_PROXIES) {
      try {
        const proxyUrl = buildProxyUrl(proxy, url);

        if (proxy.includes('allorigins.win')) {
          // AllOrigins returns JSON with contents
          const response = await fetch(proxyUrl);
          if (!response.ok) {
            lastError = new Error(`Proxy ${proxy} responded with HTTP ${response.status}`);
            continue;
          }
          const data = await response.json();
          if (data?.contents) {
            return {
              html: data.contents,
              // `status` is optional metadata; its absence must not discard
              // contents that were delivered successfully.
              url: data.status?.url || url,
              contentType: 'text/html'
            };
          }
          lastError = new Error(`Proxy ${proxy} returned no contents`);
          continue;
        }

        // Other proxies return HTML directly
        const response = await fetch(proxyUrl, {
          headers: { 'Accept': HTML_ACCEPT_HEADER }
        });
        if (!response.ok) {
          lastError = new Error(`Proxy ${proxy} responded with HTTP ${response.status}`);
          continue;
        }
        const contentType = response.headers.get('content-type') || 'text/html';
        if (contentType.includes('text/html') || contentType.includes('text/plain')) {
          const html = await response.text();
          return { html, url, contentType };
        }
        lastError = new Error(`Proxy ${proxy} returned unsupported content type: ${contentType}`);
      } catch (proxyError) {
        lastError = proxyError;
        console.log(`Proxy ${proxy} failed:`, proxyError.message);
      }
    }

    throw new Error(`All fetch methods failed. Last error: ${lastError?.message || 'Unknown error'}`);
  } catch (error) {
    throw new Error(`Failed to fetch content: ${error.message}`, { cause: error });
  }
};
/**
 * Turn an HTML string into a DOM document.
 *
 * @param {string} html - markup to parse.
 * @returns {Document} the result of `DOMParser#parseFromString` using the
 *   'text/html' MIME type.
 */
export const parseHtml = (html) => new DOMParser().parseFromString(html, 'text/html');
/** schema.org types that mark a page as an article. */
const ARTICLE_SCHEMA_TYPES = ['Article', 'NewsArticle', 'BlogPosting'];

/**
 * Tell whether a parsed JSON-LD value declares one of the article types.
 * Handles a single node, an array of nodes, an array-valued `@type` and
 * nodes nested under `@graph`.
 */
const jsonLdDeclaresArticle = (node) => {
  if (Array.isArray(node)) {
    return node.some(jsonLdDeclaresArticle);
  }
  if (node === null || typeof node !== 'object') {
    return false;
  }
  const declaredTypes = [].concat(node['@type'] ?? []);
  if (declaredTypes.some((type) => ARTICLE_SCHEMA_TYPES.includes(type))) {
    return true;
  }
  return jsonLdDeclaresArticle(node['@graph']);
};

/**
 * Detect article elements and structure.
 *
 * @param {Document} doc - parsed document to inspect.
 * @returns {{hasArticleTag: boolean, hasMainTag: boolean, headingCount: number,
 *   paragraphCount: number, hasMetaArticle: boolean, hasJsonLd: boolean,
 *   wordCount: number, linkDensity: number}} structural signals.
 *   `wordCount` and `linkDensity` are not computed by this function and are
 *   always returned as 0.
 */
export const detectArticleStructure = (doc) => {
  // Check meta tags for articles. Open Graph article metadata is published
  // through the `property` attribute (e.g. property="article:published_time"),
  // so both `property` and `name` are inspected.
  const metaTags = doc.querySelectorAll(
    'meta[property^="og:"], meta[property^="article:"], meta[name^="article:"]'
  );
  const hasMetaArticle = Array.from(metaTags).some((meta) => {
    const property = meta.getAttribute('property');
    const name = meta.getAttribute('name');
    return Boolean(
      (property === 'og:type' && meta.getAttribute('content') === 'article') ||
      property?.startsWith('article:') ||
      name?.startsWith('article:')
    );
  });

  // Check for JSON-LD structured data
  const jsonLdScripts = doc.querySelectorAll('script[type="application/ld+json"]');
  const hasJsonLd = Array.from(jsonLdScripts).some((script) => {
    try {
      return jsonLdDeclaresArticle(JSON.parse(script.textContent));
    } catch {
      // Malformed JSON-LD simply does not count as an article signal.
      return false;
    }
  });

  return {
    // Check for semantic HTML5 tags
    hasArticleTag: doc.querySelector('article') !== null,
    hasMainTag: doc.querySelector('main') !== null,
    headingCount: doc.querySelectorAll('h1, h2, h3, h4, h5, h6').length,
    paragraphCount: doc.querySelectorAll('p').length,
    hasMetaArticle,
    hasJsonLd,
    wordCount: 0,
    linkDensity: 0
  };
};
/**
 * Extract clean text from article elements.
 *
 * The first element matching an article selector (then a main-content
 * selector, then `doc.body`) is cloned, stripped of navigation/ads/etc. and
 * scanned for headings, paragraphs, list items, block quotes and `pre` blocks.
 *
 * Matches nested inside another match (for example `blockquote > p` or
 * `li > p`) are skipped: the outer element's text already contains them, and
 * reporting both would duplicate text and inflate word counts.
 *
 * @param {Document} doc - parsed document to read from (not modified).
 * @returns {{text: string, elements: Array<{tag: string, text: string, length: number}>}}
 *   the non-empty text blocks in document order and their text joined by
 *   blank lines; empty results when no container exists.
 */
export const extractArticleText = (doc) => {
  // Article containers first, generic main-content areas afterwards.
  const containerSelectors = [
    'article',
    'main article',
    '[role="main"] article',
    '.article-content',
    '.post-content',
    '.entry-content',
    '.content-body',
    'main',
    '[role="main"]',
    '#main',
    '#content',
    '.main-content'
  ];

  let matchedContainer = null;
  for (const selector of containerSelectors) {
    matchedContainer = doc.querySelector(selector);
    if (matchedContainer) break;
  }

  // Extract text from container or full document
  const container = matchedContainer || doc.body;
  if (!container) return { text: '', elements: [] };

  // Remove unwanted elements (on a clone, so the caller's document is untouched)
  const unwantedSelectors = [
    'script', 'style', 'nav', 'header', 'footer', 'aside',
    '.navigation', '.nav', '.menu', '.sidebar', '.ads', '.advertisement',
    '.social-share', '.comments', '.related-posts', '.author-bio'
  ];
  const clone = container.cloneNode(true);
  unwantedSelectors.forEach((selector) => {
    clone.querySelectorAll(selector).forEach((el) => el.remove());
  });

  // Extract text from meaningful elements
  const candidates = Array.from(
    clone.querySelectorAll('h1, h2, h3, h4, h5, h6, p, li, blockquote, pre')
  );
  const candidateSet = new Set(candidates);
  const isNestedInCandidate = (el) => {
    for (let ancestor = el.parentElement; ancestor; ancestor = ancestor.parentElement) {
      if (candidateSet.has(ancestor)) return true;
    }
    return false;
  };

  const elements = candidates
    .filter((el) => !isNestedInCandidate(el))
    .map((el) => {
      const text = el.textContent.trim();
      return { tag: el.tagName.toLowerCase(), text, length: text.length };
    })
    .filter((el) => el.length > 0);

  const text = elements.map((el) => el.text).join('\n\n');
  return { text, elements };
};
/**
 * Extract all visible text from page.
 *
 * `script`, `style` and `noscript` elements are dropped from a clone of the
 * body, and the remaining text is collapsed to single spaces.
 *
 * @param {Document} doc - parsed document to read from (not modified).
 * @returns {string} whitespace-normalised text, or '' when the document has
 *   no body.
 */
export const extractAllText = (doc) => {
  // A document without a body has no visible text; returning '' keeps this
  // function consistent with extractArticleText instead of throwing.
  const body = doc?.body;
  if (!body) return '';

  const clone = body.cloneNode(true);

  // Remove unwanted elements
  const unwantedSelectors = ['script', 'style', 'noscript'];
  unwantedSelectors.forEach((selector) => {
    clone.querySelectorAll(selector).forEach((el) => el.remove());
  });

  const text = clone.textContent || clone.innerText || '';
  return text.replace(/\s+/g, ' ').trim();
};
/**
 * Compute content quality metrics.
 *
 * @param {Document} doc - document whose links, images, headings and
 *   paragraphs are counted.
 * @param {string} articleText - text extracted from the article area.
 * @param {string} allText - all text of the page.
 * @returns {{articleWordCount: number, totalWordCount: number,
 *   contentRatio: number, linkCount: number, imageCount: number,
 *   headingCount: number, paragraphCount: number, linkDensity: number}}
 *   `contentRatio` is article words / total words and `linkDensity` is
 *   links / total words; both stay 0 when the page has no words.
 */
export const calculateContentMetrics = (doc, articleText, allText) => {
  const countWords = (value) => value.split(/\s+/).filter((word) => word.length > 0).length;
  const countMatches = (selector) => doc.querySelectorAll(selector).length;

  const articleWordCount = countWords(articleText);
  const totalWordCount = countWords(allText);
  const linkCount = countMatches('a[href]');
  const imageCount = countMatches('img');
  const headingCount = countMatches('h1, h2, h3, h4, h5, h6');
  const paragraphCount = countMatches('p');

  // Ratios are only meaningful when there is at least one word to divide by.
  const hasWords = totalWordCount > 0;

  return {
    articleWordCount,
    totalWordCount,
    contentRatio: hasWords ? articleWordCount / totalWordCount : 0,
    linkCount,
    imageCount,
    headingCount,
    paragraphCount,
    linkDensity: hasWords ? linkCount / totalWordCount : 0
  };
};
/**
 * Classify content type based on structure and metrics.
 *
 * Rules, checked from richest to poorest:
 *  - rich article: article-like markup (semantic/meta signal, or at least
 *    2 headings and 3 paragraphs), 300+ article words, content ratio above 0.6;
 *  - general content: 100+ article words and content ratio above 0.3;
 *  - limited content: 20+ article words;
 *  - otherwise: no readable content.
 *
 * @param {object} structure - result of the structure detection step.
 * @param {object} metrics - result of the metrics calculation step.
 * @param {string} articleText - accepted for interface compatibility; the
 *   classification does not read it.
 * @returns {string} one of the `CONTENT_TYPES` values.
 */
export const classifyContent = (structure, metrics, articleText) => {
  const { articleWordCount: words, contentRatio: ratio } = metrics;

  const hasSemanticSignal = structure.hasArticleTag || structure.hasMainTag || structure.hasMetaArticle;
  const hasRichLayout = structure.headingCount >= 2 && structure.paragraphCount >= 3;
  const looksLikeArticle = hasSemanticSignal || hasRichLayout;

  if (looksLikeArticle && words >= 300 && ratio > 0.6) {
    return CONTENT_TYPES.RICH_ARTICLE;
  }
  if (words >= 100 && ratio > 0.3) {
    return CONTENT_TYPES.GENERAL_CONTENT;
  }
  return words >= 20 ? CONTENT_TYPES.LIMITED_CONTENT : CONTENT_TYPES.NO_CONTENT;
};
/**
 * Download a page and run the whole extraction/analysis pipeline on it.
 *
 * Never rejects: any failure along the way is reported through the returned
 * object.
 *
 * @param {string} url - address of the page to analyse.
 * @returns {Promise<object>} on success `{ success: true, url, title,
 *   description, contentType, structure, metrics, articleText, allText,
 *   elements, extractedAt }`, where `url` is the one reported by the fetch
 *   step and `contentType` is the classification result; on failure
 *   `{ success: false, error, url }` with the error message and the URL
 *   that was requested.
 */
export const extractContentFromUrl = async (url) => {
  try {
    // Download and parse
    const { html, url: resolvedUrl } = await fetchUrlContent(url);
    const doc = parseHtml(html);

    // Analyse structure and text
    const structure = detectArticleStructure(doc);
    const article = extractArticleText(doc);
    const allText = extractAllText(doc);
    const metrics = calculateContentMetrics(doc, article.text, allText);
    const classification = classifyContent(structure, metrics, article.text);

    // Page metadata
    const titleNode = doc.querySelector('title');
    const descriptionNode = doc.querySelector('meta[name="description"]');

    return {
      success: true,
      url: resolvedUrl,
      title: titleNode?.textContent?.trim() || '',
      description: descriptionNode?.getAttribute('content') || '',
      contentType: classification,
      structure,
      metrics,
      articleText: article.text,
      allText,
      elements: article.elements,
      extractedAt: new Date().toISOString()
    };
  } catch (error) {
    return { success: false, error: error.message, url };
  }
};