"use server"; import * as cheerio from "cheerio"; interface FaqItem { question: string; answer: string; } export interface HeadlineNode { tag: string; text: string; length: number; level: number; children: HeadlineNode[]; } export interface ImageAltData { src: string; alt: string; size: number | null; } export interface LinkData { href: string; text: string; type: "internal" | "external" | "anchor" | "other"; rel: string; } export interface DetectedSystem { name: string; } export async function extractMetaData(url: string, keyword?: string) { if (!url) { return { error: "URL is required." }; } let formattedUrl = url; if (!/^https?:\/\//i.test(url)) { formattedUrl = `https://${url}`; } try { const response = await fetch(formattedUrl, { headers: { "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36", }, }); if (!response.ok) { return { error: `Failed to fetch URL. Status: ${response.status}` }; } const html = await response.text(); const $ = cheerio.load(html); const title = $('meta[property="og:title"]').attr("content") || $("title").text() || "No title found"; const description = $('meta[property="og:description"]').attr("content") || $('meta[name="description"]').attr("content") || "No description found"; const image = $('meta[property="og:image"]').attr("content") || null; const canonical = $('link[rel="canonical"]').attr("href") || null; const robots = $('meta[name="robots"]').attr("content") || null; // Social Tags const openGraph = { title: $('meta[property="og:title"]').attr("content") || title, description: $('meta[property="og:description"]').attr("content") || description, image: $('meta[property="og:image"]').attr("content") || image, url: $('meta[property="og:url"]').attr("content") || null, siteName: $('meta[property="og:site_name"]').attr("content") || null, type: $('meta[property="og:type"]').attr("content") || null, }; const twitter = { card: $('meta[name="twitter:card"]').attr("content") || null, title: $('meta[name="twitter:title"]').attr("content") || openGraph.title, description: $('meta[name="twitter:description"]').attr("content") || openGraph.description, image: $('meta[name="twitter:image"]').attr("content") || openGraph.image, site: $('meta[name="twitter:site"]').attr("content") || null, creator: $('meta[name="twitter:creator"]').attr("content") || null, }; const faqData: FaqItem[] = []; const schemaData: any[] = []; $('script[type="application/ld+json"]').each((i, el) => { const jsonContent = $(el).html(); if (!jsonContent) return; try { const data = JSON.parse(jsonContent); const graph = data["@graph"] || [data]; schemaData.push(...graph); for (const item of graph) { if (item["@type"] === "FAQPage" && Array.isArray(item.mainEntity)) { item.mainEntity.forEach((qa: any) => { if ( qa["@type"] === "Question" && qa.name && qa.acceptedAnswer && qa.acceptedAnswer.text ) { faqData.push({ question: qa.name, answer: qa.acceptedAnswer.text, }); } }); } } } catch (e) { // Ignore parsing errors } }); const headlines: HeadlineNode[] = []; const path: HeadlineNode[] = []; $("h1, h2, h3, h4, h5, h6").each((i, el) => { const tag = $(el).prop("tagName").toLowerCase(); const text = $(el).text().trim(); if (!text) return; const level = parseInt(tag.replace("h", ""), 10); const node: HeadlineNode = { tag, text, length: text.length, level, children: [], }; while (path.length > 0 && path[path.length - 1].level >= level) { path.pop(); } if (path.length === 0) { headlines.push(node); } else { path[path.length - 1].children.push(node); } path.push(node); }); let keywordCount: number | null = null; const trimmedKeyword = keyword?.trim(); if (trimmedKeyword) { $("script, style").remove(); const bodyText = $("body").text(); const regex = new RegExp(trimmedKeyword, "gi"); const matches = bodyText.match(regex); keywordCount = matches ? matches.length : 0; } const imageSrcs: { src: string; alt: string }[] = []; $("img").each((i, el) => { const src = $(el).attr("src"); const alt = $(el).attr("alt") || ""; if (src) { try { const absoluteSrc = new URL(src, formattedUrl).href; imageSrcs.push({ src: absoluteSrc, alt: alt.trim(), }); } catch (e) { // Ignore invalid URLs } } }); const imageSizePromises = imageSrcs.map(async (img) => { try { const res = await fetch(img.src, { method: "HEAD" }); if (res.ok) { const contentLength = res.headers.get("content-length"); return { ...img, size: contentLength ? parseInt(contentLength, 10) : null, }; } return { ...img, size: null }; } catch (error) { return { ...img, size: null }; } }); const imageAltData: ImageAltData[] = await Promise.all(imageSizePromises); const links: LinkData[] = []; const pageUrl = new URL(formattedUrl); $("a").each((i, el) => { const href = $(el).attr("href"); if (!href) return; const text = $(el).text().trim(); const rel = $(el).attr("rel") || ""; let type: LinkData["type"] = "external"; let absoluteUrl = href; try { const linkUrl = new URL(href, formattedUrl); absoluteUrl = linkUrl.href; if (linkUrl.hostname === pageUrl.hostname) { type = "internal"; } } catch (e) { if (href.startsWith("#")) type = "anchor"; else if (href.startsWith("mailto:") || href.startsWith("tel:")) type = "other"; } links.push({ href: absoluteUrl, text, type, rel }); }); const detectedSystems: DetectedSystem[] = []; const htmlContent = $.html(); const uniqueSystems = new Set(); // WordPress if ( $('meta[name="generator"][content*="WordPress"]').length > 0 || htmlContent.includes("/wp-content/") || htmlContent.includes("/wp-includes/") ) { uniqueSystems.add("WordPress"); } // Shopify if ( htmlContent.includes("cdn.shopify.com") || htmlContent.includes("Shopify.theme") ) { uniqueSystems.add("Shopify"); } // Next.js if ($("#__next").length > 0) { uniqueSystems.add("Next.js"); uniqueSystems.add("React"); // Next.js uses React } // React (generic) if ($("#root").length > 0) { uniqueSystems.add("React"); } // Webflow if ( $('meta[name="generator"][content="Webflow"]').length > 0 || htmlContent.includes("