"use server"; import * as cheerio from "cheerio"; interface FaqItem { question: string; answer: string; } export interface HeadlineNode { tag: string; text: string; length: number; level: number; children: HeadlineNode[]; } export interface ImageAltData { src: string; alt: string; } export async function extractMetaData(url: string, keyword?: string) { if (!url) { return { error: "URL is required." }; } let formattedUrl = url; if (!/^https?:\/\//i.test(url)) { formattedUrl = `https://${url}`; } try { const response = await fetch(formattedUrl, { headers: { "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36", }, }); if (!response.ok) { return { error: `Failed to fetch URL. Status: ${response.status}` }; } const html = await response.text(); const $ = cheerio.load(html); const title = $('meta[property="og:title"]').attr("content") || $("title").text() || "No title found"; const description = $('meta[property="og:description"]').attr("content") || $('meta[name="description"]').attr("content") || "No description found"; const image = $('meta[property="og:image"]').attr("content") || null; const faqData: FaqItem[] = []; $('script[type="application/ld+json"]').each((i, el) => { const jsonContent = $(el).html(); if (!jsonContent) return; try { const data = JSON.parse(jsonContent); const graph = data["@graph"] || [data]; for (const item of graph) { if (item["@type"] === "FAQPage" && Array.isArray(item.mainEntity)) { item.mainEntity.forEach((qa: any) => { if ( qa["@type"] === "Question" && qa.name && qa.acceptedAnswer && qa.acceptedAnswer.text ) { faqData.push({ question: qa.name, answer: qa.acceptedAnswer.text, }); } }); } } } catch (e) { // Ignore parsing errors } }); const headlines: HeadlineNode[] = []; const path: HeadlineNode[] = []; $("h1, h2, h3, h4, h5, h6").each((i, el) => { const tag = $(el).prop("tagName").toLowerCase(); const text = $(el).text().trim(); if (!text) return; const level = parseInt(tag.replace("h", ""), 10); const node: HeadlineNode = { tag, text, length: text.length, level, children: [], }; while (path.length > 0 && path[path.length - 1].level >= level) { path.pop(); } if (path.length === 0) { headlines.push(node); } else { path[path.length - 1].children.push(node); } path.push(node); }); let keywordCount: number | null = null; const trimmedKeyword = keyword?.trim(); if (trimmedKeyword) { $("script, style").remove(); const bodyText = $("body").text(); const regex = new RegExp(trimmedKeyword, "gi"); const matches = bodyText.match(regex); keywordCount = matches ? matches.length : 0; } const imageAltData: ImageAltData[] = []; $("img").each((i, el) => { const src = $(el).attr("src"); const alt = $(el).attr("alt") || ""; if (src) { try { const absoluteSrc = new URL(src, formattedUrl).href; imageAltData.push({ src: absoluteSrc, alt: alt.trim(), }); } catch (e) { // Ignore invalid URLs } } }); return { data: { title, description, image, faq: faqData.length > 0 ? faqData : null, headlines: headlines.length > 0 ? headlines : null, keyword: trimmedKeyword || null, keywordCount, images: imageAltData.length > 0 ? imageAltData : null, }, }; } catch (error) { console.error(error); if (error instanceof Error && error.message.includes("Invalid URL")) { return { error: "The provided URL is not valid. Please check and try again.", }; } return { error: "An unexpected error occurred while fetching the URL." }; } }