Files
Metify/src/app/actions.ts

169 lines
4.3 KiB
TypeScript

"use server";
import * as cheerio from "cheerio";
/**
 * One question/answer pair extracted from `FAQPage` JSON-LD structured data
 * found in the fetched page.
 */
interface FaqItem {
/** Question text, taken from the `name` of a JSON-LD `Question` entry. */
question: string;
/** Answer text, taken from that entry's `acceptedAnswer.text`. */
answer: string;
}
/**
 * A heading (`h1`–`h6`) found in the fetched page, arranged as a tree:
 * a heading becomes a child of the nearest preceding heading with a
 * smaller level number.
 */
export interface HeadlineNode {
/** Lower-cased tag name of the heading element, e.g. `"h2"`. */
tag: string;
/** Trimmed text content of the heading. */
text: string;
/** Character length of `text`. */
length: number;
/** Numeric heading level parsed from the tag name (`"h3"` -> `3`). */
level: number;
/** Deeper-level headings that follow this one in document order. */
children: HeadlineNode[];
}
/** Source URL and alt text of one `<img>` element in the fetched page. */
export interface ImageAltData {
/** Image URL, resolved to an absolute URL against the fetched page URL. */
src: string;
/** Trimmed `alt` attribute; an empty string when the attribute is missing. */
alt: string;
}
/** Error message returned whenever the normalized URL cannot be parsed. */
const INVALID_URL_ERROR =
  "The provided URL is not valid. Please check and try again.";

/** Parses `raw` (optionally against `base`) and returns `null` instead of throwing. */
function tryParseUrl(raw: string, base?: URL): URL | null {
  try {
    return new URL(raw, base);
  } catch {
    return null;
  }
}

/** Escapes regex metacharacters so `literal` is matched verbatim inside a RegExp. */
function escapeRegExp(literal: string): string {
  return literal.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}

/** Narrows an unknown JSON value to a plain (non-array, non-null) object. */
function isJsonObject(value: unknown): value is Record<string, unknown> {
  return typeof value === "object" && value !== null && !Array.isArray(value);
}

/** Tells whether a JSON-LD node's `@type` is, or contains, `typeName`. */
function hasSchemaType(
  node: Record<string, unknown>,
  typeName: string,
): boolean {
  const type = node["@type"];
  return Array.isArray(type) ? type.includes(typeName) : type === typeName;
}

/** Flattens a parsed JSON-LD document (object, array, or `@graph` wrapper) into its nodes. */
function listJsonLdNodes(jsonLd: unknown): unknown[] {
  if (Array.isArray(jsonLd)) {
    return jsonLd.flatMap(listJsonLdNodes);
  }
  if (!isJsonObject(jsonLd)) {
    return [];
  }
  const graph = jsonLd["@graph"];
  return Array.isArray(graph) ? graph : [jsonLd];
}

/** Extracts question/answer pairs from every `FAQPage` node of a parsed JSON-LD document. */
function collectFaqItems(jsonLd: unknown): FaqItem[] {
  const items: FaqItem[] = [];
  for (const node of listJsonLdNodes(jsonLd)) {
    if (!isJsonObject(node) || !hasSchemaType(node, "FAQPage")) continue;

    const entities = node["mainEntity"];
    if (!Array.isArray(entities)) continue;

    for (const entity of entities) {
      if (!isJsonObject(entity) || !hasSchemaType(entity, "Question")) continue;

      const question = entity["name"];
      const accepted = entity["acceptedAnswer"];
      const answer = isJsonObject(accepted) ? accepted["text"] : undefined;
      // Only keep pairs whose question and answer are non-empty strings, as FaqItem promises.
      if (
        typeof question === "string" &&
        question &&
        typeof answer === "string" &&
        answer
      ) {
        items.push({ question, answer });
      }
    }
  }
  return items;
}

/**
 * Fetches a web page and extracts SEO-relevant metadata from its HTML.
 *
 * @param url - Page address. Surrounding whitespace is ignored and `https://`
 *   is prepended when no `http://` / `https://` scheme is present.
 * @param keyword - Optional phrase whose case-insensitive, literal occurrences
 *   in the page's body text are counted.
 * @returns Either `{ error }` with a user-facing message (missing or invalid URL,
 *   non-2xx response, or any failure while fetching or parsing), or `{ data }`
 *   holding the title, description, Open Graph image, FAQ pairs, heading tree,
 *   keyword statistics and image alt texts. List-valued fields are `null`
 *   when nothing was found.
 */
export async function extractMetaData(url: string, keyword?: string) {
  const requestedUrl = url?.trim();
  if (!requestedUrl) {
    return { error: "URL is required." };
  }

  const formattedUrl = /^https?:\/\//i.test(requestedUrl)
    ? requestedUrl
    : `https://${requestedUrl}`;

  // Validate up front: fetch() reports unparseable URLs with runtime-specific
  // messages, so relying on the error text alone misses them.
  const pageUrl = tryParseUrl(formattedUrl);
  if (pageUrl === null) {
    return { error: INVALID_URL_ERROR };
  }

  try {
    // NOTE(review): this server action fetches an arbitrary caller-supplied URL,
    // which can reach internal/private network addresses (SSRF). Consider
    // restricting allowed hosts before exposing it to untrusted users.
    const response = await fetch(formattedUrl, {
      headers: {
        "User-Agent":
          "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
      },
    });
    if (!response.ok) {
      return { error: `Failed to fetch URL. Status: ${response.status}` };
    }

    const html = await response.text();
    const $ = cheerio.load(html);

    // `.first()` matters: inline SVGs may carry their own <title> elements and
    // `.text()` would otherwise concatenate all of them into the page title.
    const title =
      $('meta[property="og:title"]').attr("content") ||
      $("title").first().text().trim() ||
      "No title found";
    const description =
      $('meta[property="og:description"]').attr("content") ||
      $('meta[name="description"]').attr("content") ||
      "No description found";
    const image = $('meta[property="og:image"]').attr("content") || null;

    // FAQ pairs from JSON-LD structured data.
    const faqData: FaqItem[] = [];
    $('script[type="application/ld+json"]').each((_, el) => {
      const jsonContent = $(el).html();
      if (!jsonContent) return;
      try {
        faqData.push(...collectFaqItems(JSON.parse(jsonContent)));
      } catch {
        // Malformed JSON-LD is best-effort: skip this script tag, keep the rest.
      }
    });

    // Heading outline: `openAncestors` holds the chain of headings that the
    // next heading could still be nested under.
    const headlines: HeadlineNode[] = [];
    const openAncestors: HeadlineNode[] = [];
    $("h1, h2, h3, h4, h5, h6").each((_, el) => {
      const tag = $(el).prop("tagName").toLowerCase();
      const text = $(el).text().trim();
      if (!text) return;

      const level = parseInt(tag.replace("h", ""), 10);
      const node: HeadlineNode = {
        tag,
        text,
        length: text.length,
        level,
        children: [],
      };

      // Close every heading of the same or deeper level; what remains on top
      // (if anything) is the parent of this heading.
      let parent = openAncestors[openAncestors.length - 1];
      while (parent !== undefined && parent.level >= level) {
        openAncestors.pop();
        parent = openAncestors[openAncestors.length - 1];
      }
      if (parent === undefined) {
        headlines.push(node);
      } else {
        parent.children.push(node);
      }
      openAncestors.push(node);
    });

    // Images with their alt text; sources are resolved against the page URL.
    const imageAltData: ImageAltData[] = [];
    $("img").each((_, el) => {
      const src = $(el).attr("src");
      if (!src) return;
      const absoluteSrc = tryParseUrl(src, pageUrl);
      if (absoluteSrc === null) return; // Ignore sources that cannot form a URL.
      imageAltData.push({
        src: absoluteSrc.href,
        alt: ($(el).attr("alt") ?? "").trim(),
      });
    });

    // Keyword statistics come last because stripping <script>/<style> mutates
    // the parsed document.
    let keywordCount: number | null = null;
    const trimmedKeyword = keyword?.trim();
    if (trimmedKeyword) {
      $("script, style").remove();
      const bodyText = $("body").text();
      // Escape the keyword so input such as "c++" or "(" is counted literally
      // instead of throwing or being interpreted as a pattern.
      const matcher = new RegExp(escapeRegExp(trimmedKeyword), "gi");
      keywordCount = bodyText.match(matcher)?.length ?? 0;
    }

    return {
      data: {
        title,
        description,
        image,
        faq: faqData.length > 0 ? faqData : null,
        headlines: headlines.length > 0 ? headlines : null,
        keyword: trimmedKeyword || null,
        keywordCount,
        images: imageAltData.length > 0 ? imageAltData : null,
      },
    };
  } catch (error) {
    console.error(error);
    // Kept for runtimes whose fetch still surfaces "Invalid URL" in the message.
    if (error instanceof Error && error.message.includes("Invalid URL")) {
      return { error: INVALID_URL_ERROR };
    }
    return { error: "An unexpected error occurred while fetching the URL." };
  }
}