189 lines
4.9 KiB
TypeScript
189 lines
4.9 KiB
TypeScript
"use server";
|
|
|
|
import * as cheerio from "cheerio";
|
|
|
|
/**
 * One question/answer pair taken from a page's `FAQPage` JSON-LD markup.
 */
interface FaqItem {
  /** The question text (the `name` of a schema.org `Question` entry). */
  question: string;
  /** The answer text (the `text` of the question's `acceptedAnswer`). */
  answer: string;
}
|
|
|
|
/**
 * A heading (`h1`–`h6`) found on the page, arranged as a tree: each node's
 * `children` are the following headings with a deeper level.
 */
export interface HeadlineNode {
  /** Lower-cased tag name, e.g. `"h2"`. */
  tag: string;
  /** Trimmed text content of the heading. */
  text: string;
  /** Character count of `text`. */
  length: number;
  /** Numeric heading level parsed from the tag (1 for `h1` … 6 for `h6`). */
  level: number;
  /** Headings nested directly beneath this one. */
  children: HeadlineNode[];
}
|
|
|
|
/**
 * An `<img>` found on the page together with its alt text and reported size.
 */
export interface ImageAltData {
  /** Image URL resolved to an absolute URL. */
  src: string;
  /** Trimmed `alt` attribute; empty string when the attribute is missing. */
  alt: string;
  /** Byte size taken from the `Content-Length` of a HEAD request, or `null` when unavailable. */
  size: number | null;
}
|
|
|
|
/** Cheerio document handle; derived from `cheerio.load` so no extra type import is required. */
type CheerioRoot = ReturnType<typeof cheerio.load>;

/** Outcome of downloading the page; `html` is `null` when the response status was not OK. */
interface FetchedPage {
  status: number;
  finalUrl: string;
  html: string | null;
}

/** Upper bound for downloading the page itself (headers and body), in milliseconds. */
const PAGE_FETCH_TIMEOUT_MS = 15000;

/** Upper bound for each image HEAD probe, so one slow host cannot stall the whole action. */
const IMAGE_HEAD_TIMEOUT_MS = 5000;

const BROWSER_USER_AGENT =
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36";

const INVALID_URL_MESSAGE =
  "The provided URL is not valid. Please check and try again.";

/** Runs `run` with an AbortSignal that fires after `ms` milliseconds. */
async function withTimeout<T>(
  ms: number,
  run: (signal: AbortSignal) => Promise<T>,
): Promise<T> {
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), ms);
  try {
    return await run(controller.signal);
  } finally {
    clearTimeout(timer);
  }
}

/** Escapes regex metacharacters so `literal` is matched verbatim. */
function escapeRegExp(literal: string): string {
  return literal.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}

/** Counts case-insensitive, non-overlapping occurrences of `keyword` in `text`. */
function countOccurrences(text: string, keyword: string): number {
  const matches = text.match(new RegExp(escapeRegExp(keyword), "gi"));
  return matches ? matches.length : 0;
}

/** Narrows an unknown JSON value to a plain (non-array) object. */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === "object" && value !== null && !Array.isArray(value);
}

/** True when the JSON-LD node's `@type` is `expected` or is an array containing it. */
function hasType(node: Record<string, unknown>, expected: string): boolean {
  const declared = node["@type"];
  return Array.isArray(declared)
    ? declared.includes(expected)
    : declared === expected;
}

/** Collects FAQ question/answer pairs from one parsed JSON-LD document. */
function collectFaqItems(parsed: unknown): FaqItem[] {
  const items: FaqItem[] = [];
  // JSON-LD may be a single object or a top-level array of objects.
  const roots: unknown[] = Array.isArray(parsed) ? parsed : [parsed];

  for (const root of roots) {
    if (!isRecord(root)) continue;

    const graph = root["@graph"];
    const nodes: unknown[] = Array.isArray(graph)
      ? graph
      : isRecord(graph)
        ? [graph]
        : [root];

    for (const node of nodes) {
      if (!isRecord(node) || !hasType(node, "FAQPage")) continue;

      const mainEntity = node["mainEntity"];
      const entries: unknown[] = Array.isArray(mainEntity)
        ? mainEntity
        : [mainEntity];

      for (const entry of entries) {
        if (!isRecord(entry) || !hasType(entry, "Question")) continue;

        const accepted = entry["acceptedAnswer"];
        if (!isRecord(accepted)) continue;

        const question = entry["name"];
        const answer = accepted["text"];
        // Only non-empty strings satisfy the FaqItem contract.
        if (
          typeof question === "string" &&
          question &&
          typeof answer === "string" &&
          answer
        ) {
          items.push({ question, answer });
        }
      }
    }
  }

  return items;
}

/** Extracts FAQ pairs from every JSON-LD script on the page; malformed JSON is skipped. */
function extractFaq($: CheerioRoot): FaqItem[] {
  const items: FaqItem[] = [];

  $('script[type="application/ld+json"]').each((_index, el) => {
    const raw = $(el).html();
    if (!raw) return;

    let parsed: unknown;
    try {
      parsed = JSON.parse(raw);
    } catch {
      // Best effort: a broken JSON-LD block must not fail the whole extraction.
      return;
    }
    items.push(...collectFaqItems(parsed));
  });

  return items;
}

/** Builds the heading outline: each heading is nested under the closest preceding shallower heading. */
function buildHeadlineTree($: CheerioRoot): HeadlineNode[] {
  const roots: HeadlineNode[] = [];
  // Chain of currently "open" headings, shallowest first.
  const ancestors: HeadlineNode[] = [];

  $("h1, h2, h3, h4, h5, h6").each((_index, el) => {
    const heading = $(el);
    const text = heading.text().trim();
    if (!text) return;

    const tag = String(heading.prop("tagName")).toLowerCase();
    const level = parseInt(tag.replace("h", ""), 10);
    const node: HeadlineNode = {
      tag,
      text,
      length: text.length,
      level,
      children: [],
    };

    // Close every open heading that is at the same depth or deeper.
    while (
      ancestors.length > 0 &&
      ancestors[ancestors.length - 1].level >= level
    ) {
      ancestors.pop();
    }

    const parent = ancestors[ancestors.length - 1];
    if (parent) {
      parent.children.push(node);
    } else {
      roots.push(node);
    }
    ancestors.push(node);
  });

  return roots;
}

/** Lists every `<img>` with a resolvable `src`, made absolute against `baseUrl`. */
function collectImages(
  $: CheerioRoot,
  baseUrl: string,
): { src: string; alt: string }[] {
  const found: { src: string; alt: string }[] = [];

  $("img").each((_index, el) => {
    const src = $(el).attr("src");
    if (!src) return;

    const alt = ($(el).attr("alt") || "").trim();
    try {
      found.push({ src: new URL(src, baseUrl).href, alt });
    } catch {
      // Ignore image sources that cannot be resolved to a URL.
    }
  });

  return found;
}

/** Reads an image's byte size from a HEAD request; `null` when it cannot be determined. */
async function probeImageSize(src: string): Promise<number | null> {
  // data:, blob: and similar URLs cannot be probed over HTTP.
  if (!/^https?:/i.test(src)) return null;

  try {
    return await withTimeout(IMAGE_HEAD_TIMEOUT_MS, async (signal) => {
      const res = await fetch(src, { method: "HEAD", signal });
      if (!res.ok) return null;

      const header = res.headers.get("content-length");
      if (!header) return null;

      const bytes = parseInt(header, 10);
      return Number.isFinite(bytes) && bytes >= 0 ? bytes : null;
    });
  } catch {
    // Network failure or timeout: the size is simply unknown.
    return null;
  }
}

/** Attaches a size to every image, probing each distinct URL only once. */
function measureImages(
  images: { src: string; alt: string }[],
): Promise<ImageAltData[]> {
  const probes = new Map<string, Promise<number | null>>();

  return Promise.all(
    images.map(async (img) => {
      let probe = probes.get(img.src);
      if (probe === undefined) {
        probe = probeImageSize(img.src);
        probes.set(img.src, probe);
      }
      return { ...img, size: await probe };
    }),
  );
}

/** Downloads the page with a browser-like User-Agent, bounded by PAGE_FETCH_TIMEOUT_MS. */
function fetchPage(target: string): Promise<FetchedPage> {
  return withTimeout<FetchedPage>(PAGE_FETCH_TIMEOUT_MS, async (signal) => {
    const response = await fetch(target, {
      headers: { "User-Agent": BROWSER_USER_AGENT },
      signal,
    });
    return {
      status: response.status,
      // After redirects, relative links must resolve against the final URL.
      finalUrl: response.url || target,
      html: response.ok ? await response.text() : null,
    };
  });
}

/**
 * Fetches a web page and extracts SEO-relevant metadata from it.
 *
 * Collected data: title and description (Open Graph first, then the regular
 * tags), the Open Graph image, FAQ entries from `FAQPage` JSON-LD, the heading
 * outline, an optional keyword occurrence count over the body text, and every
 * image with its alt text and reported byte size.
 *
 * NOTE(security): this fetches arbitrary caller-supplied URLs (and the image
 * URLs found on the page) from the server. If the action is reachable by
 * untrusted users, restrict private/internal hosts before calling it.
 *
 * @param url Page address; `https://` is prepended when no http(s) scheme is present.
 * @param keyword Optional phrase to count (case-insensitive, matched literally).
 * @returns `{ data }` on success, otherwise `{ error }` with a user-facing message.
 */
export async function extractMetaData(url: string, keyword?: string) {
  const trimmedUrl = typeof url === "string" ? url.trim() : "";
  if (!trimmedUrl) {
    return { error: "URL is required." };
  }

  const formattedUrl = /^https?:\/\//i.test(trimmedUrl)
    ? trimmedUrl
    : `https://${trimmedUrl}`;

  // Validate up front: fetch() reports unparseable URLs with a message that
  // does not contain "Invalid URL", so the friendly error was never reached.
  try {
    new URL(formattedUrl);
  } catch {
    return { error: INVALID_URL_MESSAGE };
  }

  try {
    const page = await fetchPage(formattedUrl);
    if (page.html === null) {
      return { error: `Failed to fetch URL. Status: ${page.status}` };
    }

    const $ = cheerio.load(page.html);

    // `.first()` avoids concatenating inline-SVG <title> elements into the page title.
    const title =
      $('meta[property="og:title"]').attr("content") ||
      $("title").first().text().trim() ||
      "No title found";
    const description =
      $('meta[property="og:description"]').attr("content") ||
      $('meta[name="description"]').attr("content") ||
      "No description found";
    const image = $('meta[property="og:image"]').attr("content") || null;

    const faqData = extractFaq($);
    const headlines = buildHeadlineTree($);

    let keywordCount: number | null = null;
    const trimmedKeyword = typeof keyword === "string" ? keyword.trim() : "";
    if (trimmedKeyword) {
      // Script and style contents are not visible text; drop them before counting.
      // This runs after the JSON-LD scripts have already been read above.
      $("script, style").remove();
      keywordCount = countOccurrences($("body").text(), trimmedKeyword);
    }

    const imageAltData: ImageAltData[] = await measureImages(
      collectImages($, page.finalUrl),
    );

    return {
      data: {
        title,
        description,
        image,
        faq: faqData.length > 0 ? faqData : null,
        headlines: headlines.length > 0 ? headlines : null,
        keyword: trimmedKeyword || null,
        keywordCount,
        images: imageAltData.length > 0 ? imageAltData : null,
      },
    };
  } catch (error) {
    console.error(error);
    if (error instanceof Error && error.message.includes("Invalid URL")) {
      return {
        error: INVALID_URL_MESSAGE,
      };
    }
    return { error: "An unexpected error occurred while fetching the URL." };
  }
}