// Files
// Metify/src/app/actions.ts
//
// 314 lines
// 8.5 KiB
// TypeScript

"use server";
import * as cheerio from "cheerio";
/** A single question/answer pair collected from FAQPage JSON-LD structured data. */
interface FaqItem {
/** Question text, taken from the `name` of a `Question` entry. */
question: string;
/** Answer text, taken from the entry's `acceptedAnswer.text`. */
answer: string;
}
/** One heading (`h1`-`h6`) of the analyzed page, arranged as a node of the heading outline tree. */
export interface HeadlineNode {
/** Lower-cased tag name of the heading element, e.g. `"h2"`. */
tag: string;
/** Trimmed text content of the heading. */
text: string;
/** Length of `text` (its `String.length`). */
length: number;
/** Numeric heading level parsed from the tag name (1 for `h1` ... 6 for `h6`). */
level: number;
/** Deeper-level headings that follow this one in the document and are nested under it. */
children: HeadlineNode[];
}
/** An `<img>` found on the analyzed page together with its alt text and reported size. */
export interface ImageAltData {
/** Absolute image URL (the `src` attribute resolved against the page URL). */
src: string;
/** Trimmed `alt` attribute; an empty string when the attribute is missing. */
alt: string;
/** Value of the `Content-Length` header returned by a HEAD request for the image, or `null` when it could not be determined. */
size: number | null;
}
/** An `<a href>` link found on the analyzed page. */
export interface LinkData {
/** Absolute URL of the link target when the `href` could be parsed; otherwise the raw `href` value. */
href: string;
/** Trimmed text content of the anchor element. */
text: string;
/** Classification of the link target relative to the analyzed page. */
type: "internal" | "external" | "anchor" | "other";
/** Value of the `rel` attribute; an empty string when the attribute is missing. */
rel: string;
}
/** A CMS, site builder or framework recognized from markers in the page markup. */
export interface DetectedSystem {
/** Display name of the detected system, e.g. `"WordPress"` or `"Next.js"`. */
name: string;
}
/** Cheerio document handle, derived from `cheerio.load` so it tracks whichever cheerio version is installed. */
type CheerioRoot = ReturnType<typeof cheerio.load>;

/** Per-image limit (ms) for the HEAD size probe, so one unresponsive host cannot stall the whole analysis. */
const IMAGE_PROBE_TIMEOUT_MS = 5000;

/** Escapes regular-expression metacharacters so `text` can be matched literally. */
function escapeRegExp(text: string): string {
  return text.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}

/** Tells whether a JSON-LD node declares `type`, accepting both string and array forms of `@type`. */
// JSON-LD is schemaless external data, hence the deliberately loose `any` parameter.
function hasSchemaType(node: any, type: string): boolean {
  const declared = node?.["@type"];
  return Array.isArray(declared) ? declared.includes(type) : declared === type;
}

/** Parses every JSON-LD script of the page and returns all schema nodes plus the FAQ entries found in them. */
function collectStructuredData($: CheerioRoot): { faq: FaqItem[]; schema: any[] } {
  const faq: FaqItem[] = [];
  const schema: any[] = [];

  $('script[type="application/ld+json"]').each((_i, el) => {
    const raw = $(el).html();
    if (!raw) return;

    let parsed: unknown;
    try {
      parsed = JSON.parse(raw);
    } catch {
      // Malformed JSON-LD is common in the wild; skip this script only.
      return;
    }

    // A JSON-LD document may be a single object or a top-level array of objects.
    const roots: unknown[] = Array.isArray(parsed) ? parsed : [parsed];
    for (const root of roots) {
      if (root === null || typeof root !== "object") continue;

      const graph = (root as any)["@graph"];
      const nodes: any[] = Array.isArray(graph) ? graph : [root];
      schema.push(...nodes);

      for (const node of nodes) {
        if (node === null || typeof node !== "object") continue;
        if (!hasSchemaType(node, "FAQPage") || !node.mainEntity) continue;

        // `mainEntity` is usually an array but a single Question object is valid too.
        const questions: any[] = Array.isArray(node.mainEntity)
          ? node.mainEntity
          : [node.mainEntity];
        for (const qa of questions) {
          if (!hasSchemaType(qa, "Question") || !qa.name) continue;
          const accepted = Array.isArray(qa.acceptedAnswer)
            ? qa.acceptedAnswer[0]
            : qa.acceptedAnswer;
          if (accepted && accepted.text) {
            faq.push({ question: qa.name, answer: accepted.text });
          }
        }
      }
    }
  });

  return { faq, schema };
}

/** Builds the nested outline of all non-empty `h1`-`h6` headings in document order. */
function buildHeadlineTree($: CheerioRoot): HeadlineNode[] {
  const roots: HeadlineNode[] = [];
  // Chain of currently "open" ancestors, shallowest first.
  const stack: HeadlineNode[] = [];

  $("h1, h2, h3, h4, h5, h6").each((_i, el) => {
    const heading = $(el);
    const text = heading.text().trim();
    if (!text) return;

    const tag = String(heading.prop("tagName") ?? "").toLowerCase();
    const level = Number.parseInt(tag.replace("h", ""), 10);
    if (Number.isNaN(level)) return;

    const node: HeadlineNode = {
      tag,
      text,
      length: text.length,
      level,
      children: [],
    };

    // Close every open heading that is not strictly shallower than this one.
    while (stack.length > 0 && stack[stack.length - 1].level >= level) {
      stack.pop();
    }
    if (stack.length === 0) {
      roots.push(node);
    } else {
      stack[stack.length - 1].children.push(node);
    }
    stack.push(node);
  });

  return roots;
}

/** Counts case-insensitive, literal occurrences of `keyword` in the visible body text. */
function countKeywordOccurrences($: CheerioRoot, keyword: string): number {
  // Work on a clone: stripping <script>/<style> from the shared document would
  // change what later analysis steps (e.g. system detection) get to see.
  const body = $("body").clone();
  body.find("script, style").remove();
  const pattern = new RegExp(escapeRegExp(keyword), "gi");
  return body.text().match(pattern)?.length ?? 0;
}

/** Collects every `<img>` with a resolvable `src`, returning absolute URLs and trimmed alt texts. */
function collectImages(
  $: CheerioRoot,
  baseUrl: URL,
): { src: string; alt: string }[] {
  const images: { src: string; alt: string }[] = [];
  $("img").each((_i, el) => {
    const src = $(el).attr("src");
    if (!src) return;
    const alt = $(el).attr("alt") || "";
    try {
      images.push({ src: new URL(src, baseUrl).href, alt: alt.trim() });
    } catch {
      // Unresolvable src: leave the image out, as there is nothing to report.
    }
  });
  return images;
}

/** Returns the `Content-Length` reported for `src` by a HEAD request, or `null` when unavailable. */
async function probeImageSize(src: string): Promise<number | null> {
  // data:, blob: and similar sources have no remote size to ask for.
  if (!/^https?:/i.test(src)) return null;

  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), IMAGE_PROBE_TIMEOUT_MS);
  try {
    const res = await fetch(src, { method: "HEAD", signal: controller.signal });
    if (!res.ok) return null;
    const contentLength = res.headers.get("content-length");
    if (contentLength === null) return null;
    const bytes = Number.parseInt(contentLength, 10);
    return Number.isNaN(bytes) ? null : bytes;
  } catch {
    // Best effort: network errors and timeouts simply mean "size unknown".
    return null;
  } finally {
    clearTimeout(timer);
  }
}

/** Adds the probed size to every image; identical sources share a single HEAD request. */
async function measureImages(
  images: { src: string; alt: string }[],
): Promise<ImageAltData[]> {
  const sizeBySrc = new Map<string, Promise<number | null>>();
  return Promise.all(
    images.map(async (img) => {
      let pending = sizeBySrc.get(img.src);
      if (!pending) {
        pending = probeImageSize(img.src);
        sizeBySrc.set(img.src, pending);
      }
      return { ...img, size: await pending };
    }),
  );
}

/** Resolves `rawHref` against the page URL and classifies it as anchor / internal / external / other. */
function classifyLink(
  rawHref: string,
  pageUrl: URL,
): { href: string; type: LinkData["type"] } {
  const href = rawHref.trim();

  let resolved: URL;
  try {
    resolved = new URL(href, pageUrl);
  } catch {
    return { href: rawHref, type: "other" };
  }

  // `new URL` happily parses "#frag", "mailto:" and "tel:", so these cases
  // must be recognized explicitly instead of relying on a parse failure.
  let type: LinkData["type"];
  if (href.startsWith("#")) {
    type = "anchor";
  } else if (resolved.protocol !== "http:" && resolved.protocol !== "https:") {
    type = "other";
  } else if (resolved.hostname === pageUrl.hostname) {
    type = "internal";
  } else {
    type = "external";
  }
  return { href: resolved.href, type };
}

/** Collects every `<a>` that has a non-empty `href`, with text, rel and classification. */
function collectLinks($: CheerioRoot, pageUrl: URL): LinkData[] {
  const links: LinkData[] = [];
  $("a").each((_i, el) => {
    const anchor = $(el);
    const rawHref = anchor.attr("href");
    if (!rawHref) return;
    const { href, type } = classifyLink(rawHref, pageUrl);
    links.push({
      href,
      text: anchor.text().trim(),
      type,
      rel: anchor.attr("rel") || "",
    });
  });
  return links;
}

/** Detects well-known CMSs, site builders and frameworks from markers in the markup. */
function detectSystems($: CheerioRoot, html: string): DetectedSystem[] {
  const found = new Set<string>();

  // WordPress
  if (
    $('meta[name="generator"][content*="WordPress"]').length > 0 ||
    html.includes("/wp-content/") ||
    html.includes("/wp-includes/")
  ) {
    found.add("WordPress");
  }
  // Shopify
  if (html.includes("cdn.shopify.com") || html.includes("Shopify.theme")) {
    found.add("Shopify");
  }
  // Next.js (which is built on React)
  if ($("#__next").length > 0) {
    found.add("Next.js");
    found.add("React");
  }
  // React (generic mount point)
  if ($("#root").length > 0) {
    found.add("React");
  }
  // Webflow
  if (
    $('meta[name="generator"][content="Webflow"]').length > 0 ||
    html.includes("<!-- This site was created in Webflow.")
  ) {
    found.add("Webflow");
  }
  // Wix
  if ($('meta[name="generator"][content*="Wix.com"]').length > 0) {
    found.add("Wix");
  }
  // Squarespace
  if (html.includes("static1.squarespace.com")) {
    found.add("Squarespace");
  }

  return Array.from(found, (name) => ({ name }));
}

/**
 * Fetches a web page and extracts its SEO-relevant data: title/description,
 * canonical and robots hints, Open Graph and Twitter tags, JSON-LD schema and
 * FAQ entries, the heading outline, images (with reported sizes), links and
 * detected site systems. Optionally counts occurrences of a keyword.
 *
 * NOTE(review): this performs server-side requests to a caller-supplied URL and
 * to every image URL found on that page. If the action is reachable by
 * untrusted users, consider restricting private/loopback targets (SSRF).
 *
 * @param url Page address; `https://` is prepended when no http(s) scheme is given.
 * @param keyword Optional phrase, matched literally and case-insensitively in the body text.
 * @returns `{ data }` on success, otherwise `{ error }` with a user-facing message.
 *          Collections inside `data` are `null` when empty.
 */
export async function extractMetaData(url: string, keyword?: string) {
  if (!url || !url.trim()) {
    return { error: "URL is required." };
  }

  const trimmedUrl = url.trim();
  const formattedUrl = /^https?:\/\//i.test(trimmedUrl)
    ? trimmedUrl
    : `https://${trimmedUrl}`;

  // Validate before fetching: `fetch` reports unparsable URLs with a different
  // message ("Failed to parse URL ..."), which would otherwise be surfaced as a
  // generic fetch failure instead of the invalid-URL hint.
  try {
    new URL(formattedUrl);
  } catch {
    return {
      error: "The provided URL is not valid. Please check and try again.",
    };
  }

  try {
    const response = await fetch(formattedUrl, {
      headers: {
        "User-Agent":
          "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
      },
    });
    if (!response.ok) {
      return { error: `Failed to fetch URL. Status: ${response.status}` };
    }

    const html = await response.text();
    const $ = cheerio.load(html);

    // Resolve relative URLs against the final URL so that redirects
    // (e.g. apex -> www) do not turn internal links into external ones.
    const pageUrl = new URL(response.url || formattedUrl);

    // `||` (not `??`) on purpose: an empty content attribute counts as missing.
    const meta = (selector: string): string | undefined =>
      $(selector).attr("content");

    const ogTitle = meta('meta[property="og:title"]');
    const ogDescription = meta('meta[property="og:description"]');
    const ogImage = meta('meta[property="og:image"]');

    // Only the first document <title> counts; inline SVGs carry their own
    // <title> elements which must not be concatenated into the page title.
    const documentTitle = $("title").not("svg title").first().text().trim();

    const title = ogTitle || documentTitle || "No title found";
    const description =
      ogDescription ||
      meta('meta[name="description"]') ||
      "No description found";
    const image = ogImage || null;
    const canonical = $('link[rel="canonical"]').attr("href") || null;
    const robots = meta('meta[name="robots"]') || null;

    // Social tags
    const openGraph = {
      title: ogTitle || title,
      description: ogDescription || description,
      image: ogImage || image,
      url: meta('meta[property="og:url"]') || null,
      siteName: meta('meta[property="og:site_name"]') || null,
      type: meta('meta[property="og:type"]') || null,
    };
    const twitter = {
      card: meta('meta[name="twitter:card"]') || null,
      title: meta('meta[name="twitter:title"]') || openGraph.title,
      description:
        meta('meta[name="twitter:description"]') || openGraph.description,
      image: meta('meta[name="twitter:image"]') || openGraph.image,
      site: meta('meta[name="twitter:site"]') || null,
      creator: meta('meta[name="twitter:creator"]') || null,
    };

    const { faq: faqData, schema: schemaData } = collectStructuredData($);
    const headlines = buildHeadlineTree($);

    const trimmedKeyword = keyword?.trim();
    const keywordCount: number | null = trimmedKeyword
      ? countKeywordOccurrences($, trimmedKeyword)
      : null;

    const imageAltData = await measureImages(collectImages($, pageUrl));
    const links = collectLinks($, pageUrl);
    const detectedSystems = detectSystems($, html);

    return {
      data: {
        title,
        description,
        image,
        canonical,
        robots,
        openGraph,
        twitter,
        faq: faqData.length > 0 ? faqData : null,
        schema: schemaData.length > 0 ? schemaData : null,
        headlines: headlines.length > 0 ? headlines : null,
        keyword: trimmedKeyword || null,
        keywordCount,
        images: imageAltData.length > 0 ? imageAltData : null,
        links: links.length > 0 ? links : null,
        systems: detectedSystems.length > 0 ? detectedSystems : null,
      },
    };
  } catch (error) {
    console.error(error);
    if (error instanceof Error && error.message.includes("Invalid URL")) {
      return {
        error: "The provided URL is not valid. Please check and try again.",
      };
    }
    return { error: "An unexpected error occurred while fetching the URL." };
  }
}