import millify from "millify";
import moment from "moment";
import { getPublicSuffix } from "tldts";

import { logger } from "@/lib/logger";
import { CrawlableUrl, parseCrawlableUrl } from "@/lib/sites";
import {
  EntityType,
  PersonCompanyRelationship,
  PersonPersonRelationship,
  RelationshipType,
} from "@/types";

/**
 * Truncates `input` at the last space at or before `truncationPoint`, then tidies the
 * cut: a dangling trailing preposition (with/on/in/at/to/for/of) is dropped and,
 * unless `truncateSlashes` is false, every slash-separated word is reduced to its
 * final segment. An ellipsis ("...") is appended whenever something was cut.
 *
 * @param input - The text to shorten.
 * @param truncationPoint - Maximum number of characters of `input` to keep.
 * @param truncateSlashes - When true (default), "a/b/c" style words become "c".
 * @returns `input` unchanged when it already fits, otherwise the shortened text plus "...".
 */
export function smartTruncate(input: string, truncationPoint: number, truncateSlashes = true) {
  // An input of exactly `truncationPoint` characters already fits; nothing to cut.
  if (input.length <= truncationPoint) return input;

  const previousWordIndex = input.lastIndexOf(" ", truncationPoint);
  if (previousWordIndex === -1) {
    // We cannot find a space to truncate at, so just truncate at the truncation point
    return input.slice(0, truncationPoint) + "...";
  }
  const truncatedString = input.slice(0, previousWordIndex).trim();

  // Avoid ending on a dangling preposition such as "... fox on".
  const withoutPrepositions = truncatedString.replace(/\b(with|on|in|at|to|for|of)$/gi, "").trim();
  if (!truncateSlashes) return withoutPrepositions + "...";

  // Keep only the last segment of slash-separated words (e.g. paths).
  const slashesShortened = withoutPrepositions
    .split(" ")
    .map((word) => (word.includes("/") ? word.substring(word.lastIndexOf("/") + 1) : word))
    .join(" ");

  return slashesShortened + "...";
}

/**
 * Keeps the last `truncationPoint` characters of `input`, prefixing "..." when
 * anything was removed from the front.
 *
 * @param input - The text to shorten.
 * @param truncationPoint - Number of trailing characters to keep.
 * @returns `input` unchanged when it already fits, otherwise "..." plus the tail.
 */
export function frontTruncate(input: string, truncationPoint: number) {
  // `<=` so that an input of exactly `truncationPoint` characters is not decorated.
  if (input.length <= truncationPoint) return input;
  // Explicit start index: `slice(-0)` would return the whole string.
  return "..." + input.slice(input.length - truncationPoint);
}

/**
 * Keeps the first and last `truncationPoint` characters of `input` and replaces
 * the middle with "...".
 *
 * @param input - The text to shorten.
 * @param truncationPoint - Number of characters to keep on EACH side.
 * @returns `input` unchanged when both sides already cover it, otherwise head + "..." + tail.
 */
export function middleTruncate(input: string, truncationPoint: number) {
  // `<=` so that an input of exactly 2 * truncationPoint characters is returned intact
  // instead of having "..." inserted with nothing removed.
  if (input.length <= truncationPoint * 2) return input;
  // Explicit start index: `slice(-0)` would return the whole string.
  const tail = input.slice(input.length - truncationPoint);
  return input.slice(0, truncationPoint) + "..." + tail;
}

/**
 * Hard-truncates `input` to `truncationPoint` characters, appending "..." when
 * anything was cut.
 *
 * @param input - The text to shorten.
 * @param truncationPoint - Maximum number of characters of `input` to keep.
 * @returns `input` unchanged when it already fits, otherwise the prefix plus "...".
 */
export function truncate(input: string, truncationPoint: number) {
  // `<=` so that an input of exactly `truncationPoint` characters does not gain an ellipsis.
  if (input.length <= truncationPoint) return input;
  return input.slice(0, truncationPoint) + "...";
}

/**
 * Decodes HTML (entities and markup) by parsing `str` as a "text/html" document
 * with `DOMParser` and returning the text content of its root element.
 * Requires an environment that provides `DOMParser`.
 */
export function decodeHtml(str: string) {
  const parser = new DOMParser();
  const { documentElement } = parser.parseFromString(str, "text/html");
  return documentElement.textContent;
}

/**
 * Replaces every literal `\uXXXX` sequence (backslash, "u", exactly four hex
 * digits) in `str` with the UTF-16 code unit it names.
 */
export function decodeUnicodeEscapes(str: string): string {
  const escapePattern = /\\u([0-9a-fA-F]{4})/g;
  const toChar = (_whole: string, hex: string) => String.fromCharCode(parseInt(hex, 16));
  return str.replace(escapePattern, toChar);
}

/**
 * Joins the truthy entries of `classes` with single spaces. Falsy entries
 * (undefined, null, false, "") are skipped; a literal `true` is kept and
 * stringified by the join.
 */
export function classNames(...classes: (string | undefined | null | boolean)[]) {
  const kept: (string | boolean)[] = [];
  for (const entry of classes) {
    if (entry) kept.push(entry);
  }
  return kept.join(" ");
}

/**
 * Formats "<count> <noun>", using `word` when `count` is exactly 1 and otherwise
 * `pluralWord` (or `word` + "s" when no non-empty plural is supplied).
 */
export function pluralize(count: number, word: string, pluralWord?: string) {
  let noun = word;
  if (count !== 1) {
    noun = pluralWord || `${word}s`;
  }
  return `${count} ${noun}`;
}

/**
 * Splits `s` at the first occurrence of the separator `on`.
 *
 * @param s - The string to split.
 * @param on - The separator; may be longer than one character.
 * @returns `[s]` when the separator is absent, otherwise `[before, after]`
 *   with the separator itself removed.
 */
export function splitOnce(s: string, on: string): string[] {
  const index = s.indexOf(on);
  if (index === -1) return [s];
  // Skip the whole separator, not just one character, so multi-character
  // separators do not leak into the tail.
  return [s.slice(0, index), s.slice(index + on.length)];
}

// Small words that titleCase leaves exactly as written.
const notToUppercase = [
  "a",
  "an",
  "the",
  "and",
  "but",
  "or",
  "for",
  "nor",
  "on",
  "at",
  "to",
  "from",
  "by",
  "of",
];

/**
 * Upper-cases the first character of each space-separated word in `input`,
 * except for empty segments and words listed in `notToUppercase` (matched
 * case-sensitively, regardless of position), which are left untouched.
 */
export function titleCase(input: string) {
  const words = input.split(" ");
  const cased = words.map((word) => {
    if (!word || notToUppercase.includes(word)) return word;
    return word[0].toUpperCase() + word.substring(1);
  });
  return cased.join(" ");
}

/**
 * Upper-cases the first character of `word`, leaving the rest unchanged.
 *
 * @param word - The text to capitalize; may be empty.
 * @returns The capitalized text, or `word` itself when it is empty.
 */
export function capitalizeFirstLetter(word: string) {
  // Guard: indexing an empty string yields undefined and would throw on toUpperCase().
  if (!word) return word;
  return `${word[0].toUpperCase()}${word.substring(1)}`;
}

/**
 * Converts a snake_case identifier to title case: every underscore becomes a
 * space and the result is passed through `titleCase`.
 */
export function snakeToTitleCase(name: string) {
  const spaced = name.split("_").join(" ");
  return titleCase(spaced);
}

/**
 * Builds a random alphanumeric string of `length` characters.
 *
 * Random 32-bit values come from `crypto.getRandomValues` when a global `crypto`
 * exists, otherwise from `Math.random`; each value is mapped onto the alphabet
 * with a modulo.
 *
 * @param length - Number of characters to produce.
 * @param options - `lowerCaseOnly` restricts the alphabet to a-z and 0-9.
 */
export function generateRandomString(length: number, options?: { lowerCaseOnly: boolean }) {
  const alphabet =
    options?.lowerCaseOnly ?
      "abcdefghijklmnopqrstuvwxyz0123456789"
    : "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
  const alphabetSize = alphabet.length;

  // One unsigned 32-bit draw per output character.
  const draws = new Uint32Array(length);
  if (typeof crypto != "undefined") {
    crypto.getRandomValues(draws);
  } else {
    // No global crypto available: fall back to Math.random.
    for (let pos = 0; pos < length; pos++) {
      draws[pos] = Math.floor(Math.random() * alphabetSize);
    }
  }

  let output = "";
  for (const draw of draws) {
    output += alphabet.charAt(draw % alphabetSize);
  }
  return output;
}

/**
 * Generates random strings until `checker` accepts one, trying at most 10 candidates.
 *
 * @param checker - Async predicate; a truthy result means the candidate is accepted.
 *   If it throws, the error is logged with the candidate and the next attempt runs.
 * @param length - Candidate length (default 6).
 * @param lowerCaseOnly - Restrict candidates to lowercase letters and digits.
 * @returns The first accepted candidate.
 * @throws Error when none of the 10 candidates is accepted.
 */
export async function generateUniqueRandomString({
  checker,
  length = 6,
  lowerCaseOnly = false,
}: {
  checker: (value: string) => Promise<boolean>;
  length?: number;
  lowerCaseOnly?: boolean;
}) {
  let attemptsLeft = 10;
  while (attemptsLeft > 0) {
    attemptsLeft -= 1;
    const slug = generateRandomString(length, { lowerCaseOnly });
    try {
      if (await checker(slug)) {
        return slug;
      }
    } catch (e) {
      // A failing checker consumes the attempt but does not abort the search.
      logger.error(e, { slug });
    }
  }
  throw new Error("Failed to generate unique random string");
}

/**
 * Returns the `profileUrl` of the result of `parseCrawlableUrl(url)`, or null
 * when parsing yields nothing or the result has no `profileUrl`.
 */
export function cleanBaseAliases(url: string): string | null {
  const parsed = parseCrawlableUrl(url);
  if (parsed == null) return null;
  return parsed.profileUrl ?? null;
}

/**
 * Canonicalizes a user-supplied URL or bare domain into an https URL consisting
 * of origin plus path only (query and fragment are dropped).
 *
 * Steps: add "https://" when no http(s) scheme is present, append ".com" when the
 * input contains no dot, drop a leading "www." host label and a trailing slash,
 * force https, collapse LinkedIn subdomains to "linkedin.com", map the generic
 * landing paths /jobs, /careers and /about to the site root, and keep only the
 * first two path segments of LinkedIn /in/ and /company/ URLs.
 *
 * @param url - A URL or bare domain; may be empty.
 * @returns The cleaned URL, "" for empty input, or the partially cleaned string
 *   when it cannot be parsed as a URL.
 */
export function cleanBaseUrl(url: string) {
  if (!url) return ""; // this is a degenerate condition but prevents a crash

  // Test for a real http(s) scheme; a plain startsWith("http") check would treat
  // hosts such as "httpbin.org" as already having one.
  if (!/^https?:\/\//i.test(url)) {
    url = "https://" + url;
  }
  if (!url.includes(".")) {
    url += ".com";
  }
  // Only strip "www." directly after the scheme, never inside the path.
  url = url.replace(/^(https?:\/\/)www\./i, "$1");
  if (url.endsWith("/")) {
    url = url.slice(0, url.length - 1);
  }
  try {
    const parsed = new URL(url);
    parsed.protocol = "https:";
    // endsWith (not includes) so look-alike hosts such as
    // "a.linkedin.com.example.org" are not rewritten to linkedin.com.
    if (parsed.hostname.endsWith(".linkedin.com")) parsed.hostname = "linkedin.com";

    if (
      parsed.pathname === "/jobs" ||
      parsed.pathname === "/careers" ||
      parsed.pathname === "/about"
    ) {
      parsed.pathname = "/";
    }

    if (
      parsed.hostname === "linkedin.com" &&
      (parsed.pathname.startsWith("/in/") || parsed.pathname.startsWith("/company/"))
    ) {
      const path = parsed.pathname.split("/");
      if (path.length > 3) {
        // re-write path to only contain first two parts
        parsed.pathname = path.slice(0, 3).join("/");
      }
    }
    // only return hostname and path
    if (parsed.pathname === "/") return parsed.origin;

    return parsed.origin + parsed.pathname;
  } catch (e) {
    // Not parseable as a URL: return the best-effort cleaned string.
    return url;
  }
}

/**
 * True when `parseCrawlableUrl` returns a truthy result for `url` with the site
 * type restricted to "linkedin".
 */
export function isPersonOrCompanyLinkedInUrl(url: string) {
  const parsed = parseCrawlableUrl(url, { siteType: "linkedin" });
  return Boolean(parsed);
}

/**
 * Parses `url` with `parseCrawlableUrl`, restricted to the "linkedin" site type.
 * An empty `url` short-circuits to null without calling the parser.
 */
export function parsePersonOrCompanyLinkedInUrl(url: string): CrawlableUrl | null {
  return url ? parseCrawlableUrl(url, { siteType: "linkedin" }) : null;
}

/**
 * Returns the `profile` field of the parsed LinkedIn URL, or null when the URL
 * does not parse or the parsed result carries no profile.
 */
export function parsePersonOrCompanyLinkedInProfile(url: string): string | null {
  const parsed = parsePersonOrCompanyLinkedInUrl(url);
  if (parsed == null) return null;
  return parsed.profile ?? null;
}

/**
 * Produces a compact display form of a URL: no scheme, no leading "www.", no
 * query or fragment, and no trailing slash on the path.
 *
 * @param url - The URL to prettify; strings that do not parse as a URL are
 *   handled by stripping a leading http(s) scheme and "www." textually.
 * @param skipPath - When true, only the hostname is returned for parseable URLs.
 */
export function prettyUrl(url: string, skipPath?: boolean) {
  try {
    const parsed = new URL(url);
    // Anchored so only a leading "www." label is removed; an unanchored replace
    // would turn "awww.example.com" into "aexample.com".
    const host = parsed.hostname.replace(/^www\./, "");
    return host + (skipPath ? "" : parsed.pathname.replace(/\/$/, ""));
  } catch (e) {
    // Not a parseable URL (e.g. no scheme): fall back to textual cleanup.
    return url.replace(/^https?:\/\//, "").replace(/^www\./, "");
  }
}

/**
 * Extracts the host part (including any port) from a URL-like string without
 * throwing on malformed input.
 *
 * @param url - A URL or bare host, with or without a scheme.
 * @returns Everything between the leading scheme (if any) and the first "/",
 *   "?" or "#".
 */
export function safeHostname(url: string) {
  return (
    url
      // Strip only a LEADING scheme. A greedy ".*://" would strip up to the last
      // "://" and return the host of a URL embedded in the path or query.
      .replace(/^[a-z][a-z0-9+.-]*:\/\//i, "")
      // Drop path, query and fragment.
      .replace(/[/?#].*/, "")
  );
}

// Matches a compact relative age: a run of digits followed by a unit, e.g. "3d" or "1mo".
const compactMatcher = /(\d+)(\w+)/;

/**
 * Converts a compact relative age ("5h", "1d", "2w", "1mo", "1y") into the Date
 * that many hours/days/weeks/months/years before now (local time).
 * Input that does not match, or uses an unrecognised unit, yields the current time.
 */
export const parseCompactDate = (dateStr: string) => {
  const result = new Date();
  const found = compactMatcher.exec(dateStr);
  if (!found) return result;

  const amount = parseInt(found[1]);
  switch (found[2]) {
    case "h":
      result.setHours(result.getHours() - amount);
      break;
    case "d":
      result.setDate(result.getDate() - amount);
      break;
    case "w":
      result.setDate(result.getDate() - amount * 7);
      break;
    case "mo":
      result.setMonth(result.getMonth() - amount);
      break;
    case "y":
      result.setFullYear(result.getFullYear() - amount);
      break;
  }
  return result;
};

/**
 * Removes every character outside the 7-bit ASCII range (U+0000 to U+007F).
 * Falsy input (null, undefined, "") is returned as-is.
 */
export function stripUnicode(input: string | null | undefined) {
  return input ? input.replace(/[^\x00-\x7F]/g, "") : input;
}

/**
 * Get the part of a domain name that describes the company: the single label
 * immediately to the left of the public suffix, with any deeper subdomains
 * dropped and the public suffix optionally re-attached.
 *
 * See (https://publicsuffix.org/)
 *
 * @param domain - A domain name.
 * @param includePublicSuffix - Whether to include the public suffix in the result.
 * @returns The semantic domain, or `domain` unchanged when `getPublicSuffix`
 *   reports no public suffix for it.
 */
export function getSemanticDomain({
  domain,
  includePublicSuffix,
}: {
  domain: string;
  includePublicSuffix?: boolean;
}): string {
  const suffix = getPublicSuffix(domain);
  if (!suffix) return domain;

  // Drop the suffix together with the dot that precedes it.
  const stem = domain.slice(0, -(suffix.length + 1));
  const labels = stem.split(".");
  const semantic = labels.length >= 2 ? labels[labels.length - 1] : stem;

  if (includePublicSuffix) {
    return `${semantic}.${suffix}`;
  }
  return semantic;
}

/**
 * Compares two strings ignoring case and surrounding whitespace.
 * When either side is null the result is a strict identity check, so two
 * nulls are equal and null never equals a string.
 */
export function equalsIgnoreCase(a: string | null, b: string | null): boolean {
  if (a != null && b != null) {
    const left = a.trim().toLowerCase();
    const right = b.trim().toLowerCase();
    return left === right;
  }
  return a === b;
}

/**
 * Removes Cyrillic characters (U+0400 to U+04FF) and NUL characters (U+0000) from `s`.
 */
export function removeSpecialChars(s: string): string {
  // Cyrillic characters:  \u0400 - \u04FF
  // Zero byte characters: \x00
  return s.replace(/[\u0400-\u04FF\x00]/g, "");
}

/**
 * Cleans a string that contains literal hex escapes: complete `\xHH` and `\uHHHH`
 * sequences are decoded to the character they name, incomplete ones are removed,
 * and the characters handled by `removeSpecialChars` are stripped both before
 * and after decoding.
 *
 * @param s - The raw text.
 * @returns The sanitized text; it never contains NUL or U+0400 to U+04FF characters.
 */
export function sanitizeHex(s: string): string {
  // Handle hex escapes
  const decoded = removeSpecialChars(s).replace(
    /\\x([0-9a-fA-F]{2})|\\x[0-9a-fA-F]?|\\u([0-9a-fA-F]{4})|\\u[0-9a-fA-F]{0,3}/g,
    (_match: string, hexByte?: string, hexUnit?: string) => {
      if (hexByte) {
        // Complete \x escape
        return Buffer.from(hexByte, "hex").toString("utf-8");
      } else if (hexUnit) {
        // Complete \u escape
        return String.fromCharCode(parseInt(hexUnit, 16));
      } else {
        // Incomplete escape, strip it
        return "";
      }
    },
  );
  // Decoding can itself produce the characters stripped above (e.g. "\x00" or
  // "\u0000" decode to NUL), so strip once more on the way out.
  return removeSpecialChars(decoded);
}

/**
 * Recursively applies `sanitizeHex` to every string reachable from `obj`.
 *
 * A string input returns its sanitized copy; other primitives and null are
 * returned unchanged. Objects are mutated in place and returned; array-valued
 * properties are replaced by a new, sanitized array.
 *
 * @param obj - The value to sanitize.
 * @param seen - Objects already visited; guards against cycles.
 */
export function sanitizeObjectStrings<T>(obj: T, seen: Set<object> = new Set()): T {
  if (typeof obj === "string") return sanitizeHex(obj) as T;
  if (obj === null || typeof obj !== "object") return obj;

  // Already being processed (cycle or shared reference): leave as-is.
  if (seen.has(obj)) return obj;
  seen.add(obj);

  const target = obj as Record<string, unknown>;
  Object.entries(obj).forEach(([field, current]) => {
    if (typeof current === "string") {
      target[field] = sanitizeHex(current);
      return;
    }
    if (Array.isArray(current)) {
      target[field] = current.map((item) => sanitizeObjectStrings<unknown>(item, seen));
      return;
    }
    if (typeof current === "object" && current !== null) {
      target[field] = sanitizeObjectStrings(current as Record<string, unknown>, seen);
    }
  });
  return obj;
}

/**
 * Turns a birth year into a coarse age label: "under 18", "20'ish" (ages 18-20),
 * "100+" (over 100), or otherwise the decade followed by "'ish" when the age is
 * in the first two years of the decade and "'s" when it is later.
 * Returns undefined when no (or a zero) birth year is given.
 */
export const ageFromBirthYear = (birthYear: number | undefined) => {
  if (!birthYear) return undefined;

  const years = moment().diff(moment(birthYear, "YYYY"), "years");
  if (years < 18) return "under 18";
  if (years < 21) return "20'ish";
  if (years > 100) return "100+";

  const suffix = years % 10 < 2 ? "'ish" : "'s";
  return `${Math.floor(years / 10) * 10}${suffix}`;
};

// Honorifics and degrees removed from names before slugging (matched as whole words, any case).
const titles = ["mba", "phd", "dr", "mr", "jd", "md"];
const titlePattern = new RegExp(`\\b(${titles.join("|")})\\b`, "gi");

/**
 * Removes dots and the titles listed in `titles` from `name`, and for names of
 * the form "First Last, anything" drops everything from the comma on.
 */
function stripTitles(name: string) {
  name = name.replace(/\./g, "");

  let cleanName = name.replace(titlePattern, "").trim();

  const commaPattern = /^(\w+\s+\w+),.*/;

  cleanName = cleanName.replace(commaPattern, "$1").trim();

  return cleanName;
}

/**
 * Cuts a percent-encoded string to at most `maxLength` characters without
 * leaving a broken escape or an incomplete multi-byte sequence at the end.
 */
function truncateEncodedSlug(encoded: string, maxLength: number): string {
  // Drop a partial "%" or "%X" left dangling by the cut.
  let cut = encoded.substring(0, maxLength).replace(/%[0-9A-Fa-f]?$/, "");
  while (cut.length > 0) {
    try {
      decodeURIComponent(cut);
      return cut;
    } catch (e) {
      // Trailing bytes form an incomplete UTF-8 sequence: peel off one escape and retry.
      const shorter = cut.replace(/%[0-9A-Fa-f]{2}$/, "");
      if (shorter === cut) return cut;
      cut = shorter;
    }
  }
  return cut;
}

/**
 * Builds a URL-safe slug of at most 40 characters from a display name.
 *
 * Titles and parenthesised text are removed, each space-separated part is
 * reduced to letters, digits and underscores, and the parts are joined with "_"
 * and percent-encoded. Long slugs are cut at the last underscore when that keeps
 * more than 30 characters, otherwise at the 40-character limit on a whole
 * encoded character.
 *
 * @param name - The display name.
 * @returns The slug, or a random base-36 string when nothing usable remains.
 */
export function slugify(name: string) {
  let parts = stripTitles(name)
    .replace(/\([^)]*\)/g, "")
    .trim()
    .split(" ")
    .map((part) => part.replace(/[^\p{L}\p{N}_]+/gu, "").trim())
    .filter(Boolean);

  if (parts.length === 0) {
    // this would happen if e.g. a name is mostly special characters
    parts = name.split(" ").map((part) => part.replace(/[^\p{L}\p{N}_]+/gu, "-"));
  }

  const encoded = encodeURIComponent(parts.join("_"));
  if (encoded.length === 0) {
    return Math.random().toString(36).substring(2, 15);
  }

  if (encoded.length > 40) {
    const lastPart = encoded.substring(0, 40).lastIndexOf("_");
    if (lastPart > 30) {
      // split at the last underscore
      return encoded.substring(0, lastPart);
    }
    // No usable underscore: cut at the limit, but never in the middle of an
    // encoded character (a raw substring could end in "%E4%" and be undecodable).
    return truncateEncodedSlug(encoded, 40);
  }

  return encoded;
}

/**
 * Finds URL-like substrings in `input` (optional http/https scheme, optional
 * "www.", a dotted host and an optional path up to whitespace, "?" or "#") and
 * returns them normalised to "https://…", de-duplicated in order of first appearance.
 */
export function extractURLs(input: string): string[] {
  const urlPattern = /(?:(?:https?):\/\/)?(www\.)?([a-zA-Z0-9-]+\.[a-zA-Z0-9.-]+(?:\/[^\s?#]*)?)/g;

  const unique = new Set<string>();
  for (const found of input.matchAll(urlPattern)) {
    const wwwPrefix = found[1] || "";
    const hostAndPath = found[2] || "";
    unique.add("https://" + wwwPrefix + hostAndPath);
  }
  return [...unique];
}

/** Formats `count` using en-US conventions (comma thousands separators). */
export function formatNumber(count: number) {
  return count.toLocaleString("en-US");
}

/**
 * Checks that `sourcePattern` looks like a domain with an optional path:
 * it starts with an alphanumeric character, ends in a dot followed by at least
 * two letters, and any path contains no whitespace.
 */
export function validateSourcePattern(sourcePattern: string) {
  // Regex from GPT (Only used in blocked sources)
  return /^[a-zA-Z0-9][a-zA-Z0-9-_.]*\.[a-zA-Z]{2,}(\/[^\s]*)?$/.test(sourcePattern);
}

/**
 * For URLs whose host (as computed by `safeHostname`) is "localhost:3000" or
 * "distill.fyi", returns the first "/p/<segment>" or "/c/<segment>" portion of
 * the URL, including the leading slash and prefix. Returns null for other hosts
 * or when no such portion exists.
 */
export function extractLowerSlugFromDistillUrl(url: string) {
  const host = safeHostname(url);
  const distillDomains = ["localhost:3000", "distill.fyi"];
  if (!distillDomains.includes(host)) return null;

  const found = /\/(p|c)\/([^\/]+)/.exec(url);
  return found ? found[0] : null;
}

/**
 * Decodes hexadecimal HTML character references (`&#xHH…;`) in `s` and
 * additionally percent-decodes the whole string.
 *
 * Every reference is handled (not only the first) and code points above ASCII
 * are supported. References naming an invalid code point are left untouched.
 *
 * @param s - Text containing `&#x…;` references and/or percent escapes.
 * @returns The decoded text.
 * @throws URIError when `s` contains a malformed percent sequence (e.g. a bare "%").
 */
export function unescapeHtmlCharacterEntities(s: string) {
  const percentEncoded = s.replace(/&#x([0-9a-fA-F]+);/g, (match: string, hex: string) => {
    try {
      // Re-encode the character as UTF-8 percent escapes so that the single
      // decodeURIComponent pass below restores it, whatever its code point.
      return encodeURIComponent(String.fromCodePoint(parseInt(hex, 16)));
    } catch (e) {
      // Out-of-range code point or lone surrogate: keep the reference verbatim.
      return match;
    }
  });
  return decodeURIComponent(percentEncoded);
}

/**
 * Normalises a URL for comparison: assumes https when no "://" is present,
 * removes a leading "www." from the hostname, keeps protocol, hostname, path and
 * query (port and fragment are not carried over), and strips one trailing slash.
 * Input that cannot be parsed has everything up to "://" removed instead.
 */
export function normalizeUrl(url: string) {
  const withScheme = url.includes("://") ? url : "https://" + url;

  let normalized: string;
  try {
    const { protocol, hostname, pathname, search } = new URL(withScheme);
    const bareHost = hostname.replace(/^www\./, "");
    normalized = `${protocol}//${bareHost}${pathname}${search}`;
  } catch (e) {
    normalized = withScheme.replace(/.*?:\/\//g, "");
  }

  if (normalized.endsWith("/")) {
    return normalized.slice(0, -1);
  }
  return normalized;
}

/**
 * Case-insensitively tests whether `text` contains `word` as a whole word.
 *
 * @param text - The text to search.
 * @param word - The literal word to look for; regex metacharacters are matched literally.
 * @param matchPrefix - When true, `word` only has to start at a word boundary
 *   (so "world" matches "worldwide").
 */
export function containsWord(text: string, word: string, matchPrefix?: boolean): boolean {
  // Escape regex metacharacters so input like "c++" or "a.b" is matched literally
  // instead of throwing a SyntaxError or acting as a pattern.
  const escapedWord = word.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  // Create a regex pattern that matches word boundaries
  const pattern = new RegExp(`\\b${escapedWord}${matchPrefix ? "" : "\\b"}`, "i");
  return pattern.test(text);
}

/**
 * Returns the final path segment of a URL, which is useful when the URL is
 * expected to end with a filename. Query and fragment are ignored. Returns ""
 * when the path ends in "/" or the input is not a parseable URL.
 */
export function filenameFromUrl(url: string): string {
  let pathname: string;
  try {
    pathname = new URL(url).pathname;
  } catch (e) {
    return "";
  }
  const segments = pathname.split("/");
  return segments[segments.length - 1] || "";
}

/**
 * Checks whether a candidate URL's host equals a given domain or is a subdomain of it.
 * Cookie-style wildcard domains with a leading dot (e.g. ".distill.fyi") are supported.
 *
 * @param safeHostName - The already-normalized domain name to check against.
 * @param candidateURL - The candidate domain or URL to verify.
 *
 * @returns True if the candidate's host is the domain itself or ends with
 *   "." + domain; false otherwise, including when either argument is empty.
 */
export function isDomainOrSubdomain(safeHostName: string, candidateURL: string): boolean {
  if (!safeHostName || !candidateURL) return false;

  const host = safeHostname(candidateURL);
  if (host === safeHostName) return true;

  // A wildcard domain already carries its leading dot; otherwise add one so that
  // only whole labels match.
  const requiredSuffix = safeHostName.startsWith(".") ? safeHostName : `.${safeHostName}`;
  return host.endsWith(requiredSuffix);
}

/**
 * Maps a relationship type to a human-readable verb phrase.
 *
 * EducatedAt becomes "attended", ConnectedTo becomes "is connected to", and
 * WorkedAt with no end date (or an end date of "Present") becomes "works at".
 * Every other case returns the type value with hyphens replaced by spaces.
 */
export function relationshipToLabel({
  type,
  endedDate,
}: {
  type: RelationshipType;
  endedDate?: string;
}) {
  if (type == PersonCompanyRelationship.EducatedAt) return "attended";
  if (type == PersonPersonRelationship.ConnectedTo) return "is connected to";

  const isOngoing = !endedDate || endedDate == "Present";
  if (type == PersonCompanyRelationship.WorkedAt && isOngoing) return "works at";

  return type.replace(/-/g, " ");
}
