import { sanitizeLink } from "@/crawler/sanitizeLinks";
import { DstLocation } from "@/dst";
import { isValid } from "date-fns/isValid";
import { parse } from "date-fns/parse";
import { parseISO } from "date-fns/parseISO";

/**
 * Small, side-effect-free helpers that coerce loosely typed (`unknown`) values
 * into a concrete type. Every extractor returns `undefined` rather than
 * throwing when the input cannot be used.
 */
export const CoreExtractors = {
  /**
   * Extract a non-empty array from a value
   * @param value - The value to extract the array from
   * @returns The value itself (cast to `T[]`, elements are not validated), or
   * undefined if the value is not an array or is an empty array
   */
  Array<T>(value: unknown): T[] | undefined {
    if (!Array.isArray(value) || value.length === 0) {
      return undefined;
    }
    return value as T[];
  },

  /**
   * Sanitize a link by removing UTM parameters, redirect URLs, and other non-canonical links
   * @param value - The link to sanitize
   * @returns The sanitized link, or undefined if the value is not a non-empty
   * string or the sanitizer yields a nullish result
   */
  SanitizeLink: (value: unknown): string | undefined => {
    if (!value || typeof value !== "string") {
      return undefined;
    }
    const link = sanitizeLink(value);
    return link ?? undefined;
  },

  /**
   * Normalize whitespace by collapsing every run of whitespace into a single
   * space and trimming leading and trailing spaces
   * @param value - The value to normalize
   * @returns The normalized value or undefined if the value is not a non-empty string
   */
  NormalizeWhitespace: (value: unknown): string | undefined => {
    if (!value || typeof value !== "string") {
      return undefined;
    }

    return value
      .replace(/\s+/g, " ") // Collapse any whitespace run (spaces, tabs, newlines)
      .trim();
  },

  /**
   * Extract a number from a number or a string
   *
   * Numbers are returned unchanged (including 0). Strings have thousands
   * separators (",") removed and are parsed as base-10 integers, so any
   * fractional part of a string is truncated.
   * @param value - The value to extract the number from
   * @returns The extracted number, or undefined if the value is NaN, is neither
   * a number nor a string, or does not start with a parsable integer
   */
  Number: (value: unknown): number | undefined => {
    if (typeof value === "number") {
      // 0 is a legitimate value and must not be discarded; only NaN is rejected.
      return Number.isNaN(value) ? undefined : value;
    }
    if (typeof value !== "string") {
      return undefined;
    }
    // Explicit radix so inputs such as "0x10" are never interpreted as hex.
    const parsed = parseInt(value.replace(/,/g, "").trim(), 10);
    return Number.isNaN(parsed) ? undefined : parsed;
  },

  /**
   * If the location is a DstLocation, return the raw location
   * Otherwise, return the value if it is a string
   * @param value - The value to extract the location from
   * @returns The extracted location, or undefined if the value is falsy or has
   * no string `raw` property
   */
  RawLocation: (value: unknown): string | undefined => {
    if (!value) {
      return undefined;
    }
    if (typeof value === "string") {
      return value;
    }
    const location = value as Partial<DstLocation>;
    // The cast is unchecked, so verify at runtime that `raw` really is a string.
    return typeof location.raw === "string" ? location.raw : undefined;
  },

  /**
   * Extract a string from a value and remove leading and trailing quotes
   * @param value - The value to extract the string from
   * @returns The trimmed string without surrounding `"` / `'` characters, or
   * undefined if the value is not a non-empty string
   */
  Unquoted: (value: unknown): string | undefined => {
    if (!value || typeof value !== "string") {
      return undefined;
    }
    return value.trim().replace(/^["']+|["']+$/g, "");
  },

  /**
   * Extract a valid Date from a timestamp, a Date instance, or a date string
   *
   * Strings are tried, in order, as an ISO date (only when they start with
   * four digits), as the "MMM d, yyyy" format, and finally with the native
   * `Date` constructor.
   * @param value - The value to extract the date from
   * @returns A valid Date, or undefined if the value is falsy (this includes
   * the number 0), is of an unsupported type, or does not yield a valid date
   */
  Date: (value: unknown): Date | undefined => {
    if (!value) {
      return undefined;
    }
    if (typeof value === "number") {
      // Out-of-range or infinite timestamps produce an Invalid Date.
      const fromTimestamp = new Date(value);
      return isValid(fromTimestamp) ? fromTimestamp : undefined;
    }
    if (value instanceof Date) {
      // Never hand an Invalid Date back to the caller.
      return isValid(value) ? value : undefined;
    }
    if (typeof value !== "string") {
      return undefined;
    }
    // Check if value starts with four numbers (YYYY format)
    if (/^\d{4}/.test(value)) {
      // Try to parse as ISO date string
      const isoDate = parseISO(value);
      if (isValid(isoDate)) {
        return isoDate;
      }
    }
    // else, check for crunchbase format
    const parsedDate = parse(value, "MMM d, yyyy", new Date());
    if (isValid(parsedDate)) {
      return parsedDate;
    }
    // attempt to parse it naively
    const naiveDate = new Date(value);
    if (isValid(naiveDate)) {
      return naiveDate;
    }
    return undefined;
  },
};
