import emailAddresses, { ParsedMailbox } from "email-addresses";

import { isFreeDomain } from "@/lib/freemail";
import { equalsIgnoreCase, extractURLs, getSemanticDomain } from "@/lib/stringUtils";
import { EmailContact, EmailParts, isEmailParts } from "@/types";

// Gmail-hosted domains. Gmail ignores dots in the local part, so addresses on
// these domains have their dots removed when they are normalized.
const gmailDomains: string[] = ["gmail.com", "googlemail.com"];

/**
 * Patterns for local parts that look like a role, shared, or automated mailbox
 * rather than an individual person. (The word list was generated by Claude.)
 *
 * Each word produces one case-insensitive pattern that matches when the local
 * part:
 *   - is exactly the word                                  (`support`)
 *   - ends with the word after a `.`, `+`, `-` or `_`      (`acme-support`)
 *   - starts with the word followed by a `.`, `+` or `-`   (`support+eu`)
 */
const emailPrefixesToSkip = [
  "abuse",
  "accounts",
  "admin",
  "alerts",
  "auto",
  "automated",
  "billing",
  "bot",
  "bounce",
  "bounces",
  "careers",
  "contact",
  "customerservice",
  "digest",
  "do-not-reply",
  "donotreply",
  "feedback",
  "founders",
  "hello",
  "help",
  "hr",
  "info",
  "inquiries",
  "jobs",
  "legal",
  "list",
  "mailer-daemon",
  "marketing",
  "media",
  "newsletter",
  "no-reply",
  "no.reply",
  "no_reply",
  "noreply",
  "notes",
  "notification",
  "notifications",
  "office",
  "orders",
  "postmaster",
  "press",
  "recruit",
  "reply",
  "returns",
  "sales",
  "security",
  "service",
  "social",
  "spam",
  "subscribe",
  "support",
  "system",
  "team",
  "unsubscribe",
  "updates",
  "webmaster",
].map((prefix) => {
  // Escape regex metacharacters so entries such as "no.reply" match literally
  // instead of treating "." as a wildcard.
  const literal = prefix.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");

  // "-" is kept last in each character class so it is a literal hyphen.
  // Backslash escapes are deliberately avoided: a template literal drops them
  // before RegExp sees the pattern, which would turn "+-_" into a range that
  // covers digits and letters and match almost any suffix.
  return new RegExp(`^${literal}$|[.+_-]+${literal}$|^${literal}[.+-]+`, "i");
});

/**
 * Domains whose addresses are skipped regardless of the local part
 * (platform, mail-delivery, help-desk, and disposable-mail providers).
 * The list was generated by Claude.
 *
 * Lookups are exact string matches, so entries must be lower case.
 */
const emailDomainsToSkip = new Set<string>([
  "amazonses.com",
  "atlassian.com",
  "eloqua.com",
  "facebook.com",
  "freshdesk.com",
  "github.com",
  "google.com",
  "googlegroups.com",
  "groups.google.com",
  "guerrillamail.com",
  "helpscout.com",
  "hubspot.com",
  "intercom.io",
  "jira.com",
  "linkedin.com",
  "mailchimp.com",
  "mailgun.com",
  "mailinator.com",
  "marketo.com",
  "salesforce.com",
  "sendgrid.com",
  "shopify.com",
  "slack.com",
  "squarespace.com",
  "temp-mail.org",
  "trello.com",
  "twitter.com",
  "wix.com",
  "wordpress.com",
  "zendesk.com",
]);

/**
 * Split a string into word tokens.
 *
 * Any run of characters other than ASCII letters, digits, and underscores is
 * treated as a separator (so non-ASCII letters also act as separators).
 * Empty tokens, which `split` produces for leading/trailing separators and for
 * empty input, are dropped so callers can rely on the token count.
 */
const tokenize = (input: string): string[] => input.split(/[^A-Za-z0-9_]+/).filter(Boolean);

/** An email supplied either as a raw string or as an `EmailContact` object. */
type Email = string | EmailContact;

/**
 * Contract for turning an email contact into search-engine input.
 */
export interface EmailSearchQueryCreator {
  /**
   * Produce the tokenized, search-ready contact name.
   *
   * @returns The name tokens, or null when no name can be derived.
   */
  formatSearchName(parts: EmailParts): string[] | null;

  /**
   * Build a search-engine query string.
   *
   * @param args.email - The email to search for.
   * @param args.companyName - Optional company name to include in the query.
   */
  toSearchQuery(args: { email: Email; companyName?: string }): string;
}

/**
 * Parses and normalizes email addresses, decides which addresses are not worth
 * processing, and builds search-engine queries from them.
 */
class EmailParser implements EmailSearchQueryCreator {
  /**
   * Pick the raw word tokens that best identify the contact.
   *
   * A display name with two or more words wins. Otherwise the tokens of the
   * local part are used, and a single-word display name is only a last resort
   * when the local part yields no tokens. A one-word name on its own is
   * considered too weak for searching.
   *
   * Empty tokens are discarded up front so that stray separators
   * (e.g. a trailing space in the name) cannot inflate the word count.
   *
   * @returns The chosen tokens, or null when neither source provides any.
   */
  private parseNameTokensForSearch(parts: EmailParts): string[] | null {
    const { name, local } = parts;

    const nameTokens = name ? tokenize(name).filter(Boolean) : [];
    if (nameTokens.length > 1) {
      return nameTokens;
    }

    const localTokens = tokenize(local).filter(Boolean);
    if (localTokens.length > 0) {
      return localTokens;
    }

    return nameTokens.length > 0 ? nameTokens : null;
  }

  /**
   * Produce search-ready name tokens for a parsed email.
   *
   * Within each token, every character that is not a letter, apostrophe, or
   * hyphen is replaced with a space and the token is trimmed. Tokens that end
   * up empty (for example a digits-only local part) are dropped.
   *
   * @returns The cleaned tokens, or null if nothing usable remains.
   */
  public formatSearchName(parts: EmailParts): string[] | null {
    const nameTokens = this.parseNameTokensForSearch(parts);
    if (!nameTokens) return null;

    // \p{L} is a Unicode property escape that matches any kind of letter
    // from any language.
    const cleaned = nameTokens
      .map((token) => token.replace(/[^\p{L}'-]/gu, " ").trim())
      .filter((token) => token.length > 0);

    return cleaned.length > 0 ? cleaned : null;
  }

  /**
   * Create a query for a search engine given an email and optional company name.
   *
   * Tries to create as "natural" a query as possible when the contact name
   * and company are not present. The company name is preferred as the
   * organization term; when it is absent (or contains no word characters) the
   * email's domain is used instead, unless it is a free-mail domain.
   *
   * @param email - Raw address string or contact object to search for.
   * @param companyName - Optional company name to pair with the contact name.
   * @returns The query string; the normalized address is always part of it.
   */
  toSearchQuery({ email, companyName }: { email: Email; companyName?: string }): string {
    const parts = this.parse(email);
    const nameTokens = this.formatSearchName(parts);

    // Drop empty tokens so a company name made only of punctuation/whitespace
    // does not produce an empty quoted phrase in the query.
    const companyTokens = companyName ? tokenize(companyName).filter(Boolean) : [];

    const domainTokens =
      companyTokens.length > 0 ? companyTokens
      : isFreeDomain(parts.domain) ? []
      : [getSemanticDomain({ domain: parts.domain })];

    const searchDomain = domainTokens.join(" ").trim();

    if (nameTokens) {
      const searchName = nameTokens.join(" ").trim();
      if (domainTokens.length > 1) {
        // Quote the domain if it has multiple tokens otherwise it drowns out the name
        return `(${searchName} "${searchDomain}") OR ${parts.address}`;
      }
      if (domainTokens.length === 1) {
        return `(${searchName} ${searchDomain}) OR ${parts.address}`;
      }
      // No domain tokens, so we just use the name tokens
      if (nameTokens.length > 1) {
        return `(${searchName}) OR ${parts.address}`;
      }
      if (searchName.length > 3) {
        // If we just have a one word name then we AND it with the address
        // because otherwise it is too low signal
        return `${searchName} ${parts.address}`;
      }
      // A very short single-word name adds nothing; search the address alone.
      return parts.address;
    }

    // No usable name: search the address, optionally widened by the organization.
    switch (domainTokens.length) {
      case 0:
        return parts.address;
      case 1:
        return `${parts.address} OR ${searchDomain}`;
      default:
        return `${parts.address} OR "${searchDomain}"`;
    }
  }

  /**
   * Get the domain of an email address.
   *
   * @returns The semantic domain including its public suffix, or an empty
   *   string when the input is empty or cannot be parsed.
   */
  getDomain(input: string): string {
    const parsed = input ? (emailAddresses.parseOneAddress(input) as ParsedMailbox) : null;
    if (!parsed) return "";

    return getSemanticDomain({ domain: parsed.domain, includePublicSuffix: true });
  }

  /**
   * Normalize an email address.
   *
   * Lower-cases the local part and domain, removes dots from the local part
   * of Gmail addresses, and drops everything from the first plus sign onward
   * in the local part. The `address` field is rebuilt from the normalized
   * pieces; all other fields are passed through unchanged.
   */
  private normalizeEmail(emailParts: EmailParts): EmailParts {
    // `address` is pulled out so the stale value is not carried along in `rest`.
    const { local, domain, address, ...rest } = emailParts;

    const normalizedDomain = domain.toLowerCase();
    let normalizedLocal = local.toLowerCase();

    if (gmailDomains.includes(normalizedDomain)) {
      normalizedLocal = normalizedLocal.replace(/\./g, "");
    }

    const plusIndex = normalizedLocal.indexOf("+");
    if (plusIndex !== -1) {
      normalizedLocal = normalizedLocal.substring(0, plusIndex);
    }

    return {
      ...rest,
      address: `${normalizedLocal}@${normalizedDomain}`,
      local: normalizedLocal,
      domain: normalizedDomain,
    };
  }

  /**
   * Parse a single address string into normalized EmailParts.
   *
   * @returns Normalized parts, or parts with empty `address`, `domain`, and
   *   `local` (and the original `input`) when the string is not a valid address.
   */
  private parseString(input: string): EmailParts {
    const parsed = emailAddresses({
      input,
      oneResult: true,
      rfc6532: true, // Allow unicode
      simple: true,
      startAt: "address",
      commaInDisplayName: true, // Allow commas in the display name
      rejectTLD: true, // Reject bare top-level domains such as user@com
    }) as ParsedMailbox | null;

    if (!parsed || !parsed.address) {
      return { input, address: "", domain: "", local: "", name: undefined };
    }

    return this.normalizeEmail({
      input,
      address: parsed.address,
      name: parsed.name ?? undefined,
      domain: parsed.domain,
      local: parsed.local,
    });
  }

  /**
   * Parse an email address.
   *
   * If the email is already a valid EmailParts object, then return it as is.
   * A string is parsed directly. For a contact object, the address is parsed
   * and the contact's name (when present) replaces any name found in the
   * address string.
   *
   * @returns The parsed parts; all string fields are empty for falsy input.
   */
  parse(input: Email): EmailParts {
    if (!input) {
      return { input: "", address: "", domain: "", local: "", name: undefined };
    }

    if (typeof input === "string") {
      return this.parseString(input);
    }

    if (isEmailParts(input)) {
      return input;
    }

    const { name: inputName, address: inputAddress } = input;
    const parsed = this.parseString(inputAddress);

    // Ignore names that are just repeats of the email address
    if (inputName) {
      const normalizedName = equalsIgnoreCase(inputName, parsed.address) ? undefined : inputName;
      return {
        ...parsed,
        name: normalizedName,
      };
    }

    return parsed;
  }

  /**
   * Decide whether an email should be ignored.
   *
   * @returns true when the email cannot be parsed, its local part is purely
   *   numeric, its domain is in the skip list, or its local part matches one
   *   of the role/automated-mailbox patterns.
   */
  shouldSkip(email: Email): boolean {
    const { local, domain, address } = this.parse(email);
    if (!address) return true;

    // Check for purely numeric
    if (/^\d+$/.test(local)) {
      return true;
    }

    const semanticDomain = getSemanticDomain({ domain, includePublicSuffix: true });
    if (semanticDomain && emailDomainsToSkip.has(semanticDomain)) {
      return true;
    }

    if (emailPrefixesToSkip.some((s) => s.test(local))) {
      return true;
    }

    return false;
  }

  /**
   * Extract emails from a comma separated string.
   *
   * Elements that do not parse to a valid address are ignored. Duplicates are
   * removed by normalized address, keeping the first occurrence.
   *
   * @param input - A comma separated string of emails.
   * @returns An array of EmailParts objects with unique addresses, in input order.
   */
  extractEmails(input: string): EmailParts[] {
    // Track addresses rather than objects: every parse() call returns a new
    // object, so identity-based de-duplication would never remove anything.
    const seen = new Set<string>();
    const result: EmailParts[] = [];

    for (const element of input.split(",")) {
      const parts = this.parse(element);
      if (!parts.address || seen.has(parts.address)) continue;
      seen.add(parts.address);
      result.push(parts);
    }

    return result;
  }

  /**
   * Extract emails and URLs from a string.
   *
   * Each non-empty element is tried as an email first, then scanned for URLs
   * (one entry per URL found). If neither is found, the element is returned
   * with only its text as the name.
   *
   * @param input - A string of emails and URLs separated by commas or newlines.
   * @returns Extracted emails and URLs; `name` is always the trimmed element text.
   */
  extractURLsAndEmails(input: string): { url?: string; email?: EmailParts; name: string }[] {
    const elements = input
      .split(/[,\n]/)
      .map((s) => s.trim())
      .filter(Boolean);

    const result: { url?: string; email?: EmailParts; name: string }[] = [];
    for (const element of elements) {
      const email = this.parse(element);
      if (email.address) {
        result.push({ email, name: element });
        continue;
      }
      const urls = extractURLs(element);
      if (urls.length > 0) {
        for (const url of urls) result.push({ url, name: element });
        continue;
      }
      result.push({ name: element });
    }
    return result;
  }
}

// Single shared parser instance for the whole application; it is the module's
// default export.
const emailParser: EmailParser = new EmailParser();

export { emailParser as default };
