import { castArray, isEqual, some } from 'lodash-es';
import { useCallback } from 'react';
import { TranscriptWord } from 'api';
import { useWordList } from '../state';
import { normalizeQuery, normalizeTranscriptWord } from '../utils/search';

/**
 * Matches produced by a transcript search. Each entry is the list of word ids
 * that make up one match, in the order the words appear in the word list.
 */
export type SearchResult = number[][];

/** Options accepted by `searchInTranscript`. */
export interface SearchInTranscriptConfig {
  /**
   * When `true` (the default), each query is compared against runs of
   * consecutive words joined by single spaces, so one match may span several
   * words. When `false`, each query is compared against individual words only.
   */
  splitWords?: boolean;
}

/** Value returned by the `useSearchInTranscript` hook. */
interface UseSearchInTranscriptResult {
  /**
   * Searches the transcript words for a single query or a list of queries and
   * returns the ids of the matching words, grouped per match.
   */
  searchInTranscript: (
    query: string | string[],
    config?: SearchInTranscriptConfig,
  ) => SearchResult;
}

// Shared fallback used while the word list has no data yet. A literal `[]`
// default would create a new array on every render, changing the `useCallback`
// dependency each time and defeating memoization of `searchInTranscript`.
const EMPTY_WORDS: never[] = [];

/**
 * Hook exposing `searchInTranscript`, a memoized function that looks up one or
 * more queries in the word list provided by `useWordList`.
 *
 * Matching modes (see `SearchInTranscriptConfig.splitWords`):
 * - `splitWords: true` (default): every run of consecutive words with
 *   non-empty text is joined with single spaces and compared against each
 *   query, both after normalization and as lower-cased raw text. Each match
 *   yields the ids of all words in the run. A run is reported once per query
 *   it matches.
 * - `splitWords: false`: every word with non-empty text is compared on its
 *   own against each normalized query. Each matching word yields a
 *   single-element `[id]` entry, reported at most once.
 *
 * @returns An object holding the `searchInTranscript` function. The function
 *   returns an empty array for an empty query or empty query list.
 */
export default function useSearchInTranscript(): UseSearchInTranscriptResult {
  const { data: words = EMPTY_WORDS } = useWordList();

  const searchInTranscript = useCallback(
    (
      query: string | string[],
      config?: SearchInTranscriptConfig,
    ): SearchResult => {
      if (!query || !query.length) return [];

      const { splitWords = true } = config || {};
      const queries = castArray(query);
      const normalizedQueries = queries.map((q) => normalizeQuery(q));
      const allResults: SearchResult = [];

      if (!splitWords) {
        // Whole-word mode only ever inspects individual words, so a single
        // pass over the list is enough.
        for (const word of words) {
          const wordText = word.text.toLowerCase();

          // Words without text never produce a match.
          if (!wordText) continue;

          // Punctuation-insensitive form, e.g. 'test,' and 'test.' both
          // compare equal to the query 'test'.
          const normalizedWordText = normalizeTranscriptWord(word.text);

          const matchesAnyQuery = some(
            normalizedQueries,
            (normalizedQuery) =>
              // Exact match on the lower-cased raw text ...
              normalizedQuery === wordText ||
              // ... or match once the word has been normalized.
              normalizedQuery === normalizedWordText,
          );

          // Report each word id only once, even if several queries match it.
          if (
            matchesAnyQuery &&
            !some(allResults, (ids) => isEqual(ids, [word.id]))
          ) {
            allResults.push([word.id]);
          }
        }

        return allResults;
      }

      // Fold the raw queries exactly like the transcript text below
      // (`toLowerCase` on both sides) so the raw comparison cannot fail on
      // locale-specific case mappings. Computed once, outside the loops.
      const lowerCasedQueries = queries.map((q) => q.toLowerCase());

      for (let start = 0; start < words.length; start++) {
        let combinedText = '';
        const currentSequence: number[] = [];

        // Grow the candidate run one word at a time, beginning at `start`.
        for (let end = start; end < words.length; end++) {
          const wordText = words[end].text.toLowerCase();

          // A word without text terminates the current run.
          if (!wordText) break;

          combinedText = combinedText
            ? `${combinedText} ${wordText}`
            : wordText;
          currentSequence.push(words[end].id);

          const normalizedCombinedText = normalizeTranscriptWord(combinedText);

          for (let q = 0; q < normalizedQueries.length; q++) {
            if (
              // Match after normalizing both the run and the query ...
              normalizedCombinedText === normalizedQueries[q] ||
              // ... or case-insensitive match on the raw text, which keeps
              // queries whose punctuation or formatting is significant.
              combinedText === lowerCasedQueries[q]
            ) {
              // Copy the ids: `currentSequence` keeps growing afterwards.
              allResults.push([...currentSequence]);
            }
          }
        }
      }

      return allResults;
    },
    [words],
  );

  return {
    searchInTranscript,
  };
}
