// Adapted from https://github.com/bvaughn/highlight-words-core

// NOTE: this file has some non-standard style in it (weird mix of super-functional and imperative).
//       Please do not use this kind of style elsewhere in Cipher.
//       TODO: At some point we should re-write this file.

import _ from 'underscore'

import { has_japanese_or_korean_characters } from './language_utils.js'

// Detects Greek letters in a search term.
// Deliberately NOT a global (/g) regex: a global regex remembers `lastIndex` between .test() calls, so testing
// two Greek search terms in a row would wrongly report the second one as containing no Greek characters.
const check_for_greek_symbols_regex = new RegExp("[Ά-ωΑ-ώ]", 'i')
// match whole words when search terms are enclosed in quotation marks
const check_for_whole_word_matching_regex = new RegExp(/^["'].*["']$/, 'i')

/**
 * Produces the full list of chunks (highlighted and non-highlighted) describing `text_to_highlight`.
 *
 * The work is done in three stages:
 *   1. `find_chunks` locates every substring that should be highlighted,
 *   2. `combine_chunks` merges the located chunks that overlap,
 *   3. `fill_in_chunks` adds chunks for the text in between, up to the length of the text.
 *
 * @param {Object} options
 * @param {boolean} options.auto_escape forwarded to `find_chunks`
 * @param {boolean} options.highlight_prefix_only forwarded to `find_chunks`
 * @param {boolean} options.input_is_raw_html forwarded to `find_chunks`
 * @param {boolean} [options.case_sensitive=false] forwarded to `find_chunks`
 * @param {Function} [options.find_chunks=default_find_chunks] strategy used to locate the chunks to highlight
 * @param {Function} options.sanitize forwarded to `find_chunks`
 * @param {Array} options.search_words forwarded to `find_chunks`
 * @param {string} options.text_to_highlight text to search; a falsy value is treated as having length 0
 * @returns {Array} whatever `fill_in_chunks` returns for the combined chunks
 */
export const find_all = ({
  auto_escape,
  highlight_prefix_only,
  input_is_raw_html,
  case_sensitive = false,
  find_chunks = default_find_chunks,
  sanitize,
  search_words,
  text_to_highlight
}) => {
  const find_chunks_options = {
    auto_escape,
    input_is_raw_html,
    case_sensitive,
    sanitize,
    search_words,
    text_to_highlight,
    highlight_prefix_only
  }

  return fill_in_chunks({
    // Each found chunk is a substring to be highlighted; chunks that overlap are combined.
    chunks_to_highlight: combine_chunks({ chunks: find_chunks(find_chunks_options) }),
    // The remaining, non-highlighted text is filled in up to this length.
    total_length: text_to_highlight ? text_to_highlight.length : 0
  })
}


/**
 * Builds a gap-free list of chunks covering [0, total_length): the given highlighted chunks plus
 * non-highlighted chunks for the text before, between and after them.
 *
 * @param {Object} options
 * @param {Array} options.chunks_to_highlight chunks with `start`, `end` and `search_words_idx`, visited in order
 * @param {number} options.total_length length of the text being described
 * @returns {Array} chunks of the form `{ start, end, highlight, search_words_idx }`; zero-length spans are omitted
 */
export const fill_in_chunks = ({
  chunks_to_highlight,
  total_length
}) => {
  const filled = []

  // Records the span from `from` to `to`, skipping spans that would be empty.
  const add_span = (from, to, highlight, search_words_idx) => {
    const span_length = to - from
    if (span_length > 0) {
      filled.push({ start: from, end: to, highlight, search_words_idx })
    }
  }

  // Position up to which the text has been covered so far.
  let cursor = 0
  chunks_to_highlight.forEach(({ start, end, search_words_idx }) => {
    add_span(cursor, start, false)
    add_span(start, end, true, search_words_idx)
    cursor = end
  })

  // Whatever is left after the last highlighted chunk (or the whole text when there are none).
  add_span(cursor, total_length, false)

  return filled
}

/**
 * Merges overlapping chunks into single chunks.
 *
 * Chunks are ordered by `start`; a chunk that starts before the previous chunk ends is merged into it.
 * Chunks that merely touch (start equal to the previous end) are kept separate.
 * A merged chunk takes the `search_words_idx` of the longer of the two chunks (the later chunk wins a tie).
 *
 * The input array is not modified.
 *
 * @param {Object} options
 * @param {Array} options.chunks chunks with `start`, `end` and `search_words_idx`, in any order
 * @returns {Array} a new array of non-overlapping chunks ordered by `start`
 */
export const combine_chunks = ({
  chunks
}) => {
  // Sort a shallow copy: Array.prototype.sort works in place and would otherwise reorder the caller's array.
  const sorted_chunks = [...chunks].sort((first, second) => first.start - second.start)

  return sorted_chunks.reduce((processed_chunks, next_chunk) => {
    // First chunk just goes straight in the array...
    if (processed_chunks.length === 0) {
      return [next_chunk]
    }

    // ... subsequent chunks get checked to see if they overlap the most recently processed chunk.
    const prev_chunk = processed_chunks.pop()
    if (next_chunk.start < prev_chunk.end) {
      // It may be the case that prev_chunk completely surrounds next_chunk, so take the
      // largest of the end indices.
      const end_index = Math.max(prev_chunk.end, next_chunk.end)

      // Choose larger search word
      const prev_length = prev_chunk.end - prev_chunk.start
      const next_length = next_chunk.end - next_chunk.start
      const search_words_idx = prev_length > next_length ? prev_chunk.search_words_idx : next_chunk.search_words_idx

      processed_chunks.push({highlight: false, start: prev_chunk.start, end: end_index, search_words_idx })
    } else {
      processed_chunks.push(prev_chunk, next_chunk)
    }
    return processed_chunks
  }, [])
}

/**
 * Sanitizes a search word and, when requested, escapes regex special characters in the sanitized result.
 *
 * @param {string} search_word the raw search word
 * @param {Function} sanitize function applied to the search word
 * @param {boolean} autoescape when truthy, the sanitized word is passed through `escape_reg_exp_fn`
 * @returns {string} the sanitized (and optionally escaped) search word
 */
function sanitize_and_autoescape(search_word, sanitize, autoescape) {
  const sanitized = sanitize(search_word)
  // Escape the *sanitized* word: escaping the raw word would silently throw the sanitization away.
  return autoescape ? escape_reg_exp_fn(sanitized) : sanitized
}

/**
 * Adds regex word boundaries (`\b`) to a search word where appropriate.
 *
 * - Words containing Japanese, Korean or Greek characters are returned unchanged.
 * - Words enclosed in quotation marks have each enclosing quote replaced by `\b` (whole-word match).
 * - Otherwise, when `highlight_prefix_only` is truthy, `\b` is prepended (prefix match).
 * - Otherwise the word is returned unchanged.
 *
 * @param {string} search_word the search word (already in regex-source form)
 * @param {boolean} highlight_prefix_only whether unquoted words should only match at the start of a word
 * @returns {string} the search word with any word boundaries added
 */
function add_word_boundaries(search_word, highlight_prefix_only) {
  // we can't use word boundaries with Japanese, Korean or Greek character(s)
  // https://stackoverflow.com/questions/2881445/utf-8-word-boundary-regex-in-javascript
  const word_boundaries_unusable =
    has_japanese_or_korean_characters(search_word) || check_for_greek_symbols_regex.test(search_word)
  if (word_boundaries_unusable) {
    return search_word
  }

  // terms enclosed in quotation marks should be matched as whole words only
  const is_enclosed_in_quotes = check_for_whole_word_matching_regex.test(search_word)
  if (is_enclosed_in_quotes) {
    return search_word.replace(/^["']|["']$/g, '\\b')
  }

  // prefix matching puts a word boundary at the beginning of the regex only
  return highlight_prefix_only ? '\\b' + search_word : search_word
}

/**
 * Expands every run of digits in the search string so that it also matches when the digits are wrapped in
 * <sub>/<sup> html tags, e.g. carbon dioxide may appear in documents as "CO2" or "CO<sub>2</sub>".
 *
 * @param {string} search_word regex source for the search word
 * @returns {string} regex source where each digit run is surrounded by optional sub/sup tag groups
 */
function expand_for_sub_and_sup_integer_matching(search_word) {
  // `$&` in the replacement stands for the matched run of digits
  const digits_with_optional_tags = '(<su[bp]>)?$&(</su[bp]>)?'
  return search_word.replace(/[0-9]+/g, digits_with_optional_tags)
}

/**
 * Turns each '*' wildcard in the search string into `\S*`, so the resulting regex matches any run of
 * non-whitespace characters at that position.
 *
 * @param {string} search_word regex source for the search word
 * @returns {string} the search word with every '*' replaced by `\S*`
 */
function replace_wildcards(search_word) {
  const pieces_between_wildcards = search_word.split('*')
  return pieces_between_wildcards.join('\\S*')
}

/**
 * Converts a search word into the regular expression used to find it in the text.
 *
 * The word is passed, in order, through `sanitize_and_autoescape`, `add_word_boundaries`,
 * `replace_wildcards` and `expand_for_sub_and_sup_integer_matching`.
 *
 * @param {string} search_word the raw search word
 * @param {Function} sanitize forwarded to `sanitize_and_autoescape`
 * @param {boolean} auto_escape forwarded to `sanitize_and_autoescape`
 * @param {boolean} highlight_prefix_only forwarded to `add_word_boundaries`
 * @param {boolean} case_sensitive when truthy the regex has flags 'g', otherwise 'gi'
 * @returns {RegExp} a global regex for the search word
 */
function get_search_word_as_regex(
  search_word,
  sanitize,
  auto_escape,
  highlight_prefix_only,
  case_sensitive
) {
  let pattern = sanitize_and_autoescape(search_word, sanitize, auto_escape)
  pattern = add_word_boundaries(pattern, highlight_prefix_only)
  pattern = replace_wildcards(pattern)
  pattern = expand_for_sub_and_sup_integer_matching(pattern)

  const flags = case_sensitive ? 'g' : 'gi'
  return new RegExp(pattern, flags)
}

/**
 * Finds every occurrence of every search word in the text.
 *
 * The text is sanitized first and, when `input_is_raw_html` is truthy, its html tags are blanked out by
 * `replace_html_tags_with_spaces` before searching. Empty search words are skipped.
 *
 * @param {Object} options
 * @param {boolean} options.auto_escape forwarded to `get_search_word_as_regex`
 * @param {boolean} options.highlight_prefix_only forwarded to `get_search_word_as_regex`
 * @param {boolean} options.case_sensitive forwarded to `get_search_word_as_regex`
 * @param {boolean} options.input_is_raw_html whether html tags in the text should be blanked out before searching
 * @param {Function} [options.sanitize=default_sanitize] applied to the text and forwarded for the search words
 * @param {Array} options.search_words the words to look for
 * @param {string} options.text_to_highlight the text to search; a falsy value yields no chunks
 * @returns {Array} one `{ highlight: false, start, end, search_words_idx }` chunk per non-empty match,
 *                  grouped by search word (chunks of different words may overlap)
 */
export function default_find_chunks({
  auto_escape,
  highlight_prefix_only,
  case_sensitive,
  input_is_raw_html,
  sanitize = default_sanitize,
  search_words,
  text_to_highlight
}) {
  // There is nothing to match in missing/empty text. Without this guard regex.exec() would coerce
  // null/undefined to the strings "null"/"undefined" and report matches inside text that does not exist.
  if (!text_to_highlight) {
    return []
  }

  const sanitized_text = sanitize(text_to_highlight)
  const searchable_text = input_is_raw_html ? replace_html_tags_with_spaces(sanitized_text) : sanitized_text

  return search_words
    .reduce((chunks, search_word, search_words_idx) => {
      if (!search_word) {
        return chunks
      }

      const regex = get_search_word_as_regex(search_word, sanitize, auto_escape, highlight_prefix_only, case_sensitive)

      let match
      while ((match = regex.exec(searchable_text))) {
        const start = match.index
        const end = regex.lastIndex
        // Only return non-zero-length matches
        if (end > start) {
          chunks.push({highlight: false, start, end, search_words_idx})
        }

        // Prevent browsers like Firefox from getting stuck in an infinite loop
        // See http://www.regexguru.com/2008/04/watch-out-for-zero-length-matches/
        if (match.index === regex.lastIndex) {
          regex.lastIndex++
        }
      }

      return chunks
    }, [])
}

/**
 * Default sanitizer: a no-op that hands its argument back untouched.
 *
 * @param {*} value the value to "sanitize"
 * @returns {*} the very same value
 */
export function default_sanitize(value) {
  return value
}

/**
 * Tells whether the only html tags in the string are plain <sub>/<sup> tags.
 *
 * @param {string} string the text to inspect
 * @returns {boolean} true if the string contains at least one <sub>, </sub>, <sup> or </sup> tag and
 *                    nothing matching the "any other tag" pattern
 */
export function contains_sub_or_superscript_tags_only(string) {
  const has_sub_or_sup_tag = /<\/?su[bp]>/.test(string)
  if (!has_sub_or_sup_tag) {
    return false
  }

  // any tag whose name does not start with "sub"/"sup"
  const has_other_tag = /<\/?(?!su[bp])\w*\b[^>]*>/.test(string)
  return !has_other_tag
}

/**
 * Blanks out html tags while keeping the string length (and so every character offset) unchanged.
 *
 * Each matched tag is replaced by as many spaces as the tag is long. Tags whose name starts with "sub"/"sup"
 * are left in place, as they may be needed to match and highlight chemical symbols.
 *
 * @param {string} string the raw html
 * @returns {string} a string of the same length with the matched tags replaced by spaces
 */
export function replace_html_tags_with_spaces(string) {
  return string.replace(/<\/?(?!su[bp])\w*\b[^>]*>/g, (tag) => ' '.repeat(tag.length))
}

/**
 * Escapes the regex special characters . + ? ^ $ { } ( ) | [ ] and \ by prefixing each with a backslash.
 * Note that '*' is not among the escaped characters.
 *
 * @param {string} string the text to escape
 * @returns {string} the text with each listed character preceded by a backslash
 */
export function escape_reg_exp_fn (string) {
  const prefix_with_backslash = (special_character) => '\\' + special_character
  return string.replace(/[.+?^${}()|[\]\\]/g, prefix_with_backslash)
}

/**
 * Decides whether a background colour is dark enough to need light-coloured text on top of it.
 *
 * @param {string} hexstring a hex colour, with or without a leading hash: 6 digits ('#ffffff') or the
 *                           3/4-digit shorthand ('#fff'). Any digits after the first three colour channels
 *                           (i.e. an alpha channel) are ignored.
 * @returns {boolean} true if hexstring's colour is so dark that only light colours can be read over it.
 */
export function check_need_light_complimentary_colour(hexstring){
  // From https://24ways.org/2010/calculating-color-contrast
  // The above article suggests a threshold of yiq=128,
  // but some medium grey backgrounds still made dark foreground text unreadable,
  // so I increased it a little.
  let hex_digits = hexstring.replace("#", "")
  if (hex_digits.length === 3 || hex_digits.length === 4) {
    // Shorthand notation: every digit stands for a doubled digit ('abc' means 'aabbcc').
    // Without this expansion the blue channel parses as NaN and the function always answers false.
    hex_digits = hex_digits.replace(/./g, '$&$&')
  }
  const r = parseInt(hex_digits.slice(0, 2), 16)
  const g = parseInt(hex_digits.slice(2, 4), 16)
  const b = parseInt(hex_digits.slice(4, 6), 16)
  const yiq = ((r * 299) + (g * 587) + (b * 114)) / 1000
  return (yiq < 160)
}