1:   2:   3:   4:   5:   6:   7:   8:   9:  10:  11:  12:  13:  14:  15:  16:  17:  18:  19:  20:  21:  22:  23:  24:  25:  26:  27:  28:  29:  30:  31:  32:  33:  34:  35:  36:  37:  38:  39:  40:  41:  42:  43:  44:  45:  46:  47:  48:  49:  50:  51:  52:  53:  54:  55:  56:  57:  58:  59:  60:  61:  62:  63:  64:  65:  66:  67:  68:  69:  70:  71:  72:  73:  74:  75:  76:  77:  78:  79:  80:  81:  82:  83:  84:  85:  86:  87:  88:  89:  90:  91:  92:  93:  94:  95:  96:  97:  98:  99: 100: 101: 102: 103: 104: 105: 106: 107: 108: 109: 110: 111: 112: 113: 114: 115: 116: 117: 118: 119: 120: 121: 122: 123: 124: 125: 126: 127: 128: 129: 130: 131: 132: 133: 134: 135: 136: 137: 138: 139: 140: 141: 142: 143: 144: 145: 146: 147: 148: 149: 150: 151: 152: 153: 154: 155: 156: 157: 158: 159: 160: 161: 162: 163: 164: 165: 166: 167: 168: 169: 170: 171: 172: 173: 174: 175: 176: 177: 178: 179: 180: 181: 182: 183: 184: 185: 186: 187: 188: 189: 190: 191: 192: 193: 194: 195: 196: 197: 198: 199: 200: 201: 202: 203: 204: 205: 206: 207: 208: 209: 210: 211: 212: 213: 214: 215: 216: 217: 218: 219: 220: 221: 222: 223: 224: 225: 226: 227: 228: 229: 230: 231: 232: 233: 234: 235: 236: 237: 238: 239: 240: 241: 242: 243: 244: 245: 246: 247: 248: 249: 250: 251: 252: 253: 254: 255: 256: 257: 258: 259: 260: 261: 262: 263: 264: 265: 266: 267: 268: 269: 270: 271: 272: 273: 274: 275: 276: 277: 278: 279: 280: 281: 282: 283: 284: 285: 286: 
<?php

/**
 * Simple Machines Forum (SMF)
 *
 * @package SMF
 * @author Simple Machines http://www.simplemachines.org
 * @copyright 2019 Simple Machines and individual contributors
 * @license http://www.simplemachines.org/about/smf/license.php BSD
 *
 * @version 2.1 RC1
 */

// Stop immediately when the SMF constant has not been defined by the including script.
if (!defined('SMF'))
{
    exit('No direct access...');
}

/**
 * Class fulltext_search
 * Used for fulltext index searching
 */
class fulltext_search extends search_api
{
    /**
     * @var array Which words are banned (read from the comma separated
     * 'search_banned_words' setting in the constructor)
     */
    protected $bannedWords = array();

    /**
     * @var int The minimum word length (4 until the constructor replaces it
     * with the value reported by _getMinWordLength())
     */
    protected $min_word_length = 4;

    /**
     * @var array Which databases support this method?
     */
    protected $supported_databases = array('mysql', 'postgresql');

    /**
     * The constructor function
     *
     * Flags the API as unsupported when the current database type is not in
     * $supported_databases; otherwise loads the banned words and the minimum
     * word length.
     */
    public function __construct()
    {
        global $modSettings, $db_type;

        // Is this database supported?
        if (!in_array($db_type, $this->supported_databases))
        {
            $this->is_supported = false;
            return;
        }

        $this->bannedWords = empty($modSettings['search_banned_words']) ? array() : explode(',', $modSettings['search_banned_words']);
        $this->min_word_length = $this->_getMinWordLength();
    }

    /**
     * {@inheritDoc}
     *
     * The four methods implemented by this class are reported as supported;
     * every other method name is delegated to the parent class.
     */
    public function supportsMethod($methodName, $query_params = null)
    {
        switch ($methodName)
        {
            case 'searchSort':
            case 'prepareIndexes':
            case 'indexedWordQuery':
            case 'postRemoved':
                return true;
        }

        // All other methods: maybe parent got support.
        return parent::supportsMethod($methodName, $query_params);
    }

    /**
     * fulltext_search::_getMinWordLength()
     *
     * What is the minimum word length full text supports?
     *
     * Returns 0 on PostgreSQL. Otherwise asks the server for its
     * 'ft_min_word_len' variable and falls back to 4 when the query fails
     * or does not return exactly one row.
     *
     * @return int The minimum word length
     */
    protected function _getMinWordLength()
    {
        global $smcFunc, $db_type;

        if ($db_type == 'postgresql')
            return 0;

        // 4 is the MySQL default...
        $min_word_length = 4;

        // Try to determine the minimum number of letters for a fulltext search.
        $request = $smcFunc['db_search_query']('max_fulltext_length', '
            SHOW VARIABLES
            LIKE {string:fulltext_minimum_word_length}',
            array(
                'fulltext_minimum_word_length' => 'ft_min_word_len',
            )
        );
        if ($request !== false)
        {
            if ($smcFunc['db_num_rows']($request) == 1)
            {
                // The row is (variable name, value); only the value matters.
                list (, $value) = $smcFunc['db_fetch_row']($request);
                $min_word_length = (int) $value;
            }

            // Always release the result, also when the row count was unexpected.
            $smcFunc['db_free_result']($request);
        }

        return $min_word_length;
    }

    /**
     * Builds the SQL operator and the bound pattern used to match a word or
     * phrase against a text column without the fulltext index.
     *
     * @param string $term The raw word or phrase to look for
     * @param bool $use_like Whether to do a substring LIKE match (true) or a whole word RLIKE match (false)
     * @return array The operator (padded with spaces) followed by the pattern to bind as a string parameter
     */
    protected function _buildTextMatch($term, $use_like)
    {
        // Substring match: escape the LIKE wildcards so they are taken literally.
        if ($use_like)
            return array(' LIKE ', '%' . strtr($term, array('_' => '\\_', '%' => '\\%')) . '%');

        // Whole word match: wrap regex metacharacters in brackets and anchor on word boundaries.
        return array(' RLIKE ', '[[:<:]]' . addcslashes(preg_replace(array('/([\[\]$.+*?|{}()])/'), array('[$1]'), $term), '\\\'') . '[[:>:]]');
    }

    /**
     * {@inheritDoc}
     *
     * Longer words sort first. Words found in the global $excludedWords have
     * 1000 subtracted from their length, which moves them behind the others.
     */
    public function searchSort($a, $b)
    {
        global $excludedWords, $smcFunc;

        $x = $smcFunc['strlen']($a) - (in_array($a, $excludedWords) ? 1000 : 0);
        $y = $smcFunc['strlen']($b) - (in_array($b, $excludedWords) ? 1000 : 0);

        return $x < $y ? 1 : ($x > $y ? -1 : 0);
    }

    /**
     * {@inheritDoc}
     *
     * Every word is added to $wordsSearch['indexed_words'] (quoted when it
     * splits into anything other than exactly one subword). Unless
     * 'search_force_index' is set, words the index would skip are also added
     * to $wordsSearch['words'] and $wordsSearch['complex_words'] so they can
     * be matched without the index.
     */
    public function prepareIndexes($word, array &$wordsSearch, array &$wordsExclude, $isExcluded)
    {
        global $modSettings, $smcFunc;

        $subwords = text2words($word, null, false);

        // Phrases are quoted; a word that is a single subword is used as is.
        $fulltextWord = count($subwords) === 1 ? $word : '"' . $word . '"';
        $trimmedWord = trim($word, "/*- ");

        if (empty($modSettings['search_force_index']))
        {
            // A boolean capable search engine and not forced to only use an index, we may use a non indexed search
            // this is harder on the server so we are restrictive here
            if (count($subwords) > 1 && preg_match('~[.:@$]~', $word))
            {
                // using special characters that a full index would ignore and the remaining words are short which would also be ignored
                if (($smcFunc['strlen'](current($subwords)) < $this->min_word_length) && ($smcFunc['strlen'](next($subwords)) < $this->min_word_length))
                {
                    $wordsSearch['words'][] = $trimmedWord;
                    $wordsSearch['complex_words'][] = $fulltextWord;
                }
            }
            elseif ($smcFunc['strlen']($trimmedWord) < $this->min_word_length)
            {
                // short words have feelings too
                $wordsSearch['words'][] = $trimmedWord;
                $wordsSearch['complex_words'][] = $fulltextWord;
            }
        }

        $wordsSearch['indexed_words'][] = $fulltextWord;
        if ($isExcluded)
            $wordsExclude[] = $fulltextWord;
    }

    /**
     * {@inheritDoc}
     *
     * Builds the WHERE clauses from $words and $search_data['params'] and runs
     * a SELECT on the messages table, prefixed with an INSERT IGNORE into
     * $search_data['insert_into'] when the database layer supports it.
     *
     * @return mixed Whatever $smcFunc['db_search_query'] returns for the query
     */
    public function indexedWordQuery(array $words, array $search_data)
    {
        global $modSettings, $smcFunc;

        $query_select = array(
            'id_msg' => 'm.id_msg',
        );
        $query_where = array();
        $query_params = $search_data['params'];

        // PostgreSQL always uses the simple mode. Note that this changes the global setting.
        if ($smcFunc['db_title'] == "PostgreSQL")
            $modSettings['search_simple_fulltext'] = true;

        if ($query_params['id_search'])
            $query_select['id_search'] = '{int:id_search}';

        // Substring (LIKE) matching unless whole word matching is on and regular expressions are allowed.
        $use_like = empty($modSettings['search_match_words']) || !empty($search_data['no_regexp']);

        // Words the index cannot handle are matched directly against the body.
        $count = 0;
        if (empty($modSettings['search_simple_fulltext']))
            foreach ($words['words'] as $regularWord)
            {
                list ($operator, $pattern) = $this->_buildTextMatch($regularWord, $use_like);

                $query_where[] = 'm.body' . (in_array($regularWord, $query_params['excluded_words']) ? ' NOT' : '') . $operator . '{string:complex_body_' . $count . '}';
                $query_params['complex_body_' . $count++] = $pattern;
            }

        if ($query_params['user_query'])
            $query_where[] = '{raw:user_query}';
        if ($query_params['board_query'])
            $query_where[] = 'm.id_board {raw:board_query}';

        if ($query_params['topic'])
            $query_where[] = 'm.id_topic = {int:topic}';
        if ($query_params['min_msg_id'])
            $query_where[] = 'm.id_msg >= {int:min_msg_id}';
        if ($query_params['max_msg_id'])
            $query_where[] = 'm.id_msg <= {int:max_msg_id}';

        // Keep out messages whose subject contains an excluded phrase...
        $count = 0;
        if (!empty($query_params['excluded_phrases']) && empty($modSettings['search_force_index']))
            foreach ($query_params['excluded_phrases'] as $phrase)
            {
                list ($operator, $pattern) = $this->_buildTextMatch($phrase, $use_like);

                $query_where[] = 'subject NOT ' . $operator . '{string:exclude_subject_phrase_' . $count . '}';
                $query_params['exclude_subject_phrase_' . $count++] = $pattern;
            }

        // ... or an excluded word.
        $count = 0;
        if (!empty($query_params['excluded_subject_words']) && empty($modSettings['search_force_index']))
            foreach ($query_params['excluded_subject_words'] as $excludedWord)
            {
                list ($operator, $pattern) = $this->_buildTextMatch($excludedWord, $use_like);

                $query_where[] = 'subject NOT ' . $operator . '{string:exclude_subject_words_' . $count . '}';
                $query_params['exclude_subject_words_' . $count++] = $pattern;
            }

        if (!empty($modSettings['search_simple_fulltext']))
        {
            if ($smcFunc['db_title'] == "PostgreSQL")
            {
                $language_ftx = $smcFunc['db_search_language']();

                $query_where[] = 'to_tsvector({string:language_ftx},body) @@ plainto_tsquery({string:language_ftx},{string:body_match})';
                $query_params['language_ftx'] = $language_ftx;
            }
            else
                $query_where[] = 'MATCH (body) AGAINST ({string:body_match})';

            // Simple mode cannot negate, so excluded words are just left out of the match string.
            $query_params['body_match'] = implode(' ', array_diff($words['indexed_words'], $query_params['excluded_index_words']));
        }
        else
        {
            $query_params['boolean_match'] = '';

            // remove any indexed words that are used in the complex body search terms
            $words['indexed_words'] = array_diff($words['indexed_words'], $words['complex_words']);

            // NOTE: the PostgreSQL branches below cannot currently be reached, simple mode is forced for PostgreSQL above.
            if ($smcFunc['db_title'] == "PostgreSQL")
            {
                $row = 0;
                foreach ($words['indexed_words'] as $fulltextWord)
                {
                    $query_params['boolean_match'] .= ($row <> 0 ? '&' : '');
                    $query_params['boolean_match'] .= (in_array($fulltextWord, $query_params['excluded_index_words']) ? '!' : '') . $fulltextWord . ' ';
                    $row++;
                }
            }
            else
                foreach ($words['indexed_words'] as $fulltextWord)
                    $query_params['boolean_match'] .= (in_array($fulltextWord, $query_params['excluded_index_words']) ? '-' : '+') . $fulltextWord . ' ';

            // Drop the trailing space left by the loops above.
            $query_params['boolean_match'] = substr($query_params['boolean_match'], 0, -1);

            // if we have bool terms to search, add them in
            if ($query_params['boolean_match'])
            {
                if ($smcFunc['db_title'] == "PostgreSQL")
                {
                    $language_ftx = $smcFunc['db_search_language']();

                    $query_where[] = 'to_tsvector({string:language_ftx},body) @@ plainto_tsquery({string:language_ftx},{string:boolean_match})';
                    $query_params['language_ftx'] = $language_ftx;
                }
                else
                    $query_where[] = 'MATCH (body) AGAINST ({string:boolean_match} IN BOOLEAN MODE)';
            }
        }

        // The LIMIT only asks for as many results as are still missing.
        $ignoreRequest = $smcFunc['db_search_query']('insert_into_log_messages_fulltext', ($smcFunc['db_support_ignore'] ? ('
            INSERT IGNORE INTO {db_prefix}' . $search_data['insert_into'] . '
                (' . implode(', ', array_keys($query_select)) . ')') : '') . '
            SELECT ' . implode(', ', $query_select) . '
            FROM {db_prefix}messages AS m
            WHERE ' . implode('
                AND ', $query_where) . (empty($search_data['max_results']) ? '' : '
            LIMIT ' . ($search_data['max_results'] - $search_data['indexed_results'])),
            $query_params
        );

        return $ignoreRequest;
    }
}

?>