1: 2: 3: 4: 5: 6: 7: 8: 9: 10: 11: 12: 13: 14: 15: 16: 17: 18: 19: 20: 21: 22: 23: 24: 25: 26: 27: 28: 29: 30: 31: 32: 33: 34: 35: 36: 37: 38: 39: 40: 41: 42: 43: 44: 45: 46: 47: 48: 49: 50: 51: 52: 53: 54: 55: 56: 57: 58: 59: 60: 61: 62: 63: 64: 65: 66: 67: 68: 69: 70: 71: 72: 73: 74: 75: 76: 77: 78: 79: 80: 81: 82: 83: 84: 85: 86: 87: 88: 89: 90: 91: 92: 93: 94: 95: 96: 97: 98: 99: 100: 101: 102: 103: 104: 105: 106: 107: 108: 109: 110: 111: 112: 113: 114: 115: 116: 117: 118: 119: 120: 121: 122: 123: 124: 125: 126: 127: 128: 129: 130: 131: 132: 133: 134: 135: 136: 137: 138: 139: 140: 141: 142: 143: 144: 145: 146: 147: 148: 149: 150: 151: 152: 153: 154: 155: 156: 157: 158: 159: 160: 161: 162: 163: 164: 165: 166: 167: 168: 169: 170: 171: 172: 173: 174: 175: 176: 177: 178: 179: 180: 181: 182: 183: 184: 185: 186: 187: 188: 189: 190: 191: 192: 193: 194: 195: 196: 197: 198: 199: 200: 201: 202: 203: 204: 205: 206: 207: 208: 209: 210: 211: 212: 213: 214: 215: 216: 217: 218: 219: 220: 221: 222: 223: 224: 225: 226: 227: 228: 229: 230: 231: 232: 233: 234: 235: 236: 237: 238: 239: 240: 241: 242: 243: 244: 245: 246: 247: 248: 249: 250: 251: 252: 253: 254: 255: 256: 257: 258: 259: 260: 261: 262: 263: 264: 265: 266: 267: 268: 269: 270: 271: 272: 273: 274: 275: 276: 277: 278: 279: 280: 281: 282: 283: 284: 285: 286: 287: 288: 289: 290: 291: 292: 293: 294: 295: 296: 297: 298: 299: 300: 301: 302: 303: 304: 305: 306: 307: 308: 309: 310:
<?php
// This file is only meant to be included by SMF itself: stop immediately
// when the SMF constant has not been defined by the including script.
if (defined('SMF') === false)
{
	die('No direct access...');
}
/**
 * Search API that answers searches from the custom word index stored in
 * the {db_prefix}log_search_words table.
 *
 * Word ids are produced by text2words() called with its third argument set
 * to true (presumably its "encrypt" mode returning integer ids -- confirm
 * against text2words()); they are stored in the integer column id_word.
 */
class custom_search extends search_api
{
	/**
	 * @var array Decoded value of $modSettings['search_custom_index_config']
	 */
	protected $indexSettings = array();

	/**
	 * @var array Stop words taken from $modSettings['search_stopwords'] (comma separated)
	 */
	protected $bannedWords = array();

	/**
	 * @var int|null The 'bytes_per_word' index setting, used as the minimum word length
	 */
	protected $min_word_length = null;

	/**
	 * @var array Values of $db_type this API can work with
	 */
	protected $supported_databases = array('mysql', 'postgresql');

	/**
	 * Loads the index configuration and the stop words from $modSettings.
	 *
	 * On an unsupported database type $this->is_supported is set to false
	 * (that property is not declared here; presumably inherited from search_api).
	 */
	public function __construct()
	{
		global $smcFunc, $modSettings, $db_type;

		// Is this database supported?
		if (!in_array($db_type, $this->supported_databases, true))
		{
			$this->is_supported = false;
			return;
		}

		if (empty($modSettings['search_custom_index_config']))
			return;

		// Only accept the stored configuration when it really decodes to an array,
		// otherwise keep the empty default instead of indexing into a non-array.
		$settings = $smcFunc['json_decode']($modSettings['search_custom_index_config'], true);
		if (is_array($settings))
			$this->indexSettings = $settings;

		$this->bannedWords = empty($modSettings['search_stopwords']) ? array() : explode(',', $modSettings['search_stopwords']);

		if (isset($this->indexSettings['bytes_per_word']))
			$this->min_word_length = $this->indexSettings['bytes_per_word'];
	}

	/**
	 * Tells the caller whether this API implements a given method.
	 *
	 * @param string $methodName The method to check for
	 * @param array|null $query_params Passed through to the parent implementation
	 * @return bool True for the methods implemented here, otherwise whatever the parent reports
	 */
	public function supportsMethod($methodName, $query_params = null)
	{
		switch ($methodName)
		{
			case 'isValid':
			case 'searchSort':
			case 'prepareIndexes':
			case 'indexedWordQuery':
			case 'postCreated':
			case 'postModified':
				return true;
		}

		// Not one of ours, but the parent may still support it.
		return parent::supportsMethod($methodName, $query_params);
	}

	/**
	 * Checks whether a custom index has been configured.
	 *
	 * @return bool True when $modSettings['search_custom_index_config'] is not empty
	 */
	public function isValid()
	{
		global $modSettings;

		return !empty($modSettings['search_custom_index_config']);
	}

	/**
	 * Comparison callback for sorting search words.
	 *
	 * Words are ordered by ascending byte length; words found in the global
	 * $excludedWords get a 1000 point bonus and therefore sort first.
	 *
	 * @param string $a First word
	 * @param string $b Second word
	 * @return int -1, 0 or 1
	 */
	public function searchSort($a, $b)
	{
		global $excludedWords;

		// The global may not have been set by the caller; in_array() refuses
		// a non-array haystack (TypeError on PHP 8), so normalise it first.
		$excluded = (array) $excludedWords;

		$x = strlen($a) - (in_array($a, $excluded) ? 1000 : 0);
		$y = strlen($b) - (in_array($b, $excluded) ? 1000 : 0);

		return $y < $x ? 1 : ($y > $x ? -1 : 0);
	}

	/**
	 * Splits a search word into index words and files them in the search arrays.
	 *
	 * @param string $word The word (or phrase) being searched for
	 * @param array &$wordsSearch Receives $word under 'words' (unless search_force_index is on) and the usable sub-words under 'indexed_words'
	 * @param array &$wordsExclude Receives the sub-words when $isExcluded is set
	 * @param bool $isExcluded Whether the user asked to exclude this word
	 */
	public function prepareIndexes($word, array &$wordsSearch, array &$wordsExclude, $isExcluded)
	{
		global $modSettings, $smcFunc;

		$subwords = text2words($word, $this->min_word_length, true);

		if (empty($modSettings['search_force_index']))
			$wordsSearch['words'][] = $word;

		// An excluded phrase made of several sub-words is not pushed to the index lists.
		if (count($subwords) > 1 && $isExcluded)
			return;

		foreach ($subwords as $subword)
		{
			if ($smcFunc['strlen']($subword) >= $this->min_word_length && !in_array($subword, $this->bannedWords))
			{
				$wordsSearch['indexed_words'][] = $subword;

				if ($isExcluded)
					$wordsExclude[] = $subword;
			}
		}
	}

	/**
	 * Builds the SQL operator and the escaped pattern used to look for a term in a text column.
	 *
	 * @param string $term The raw word or phrase
	 * @param bool $useRegexp True for a word-boundary RLIKE match, false for a substring LIKE match
	 * @return array Two elements: the operator surrounded by spaces, and the pattern to bind
	 */
	private function buildTextMatch($term, $useRegexp)
	{
		// LIKE: escape the wildcard characters and match anywhere in the text.
		if (!$useRegexp)
			return array(' LIKE ', '%' . strtr($term, array('_' => '\\_', '%' => '\\%')) . '%');

		// RLIKE: wrap every regex meta character in a bracket expression and anchor on word boundaries.
		return array(' RLIKE ', '[[:<:]]' . addcslashes(preg_replace(array('/([\[\]$.+*?|{}()])/'), array('[$1]'), $term), '\\\'') . '[[:>:]]');
	}

	/**
	 * Builds and runs the query that collects the messages matching a set of words.
	 *
	 * @param array $words 'words' holds the plain words matched against m.body, 'indexed_words' the word ids matched through log_search_words
	 * @param array $search_data Reads 'params', 'no_regexp', 'insert_into', 'max_results' and 'indexed_results'
	 * @return mixed Whatever $smcFunc['db_search_query'] returns
	 */
	public function indexedWordQuery(array $words, array $search_data)
	{
		global $modSettings, $smcFunc;

		$query_select = array(
			'id_msg' => 'm.id_msg',
		);
		$query_inner_join = array();
		$query_left_join = array();
		$query_where = array();
		$query_params = $search_data['params'];

		if ($query_params['id_search'])
			$query_select['id_search'] = '{int:id_search}';

		// Whole-word regexp matching is only used when enabled and not vetoed for this search.
		$useRegexp = !empty($modSettings['search_match_words']) && empty($search_data['no_regexp']);
		$forceIndex = !empty($modSettings['search_force_index']);

		$count = 0;
		foreach ($words['words'] as $regularWord)
		{
			list ($operator, $pattern) = $this->buildTextMatch($regularWord, $useRegexp);

			$query_where[] = 'm.body' . (in_array($regularWord, $query_params['excluded_words']) ? ' NOT' : '') . $operator . '{string:complex_body_' . $count . '}';
			$query_params['complex_body_' . $count++] = $pattern;
		}

		if ($query_params['user_query'])
			$query_where[] = '{raw:user_query}';
		if ($query_params['board_query'])
			$query_where[] = 'm.id_board {raw:board_query}';
		if ($query_params['topic'])
			$query_where[] = 'm.id_topic = {int:topic}';
		if ($query_params['min_msg_id'])
			$query_where[] = 'm.id_msg >= {int:min_msg_id}';
		if ($query_params['max_msg_id'])
			$query_where[] = 'm.id_msg <= {int:max_msg_id}';

		$count = 0;
		if (!empty($query_params['excluded_phrases']) && !$forceIndex)
			foreach ($query_params['excluded_phrases'] as $phrase)
			{
				list ($operator, $pattern) = $this->buildTextMatch($phrase, $useRegexp);

				$query_where[] = 'subject NOT ' . $operator . '{string:exclude_subject_phrase_' . $count . '}';
				$query_params['exclude_subject_phrase_' . $count++] = $pattern;
			}

		$count = 0;
		if (!empty($query_params['excluded_subject_words']) && !$forceIndex)
			foreach ($query_params['excluded_subject_words'] as $excludedWord)
			{
				list ($operator, $pattern) = $this->buildTextMatch($excludedWord, $useRegexp);

				$query_where[] = 'subject NOT ' . $operator . '{string:exclude_subject_words_' . $count . '}';
				$query_params['exclude_subject_words_' . $count++] = $pattern;
			}

		$numTables = 0;
		$prev_join = 0;
		foreach ($words['indexed_words'] as $indexedWord)
		{
			$numTables++;

			// Word ids go straight into the SQL text, so make sure only an integer can get there.
			$wordId = (int) $indexedWord;

			if (in_array($indexedWord, $query_params['excluded_index_words']))
			{
				// Excluded word: left join on it and require that no row was found.
				$query_left_join[] = '{db_prefix}log_search_words AS lsw' . $numTables . ' ON (lsw' . $numTables . '.id_word = ' . $wordId . ' AND lsw' . $numTables . '.id_msg = m.id_msg)';
				$query_where[] = '(lsw' . $numTables . '.id_word IS NULL)';
			}
			else
			{
				// Required word: chain the join to the previous required word (or to the message table for the first one).
				$query_inner_join[] = '{db_prefix}log_search_words AS lsw' . $numTables . ' ON (lsw' . $numTables . '.id_msg = ' . ($prev_join === 0 ? 'm' : 'lsw' . $prev_join) . '.id_msg)';
				$query_where[] = 'lsw' . $numTables . '.id_word = ' . $wordId;
				$prev_join = $numTables;
			}
		}

		// The INSERT part is only prepended when the database layer reports support for INSERT IGNORE.
		return $smcFunc['db_search_query']('insert_into_log_messages_fulltext', ($smcFunc['db_support_ignore'] ? ('
			INSERT IGNORE INTO {db_prefix}' . $search_data['insert_into'] . '
				(' . implode(', ', array_keys($query_select)) . ')') : '') . '
			SELECT ' . implode(', ', $query_select) . '
			FROM {db_prefix}messages AS m' . (empty($query_inner_join) ? '' : '
				INNER JOIN ' . implode('
				INNER JOIN ', $query_inner_join)) . (empty($query_left_join) ? '' : '
				LEFT JOIN ' . implode('
				LEFT JOIN ', $query_left_join)) . '
			WHERE ' . implode('
				AND ', $query_where) . (empty($search_data['max_results']) ? '' : '
			LIMIT ' . ($search_data['max_results'] - $search_data['indexed_results'])),
			$query_params
		);
	}

	/**
	 * Adds the words of a newly created message to the index.
	 *
	 * @param array &$msgOptions Reads 'body' and 'id'
	 * @param array &$topicOptions Not used here
	 * @param array &$posterOptions Not used here
	 */
	public function postCreated(array &$msgOptions, array &$topicOptions, array &$posterOptions)
	{
		global $modSettings, $smcFunc;

		// Without a configured index there is no word size to index with.
		if (empty($modSettings['search_custom_index_config']))
			return;

		$customIndexSettings = $smcFunc['json_decode']($modSettings['search_custom_index_config'], true);

		$inserts = array();
		foreach (text2words($msgOptions['body'], $customIndexSettings['bytes_per_word'], true) as $word)
			$inserts[] = array($word, $msgOptions['id']);

		if (!empty($inserts))
			$smcFunc['db_insert']('ignore',
				'{db_prefix}log_search_words',
				array('id_word' => 'int', 'id_msg' => 'int'),
				$inserts,
				array('id_word', 'id_msg')
			);
	}

	/**
	 * Updates the index after a message body has been edited.
	 *
	 * Does nothing unless $msgOptions['body'] is set.
	 *
	 * @param array &$msgOptions Reads 'body', 'id' and, when present, 'old_body'
	 * @param array &$topicOptions Not used here
	 * @param array &$posterOptions Not used here
	 */
	public function postModified(array &$msgOptions, array &$topicOptions, array &$posterOptions)
	{
		global $modSettings, $smcFunc;

		if (!isset($msgOptions['body']) || empty($modSettings['search_custom_index_config']))
			return;

		$customIndexSettings = $smcFunc['json_decode']($modSettings['search_custom_index_config'], true);
		$stopwords = empty($modSettings['search_stopwords']) ? array() : explode(',', $modSettings['search_stopwords']);
		$old_body = isset($msgOptions['old_body']) ? $msgOptions['old_body'] : '';

		// Work out which words disappeared from and which were added to the message.
		$old_index = text2words($old_body, $customIndexSettings['bytes_per_word'], true);
		$new_index = text2words($msgOptions['body'], $customIndexSettings['bytes_per_word'], true);
		$removed_words = array_diff(array_diff($old_index, $new_index), $stopwords);
		$inserted_words = array_diff(array_diff($new_index, $old_index), $stopwords);

		if (!empty($removed_words))
		{
			$smcFunc['db_query']('', '
				DELETE FROM {db_prefix}log_search_words
				WHERE id_msg = {int:id_msg}
					AND id_word IN ({array_int:removed_words})',
				array(
					'removed_words' => $removed_words,
					'id_msg' => $msgOptions['id'],
				)
			);
		}

		if (!empty($inserted_words))
		{
			$inserts = array();
			foreach ($inserted_words as $word)
				$inserts[] = array($word, $msgOptions['id']);

			// 'ignore' (as in postCreated) keeps rows that are already indexed from
			// causing a duplicate key error, e.g. when no old body was supplied and
			// every current word therefore counts as newly inserted.
			$smcFunc['db_insert']('ignore',
				'{db_prefix}log_search_words',
				array('id_word' => 'int', 'id_msg' => 'int'),
				$inserts,
				array('id_word', 'id_msg')
			);
		}
	}
}
?>