Version: 1.0
Type: Function
Category: Algorithms
License: GNU General Public License
Description: Generates all possible n-grams for a word and returns an array of the unique n-grams. The function takes two arguments: $word = the word and $min_gram_length = the smallest n-gram string length you would like to produce. The longest n-gram produced is one character shorter than the word, so the word itself is never included. For example, ngrams('hello', 2) would produce the following values in an array: he el ll lo hel ell llo hell ello. This function is useful if you are creating a word index and would like to have the ability to search for substrings without using LIKE '%word%'.
/**
 * Build every unique n-gram (contiguous substring) of a word.
 *
 * Gram lengths run from $min_gram_length up to one less than the length of
 * the trimmed word, so the whole word itself is never returned.
 * Example: ngrams('hello', 2) yields he, el, ll, lo, hel, ell, llo, hell, ello.
 *
 * Lengths and offsets are counted in bytes (strlen/substr), so multi-byte
 * characters may be split.
 *
 * @param string $word            Word to split; surrounding whitespace is trimmed first.
 * @param int    $min_gram_length Smallest gram length to produce; values below 1 are treated as 1.
 *
 * @return array Unique n-grams, shortest first. Keys come from array_unique()
 *               and may have gaps where duplicates were dropped.
 */
function ngrams($word, $min_gram_length = 2) {
    $ngrams = array();
    $word = trim($word);
    $len = strlen($word);
    $max_gram_length = $len - 1;

    // A gram length below 1 would make substr() return empty or truncated strings.
    $min_gram_length = max(1, (int) $min_gram_length);

    for ($a = $min_gram_length; $a <= $max_gram_length; $a++) {
        // Last start offset at which a gram of length $a still fits in the word.
        $last_pos = $len - $a;
        for ($pos = 0; $pos <= $last_pos; $pos++) {
            $ngrams[] = substr($word, $pos, $a);
        }
    }

    return array_unique($ngrams);
}