Version: 1.0

Type: Function

Category: Algorithms

License: GNU General Public License

Description: This function generates the n-grams (contiguous substrings) of a word and returns them as an array of unique values. It takes two arguments: $word = the word, and $min_gram_length = the smallest n-gram length you would like to produce (default 2). N-grams are produced for every length from $min_gram_length up to one less than the length of the word, so the whole word itself is not included. For example, ngrams('hello', 2) would produce the following values in an array: he el ll lo hel ell llo hell ello. This function is useful if you are creating a word index and would like to be able to search for substrings without using LIKE '%word%'.

 



/**
 * Generate every unique n-gram (contiguous substring) of a word.
 *
 * N-grams are produced for each length from $min_gram_length up to one less
 * than the number of characters in the trimmed word, so the whole word itself
 * is never included. Results are ordered by length, then by position, with
 * later duplicates removed.
 *
 * Example: ngrams('hello', 2) returns
 * he, el, ll, lo, hel, ell, llo, hell, ello.
 *
 * @param string $word            The word to break into n-grams; surrounding
 *                                whitespace is trimmed first.
 * @param int    $min_gram_length Smallest n-gram length to produce. Values
 *                                below 1 are treated as 1.
 *
 * @return array Sequentially indexed list of unique n-gram strings; empty
 *               when the word is too short to yield any n-gram.
 */
function ngrams($word, $min_gram_length = 2) {
    $word = trim((string) $word);

    // Split into characters rather than bytes so multi-byte UTF-8 characters
    // are never cut in half. preg_split() returns false when the input is not
    // valid UTF-8; in that case fall back to treating each byte as a character.
    $chars = preg_split('//u', $word, -1, PREG_SPLIT_NO_EMPTY);
    if ($chars === false) {
        $chars = str_split($word);
    }

    $len = count($chars);
    $max_gram_length = $len - 1;

    // A length of zero or less would only yield empty/meaningless grams.
    $min_gram_length = max(1, (int) $min_gram_length);

    $ngrams = array();

    for ($size = $min_gram_length; $size <= $max_gram_length; $size++) {
        // Last start position at which a gram of $size still fits in the word.
        $last_start = $len - $size;

        for ($pos = 0; $pos <= $last_start; $pos++) {
            $ngrams[] = implode('', array_slice($chars, $pos, $size));
        }
    }

    // array_unique() keeps the original keys, which leaves gaps once
    // duplicates are dropped; re-index so callers get a plain list.
    return array_values(array_unique($ngrams));
}