|
Comments for: dhar20040217
| Message # 1018951: |
|
Date: 02/18/04 04:40
By: Armand Turpel Profile Subject: Keytable with fixed rows length
Here is another approach to storing keywords: in this table the crc32 checksum of a keyword is stored rather than the whole keyword itself. Searching such a table is much faster because the table has a fixed row length, and the table takes less storage space.
CREATE TABLE keytable ( keyid int NOT NULL auto_increment, keyword int NOT NULL);
The following script extracts the keywords from a given text file and prints out: Keyword => Occurrence => crc32 Checksum
You have to define an existing text file at the bottom of the script.
<?php

/**
 * Extracts the keywords of a text and prints every keyword together with
 * its number of occurrences and its crc32 checksum.
 */
class index
{
    /**
     * Default word delimiters.
     *
     * NOTE: strtok() works on bytes, so the non-ASCII character in this list
     * only acts as one single delimiter when the file is stored in a
     * single-byte encoding.
     *
     * @var string
     */
    public $_delimiters = " \"'<>;-.!`#+*~´&\$\\?,:(){}[]%/";

    /**
     * Minimal default word length.
     *
     * @var int
     */
    public $_word_length = 3;

    /**
     * Words to ignore.
     *
     * @var array
     */
    public $_ignore_words = [
        'also',
        'and',
        'any',
        'are',
        'been',
        'being',
        'both',
        'bring',
        'but',
        'can',
        'comes',
        'could',
        'does',
    ];

    /**
     * Constructor.
     *
     * Declared as __construct() because a method named after the class is
     * no longer treated as a constructor by PHP 8.
     *
     * @param int          $word_length Minimal word length
     * @param string|false $delimiters  Word delimiters; any non-string value
     *                                  keeps the default delimiters
     */
    public function __construct($word_length = 3, $delimiters = false)
    {
        if (is_string($delimiters)) {
            $this->_delimiters = $delimiters;
        }
        $this->_word_length = $word_length;
    }

    /**
     * Extract the words from a text string.
     *
     * Words are trimmed and lower-cased. Words shorter than the minimal word
     * length and words on the ignore list are dropped. The words are returned
     * unescaped; escaping is the job of whoever prints them. The caller's
     * string is not modified.
     *
     * @param string $string Text string
     *
     * @return array Words in order of appearance, duplicates included
     */
    public function &_split_words($string)
    {
        $words = [];

        // Line breaks separate words the same way blanks do.
        $text = str_replace(["\r", "\n"], ' ', (string) $string);

        // The configured delimiters are used for every call, including the
        // first one, so the first word is split like all the others.
        for (
            $tok = strtok($text, $this->_delimiters);
            $tok !== false;
            $tok = strtok($this->_delimiters)
        ) {
            $word = strtolower(trim($tok));

            if (strlen($word) < $this->_word_length) {
                continue;
            }
            if (in_array($word, $this->_ignore_words, true)) {
                continue;
            }

            $words[] = $word;
        }

        return $words;
    }

    /**
     * Print out the (key)words of a text string.
     *
     * One line per distinct word, in sorted order:
     * Keyword => Occurrence => crc32 Checksum
     *
     * @param string $content Text string
     *
     * @return void
     */
    public function show_keywords($content)
    {
        $words = $this->_split_words($content);
        sort($words);

        // Occurrence of words
        foreach (array_count_values($words) as $word => $count) {
            // Purely numeric words come back as integer array keys.
            $word = (string) $word;

            // The checksum is taken from the raw word; only the printed copy
            // is HTML-escaped. On 64-bit builds crc32() yields values up to
            // 4294967295, which needs an unsigned column to be stored.
            echo htmlentities($word) . ' => ' . $count . ' => ' . crc32($word) . '<br>';
        }
    }
}

// Print out:
// Keyword => Occurrence => crc32 Checksum
$index = new index();

// Define an existing text file here
$text_file = 'a_text_file.txt';

$content = (is_file($text_file) && is_readable($text_file))
    ? file_get_contents($text_file)
    : false;

if ($content === false) {
    echo 'Cannot read text file: ' . htmlentities($text_file) . '<br>';
} else {
    // Show keywords of a text
    $index->show_keywords($content);
}
?> |
Previous Message | Next Message |
| Comments: | ||
| 카/드/돌려막기로/망가지신분/꼭 보세요! | 이혜진 | 12/05/04 00:00 |
| 카,드,연,체,자/예.정.자 대,출 100-1000만원 | 이다현 | 12/03/04 00:16 |
| ½Å.¿ë.ºÒ.·®/Ä«.µå.¿¬.ü/´ë.Ãâ/È¥ÀÚ/ÇØ.°áÇÏ´Â/¹æ.¹ý | ±èÇö¼ | 12/02/04 20:22 |
| 대'출'거'절'시'100%되'게'하'는'방'법 | 한경욱 | 11/29/04 11:58 |
| 신.용.불.량.자/당일500/대.출.비.법 | 이희진 | 11/21/04 20:40 |
| 신.용불.량자가 알아야할 정.보 대.공.개 | 이다진 | 11/20/04 07:13 |
| 신.용.불.량.자/당일500/대.출.비.법 | 이희진 | 11/20/04 05:54 |
| We already have | Andrew Rodland | 03/23/04 15:23 |
| RE: More easy with MySQL FullText | Armand Turpel | 02/26/04 03:39 |
| More easy with MySQL FullText | Pablo Almunia | 02/23/04 19:31 |
| Keytable with fixed rows length | Armand Turpel | 02/18/04 04:40 |
|
If you are looking for help, please post on the appropriate forum here. Your questions will be answered much more quickly. | ||


