<?php
// Length limit applied when filtering extracted words:
// FilterCommonAndDuplicateWords() keeps a word only when its length is
// greater than 1 and strictly less than this value.
$MAX_WORD_LENGTH = 50;

  
// COMMON WORD LIST
// Lookup table of lowercase words that FilterCommonAndDuplicateWords() drops
// from the extracted keyword set. The words are the array keys; every value
// is 1, so membership is tested by key, not by searching the values.
// Entries are grouped by first letter, one group per line (there is no "x"
// group).
// Several keys are repeated (for example "best", "eve", "home"); a repeated
// key simply overwrites the earlier entry with the same value.
// NOTE(review): "havet" and "maximun" look like typos (perhaps "havent" and
// "maximum") -- confirm before changing, since edits alter which words are
// indexed.
$COMMON_WORDS = array("a"=>1,"as"=>1,"any"=>1,"all"=>1,"ate"=>1,"after"=>1,"am"=>1,"an"=>1,"and"=>1,"are"=>1,"at"=>1,"away"=>1,"about"=>1,"ago"=>1,"almost"=>1,"along"=>1,"answer"=>1,"anybody"=>1,"anywhere"=>1,"arent"=>1,"around"=>1,"ask"=>1,"also"=>1,
                  
"b"=>1,"be"=>1,"better"=>1,"black"=>1,"brown"=>1,"but"=>1,"both"=>1,"bring"=>1,"because"=>1,"been"=>1,"before"=>1,"big"=>1,"blue"=>1,"best"=>1,"by"=>1,"beg"=>1,"bad"=>1,"being"=>1,"best"=>1,"between"=>1,"based"=>1,
                  
"c"=>1,"call"=>1,"can"=>1,"cut"=>1,"carry"=>1,"cold"=>1,"could"=>1,"clean"=>1,"cant"=>1,"come"=>1,"couldnt"=>1,"consider"=>1,"called"=>1,
                  
"d"=>1,"did"=>1,"does"=>1,"do"=>1,"down"=>1,"dont"=>1,"day"=>1,"didnt"=>1,
                  
"e"=>1,"eat"=>1,"every"=>1,"eve"=>1,"egg"=>1,"end"=>1,"eve"=>1,"era"=>1,"eye"=>1,"each"=>1,"either"=>1,"else"=>1,"even"=>1,"ever"=>1,"every"=>1,"everybody"=>1,"everyone"=>1,
                  
"f"=>1,"for"=>1,"from"=>1,"full"=>1,"found"=>1,"far"=>1,"fly"=>1,"fall"=>1,"first"=>1,"fast"=>1,"five"=>1,"fall"=>1,"find"=>1,"four"=>1,"funny"=>1,
                  
"g"=>1,"go"=>1,"get"=>1,"goes"=>1,"give"=>1,"gun"=>1,"good"=>1,"god"=>1,"give"=>1,"got"=>1,"green"=>1,"grow"=>1,"good"=>1,"green"=>1,"grow"=>1,"got"=>1,"gave"=>1,"going"=>1,"gone"=>1,"given"=>1,
                  
"h"=>1,"hi"=>1,"hoo"=>1,"he"=>1,"his"=>1,"him"=>1,"her"=>1,"has"=>1,"how"=>1,"hold"=>1,"how"=>1,"hot"=>1,"had"=>1,"here"=>1,"help"=>1,"hurt"=>1,"have"=>1,"havet"=>1,"having"=>1,"hers"=>1,"home"=>1,"home"=>1,"href"=>1,
                  
"i"=>1,"in"=>1,"is"=>1,"if"=>1,"its"=>1,"i"=>1,"it"=>1,"into"=>1,"im"=>1,"ill"=>1,"id"=>1,
                  
"j"=>1,"just"=>1,"jump"=>1,"jet"=>1,"jaw"=>1,"jar"=>1,"jag"=>1,"jam"=>1,"job"=>1,"jog"=>1,"joy"=>1,"jot"=>1,
                  
"k"=>1,"kind"=>1,"keep"=>1,"kiss"=>1,"kinder"=>1,"kind"=>1,"kid"=>1,"key"=>1,"kit"=>1,"ken"=>1,"know"=>1,
                  
"l"=>1,"like"=>1,"little"=>1,"lust"=>1,"led"=>1,"lap"=>1,"let"=>1,"live"=>1,"long"=>1,"live"=>1,"let"=>1,"look"=>1,"law"=>1,"leg"=>1,"lie"=>1,"lid"=>1,"less"=>1,"look"=>1,"looking"=>1,
                  
"m"=>1,"my"=>1,"may"=>1,"me"=>1,"many"=>1,"must"=>1,"much"=>1,"made"=>1,"my"=>1,"make"=>1,"met"=>1,"mix"=>1,"mom"=>1,"mud"=>1,"mug"=>1,"mum"=>1,"myself"=>1,"more"=>1,"most"=>1,"max"=>1,"maximun"=>1,
                  
"n"=>1,"no"=>1,"nose"=>1,"not"=>1,"new"=>1,"now"=>1,"nor"=>1,"nod"=>1,"now"=>1,"nil"=>1,"nib"=>1,"nut"=>1,"nun"=>1,"never"=>1,"near"=>1,"news"=>1,"none"=>1,"nothing"=>1,"next"=>1,
                  
"o"=>1,"of"=>1,"on"=>1,"or"=>1,"old"=>1,"open"=>1,"once"=>1,"only"=>1,"off"=>1,"our"=>1,"oops"=>1,"out"=>1,"oil"=>1,"old"=>1,"oak"=>1,"oak"=>1,"ohm"=>1,"oho"=>1,"ore"=>1,"owl"=>1,"often"=>1,"other"=>1,"ours"=>1,"out"=>1,"over"=>1,"one"=>1,
                  
"p"=>1,"play"=>1,"pull"=>1,"pretty"=>1,"put"=>1,"push"=>1,"pad"=>1,"pop"=>1,"pan"=>1,"pap"=>1,"pay"=>1,"peg"=>1,"pet"=>1,"phi"=>1,"pie"=>1,"pig"=>1,"pet"=>1,"pub"=>1,"pin"=>1,"pit"=>1,"ply"=>1,"pod"=>1,"pus"=>1,"page"=>1,"please"=>1,
                  
"q"=>1,"question"=>1,"quick"=>1,"quest"=>1,
                  
"r"=>1,"ran"=>1,"red"=>1,"run"=>1,"ride"=>1,"read"=>1,"rag"=>1,"rat"=>1,"ran"=>1,"ram"=>1,"red"=>1,"ray"=>1,"rev"=>1,"rid"=>1,"rib"=>1,"rig"=>1,"rim"=>1,"rip"=>1,"rob"=>1,"rod"=>1,"roe"=>1,"row"=>1,"rum"=>1,"rug"=>1,"rut"=>1,"rather"=>1,"recent"=>1,
                  
"s"=>1,"so"=>1,"some"=>1,"stop"=>1,"say"=>1,"sing"=>1,"say"=>1,"she"=>1,"stay"=>1,"said"=>1,"start"=>1,"soon"=>1,"six"=>1,"seven"=>1,"see"=>1,"sit"=>1,"sitting"=>1,"son"=>1,"soap"=>1,"spy"=>1,"sum"=>1,"say"=>1,"sea"=>1,"sex"=>1,"shy"=>1,"sib"=>1,"sic"=>1,"sin"=>1,"sip"=>1,"sir"=>1,"sky"=>1,"ski"=>1,"sly"=>1,"sob"=>1,"sow"=>1,"sod"=>1,"should"=>1,"something"=>1,"sometime"=>1,"somewhere"=>1,"set"=>1,"simple"=>1,"such"=>1,"side"=>1,
                  
"t"=>1,"to"=>1,"the"=>1,"then"=>1,"that"=>1,"this"=>1,"those"=>1,"than"=>1,"these"=>1,"those"=>1,"they"=>1,"thank"=>1,"tank"=>1,"tell"=>1,"take"=>1,"together"=>1,"try"=>1,"today"=>1,"three"=>1,"tie"=>1,"thy"=>1,"tax"=>1,"tea"=>1,"tap"=>1,"taxi"=>1,"ten"=>1,"tin"=>1,"tip"=>1,"tit"=>1,"toe"=>1,"tog"=>1,"tom"=>1,"ton"=>1,"top"=>1,"tow"=>1,"toy"=>1,"two"=>1,"tub"=>1,"tug"=>1,"tun"=>1,"tux"=>1,"true"=>1,"thank"=>1,"theirs"=>1,"them"=>1,"there"=>1,"though"=>1,"through"=>1,"thus"=>1,"time"=>1,"times"=>1,"too"=>1,"type"=>1,
                  
"u"=>1,"use"=>1,"us"=>1,"using"=>1,"usage"=>1,"useful"=>1,"up"=>1,"upon"=>1,"ups"=>1,"under"=>1,"until"=>1,"untrue"=>1,"users"=>1,
                  
"v"=>1,"van"=>1,"vex"=>1,"via"=>1,"vow"=>1,"vat"=>1,"vim"=>1,"version"=>1,"very"=>1,
                  
"w"=>1,"was"=>1,"waste"=>1,"why"=>1,"who"=>1,"whose"=>1,"well"=>1,"walk"=>1,"were"=>1,"which"=>1,"wish"=>1,"white"=>1,"with"=>1,"would"=>1,"write"=>1,"when"=>1,"what"=>1,"wash"=>1,"warm"=>1,"want"=>1,"went"=>1,"will"=>1,"won"=>1,"woe"=>1,"wow"=>1,"woo"=>1,"wins"=>1,"where"=>1,"web"=>1,"way"=>1,"were"=>1,"where"=>1,"whom"=>1,"wide"=>1,"within"=>1,"without"=>1,"world"=>1,"worse"=>1,"worst"=>1,"www"=>1,"we"=>1,"whether"=>1,
                  
"y"=>1,"yes"=>1,"ya"=>1,"you"=>1,"yellow"=>1,"your"=>1,"yet"=>1,"yen"=>1,"year"=>1,"yep"=>1,"yon"=>1,"yours"=>1,
                  
"z"=>1,"zoo"=>1,"zip"=>1,"zed"=>1,"zinc"=>1,"zoom"=>1,"zero"=>1,"zeal"=>1,"zone"=>1);


// Main entry point: show the upload form, or process a submitted one.

// keyword => keyid cache, filled by LoadCurrentWords() and extended by
// ProcessForm() as new keywords are inserted.
$allWords = array();

// Read the submitted fields explicitly from the POST body rather than relying
// on register_globals to create $submit/$title/$body. Non-string values (for
// example "title[]=x") are treated as missing.
$submitted = isset($_POST['submit']);
$title = (isset($_POST['title']) && is_string($_POST['title'])) ? $_POST['title'] : '';
$body = (isset($_POST['body']) && is_string($_POST['body'])) ? $_POST['body'] : '';

if ($submitted) {
    // Compare against '' so that a value such as "0" is not mistaken for empty.
    if ($title !== '' && $body !== '') {
        // Connect only once the input is known to be usable.
        // NOTE(review): credentials are hard-coded (root, empty password) --
        // move them to configuration before deploying.
        mysql_connect("localhost", "root", "") or die("Unable to connect to database");
        mysql_select_db("test") or die("Unable to select database");

        LoadCurrentWords();
        ProcessForm($title, $body);
        echo "Successfully Finished Parsing and Uploading Content";
    } else {
        form("Please fill in the fields to upload\n");
    }
} else {
    // First visit: render the empty form without an error message.
    form('');
}

/**
 * Print the HTML upload form, optionally preceded by an error message.
 *
 * The form posts back to the current script with the fields "title", "body"
 * and "submit".
 *
 * @param string $errmsg Message shown in bold above the form. It is echoed
 *                       as-is (not HTML-escaped), so callers must pass
 *                       trusted text only. Defaults to no message.
 * @return void
 */
function form($errmsg = '')
{
    // A bare $PHP_SELF is not visible inside a function, so the action used to
    // be empty. Read it from $_SERVER and escape it: PHP_SELF can contain
    // attacker-supplied path info.
    $action = isset($_SERVER['PHP_SELF'])
        ? htmlspecialchars($_SERVER['PHP_SELF'], ENT_QUOTES)
        : '';
?>
   <h4 align="center">File Parser &amp; Uploader</h4>
   <div align="center"><b><?php echo $errmsg; ?></b></div>
   <center>
   <form method="POST" action="<?php echo $action; ?>">
   Title:   <input type="text" name="title" size="50" maxlength="100"><p>
   Abstract: <textarea rows=20 cols=50 wrap="off" name="body"></textarea><p>
     <input type="submit" name="submit" value="Start Parsing and Upload Content">
   </form>

   </center>
<?php
}

/**
 * Fill the global $allWords lookup from the keytable table.
 *
 * Each row's "keyword" column becomes an array key and its "keyid" column the
 * value. Entries already present in $allWords are kept unless a row uses the
 * same keyword. The script is terminated if the query fails.
 *
 * @return void
 */
function LoadCurrentWords()
{
    global $allWords;

    $rows = mysql_query("select keyid, keyword from keytable");
    if (!$rows) {
        die("Error in executing mysql query");
    }

    // mysql_fetch_array() returns false once the result set is exhausted.
    for ($record = mysql_fetch_array($rows); $record; $record = mysql_fetch_array($rows)) {
        $allWords[$record['keyword']] = $record['keyid'];
    }
}


/**
 * Split a text into its alphabetic words, lowercased.
 *
 * A word is a maximal run of characters for which ctype_alpha() is true;
 * every other character (digits, punctuation, whitespace) only separates
 * words. Words are returned in order of appearance, duplicates included.
 * The scan is byte-wise, so in the default "C" locale the bytes of
 * multi-byte UTF-8 letters act as separators.
 *
 * @param string $text Text to scan.
 * @return array List of lowercased words; empty when the text has none.
 */
function ExtractWords($text)
{
    $text = (string) $text;
    $length = strlen($text); // hoisted: the text never changes during the scan

    $wordList = array();
    $curWord = '';

    for ($i = 0; $i < $length; ++$i) {
        // Square-bracket offsets: the curly-brace form $text{$i} is a parse
        // error on PHP 8.
        $ch = $text[$i];

        if (ctype_alpha($ch)) {
            $curWord .= $ch;
        } elseif ($curWord !== '') {
            // A non-letter ends the word collected so far.
            $wordList[] = strtolower($curWord);
            $curWord = '';
        }
    }

    // Flush a word that runs up to the end of the text.
    if ($curWord !== '') {
        $wordList[] = strtolower($curWord);
    }

    return $wordList;
}

/**
 * Reduce a word list to the set of distinct, non-common words.
 *
 * A word is kept when its length is greater than 1 and strictly less than the
 * global $MAX_WORD_LENGTH, and it has no truthy entry in the global
 * $COMMON_WORDS table.
 *
 * @param array $wordList Words to filter (ExtractWords() supplies lowercase
 *                        alphabetic strings).
 * @return array Map of kept word => 1, in order of first appearance.
 */
function FilterCommonAndDuplicateWords($wordList)
{
    global $COMMON_WORDS;
    global $MAX_WORD_LENGTH;

    $wordMap = array();

    foreach ($wordList as $word) {
        $len = strlen($word);
        if (!($len > 1 && $len < $MAX_WORD_LENGTH)) {
            continue;
        }

        // isset()/empty() test for the key without raising an "undefined
        // index" notice, which the plain $array[$word] read did for every new
        // word.
        if (isset($wordMap[$word]) || !empty($COMMON_WORDS[$word])) {
            continue;
        }

        $wordMap[$word] = 1;
    }

    return $wordMap;
}

/**
 * Store one piece of content together with its keyword index.
 *
 * Inserts a row into "content", then for every distinct non-common word of
 * $body inserts a "link" row (keyid, contid). Keywords not yet present in the
 * global $allWords cache are first inserted into "keytable" and added to the
 * cache. An open ext/mysql connection is required. The script is terminated
 * if any query fails, so a failed insert can no longer produce links that
 * point at id 0.
 *
 * @param string $title Content title.
 * @param string $body  Content text; also the source of the keywords.
 * @return void
 */
function ProcessForm($title, $body)
{
    global $allWords;

    $wordList = FilterCommonAndDuplicateWords(ExtractWords($body));

    // mysql_real_escape_string() honours the connection character set, unlike
    // the deprecated mysql_escape_string().
    $stored = mysql_query(sprintf(
        "INSERT INTO content (title, abstract) VALUES ('%s', '%s')",
        mysql_real_escape_string($title),
        mysql_real_escape_string($body)
    ));
    if (!$stored) {
        die("Unable to store content");
    }

    // Id generated for the content row just inserted.
    $contentId = mysql_insert_id();

    // $wordList maps word => 1, so only the keys are of interest. foreach
    // replaces each(), which no longer exists on PHP 8.
    foreach (array_keys($wordList) as $word) {
        if (empty($allWords[$word])) {
            // Unknown keyword: create it and remember its id for later words
            // and later calls.
            $created = mysql_query(sprintf(
                "INSERT INTO keytable ( keyword ) VALUES ( '%s' )",
                mysql_real_escape_string($word)
            ));
            if (!$created) {
                die("Unable to store keyword");
            }

            $keyId = mysql_insert_id();
            $allWords[$word] = $keyId;
        } else {
            $keyId = $allWords[$word];
        }

        // Link the keyword to the content.
        $linked = mysql_query(sprintf(
            "INSERT INTO link (keyid, contid) VALUES ( %d, %d )",
            $keyId,
            $contentId
        ));
        if (!$linked) {
            die("Unable to store keyword link");
        }
    }
}
?>