Version: 1.0

Type: Class

Category: File Management

License: GNU General Public License

Description: A powerful RSS 2.0 parser able to capture enclosures, ENT (Easy News Topics), Dublin Core (dc:*), rvw, and much more...



<?

/******************************************************************************************************************
*Date: October 21, 2004
*Author: Dan Cochran
*
*Description: This will gather all basic information contained in an RSS 2.0 feed. Along with basic functionality,
*             the RSSParser will also parse the most commonly used Dublin Core tags along with enclosure attributes,
*             the Well-Formed Web commentRss tag, image, and rvw tags. It will also recover ENT (Easy News Topics) tags.
*             Please see the example at the bottom to see how to use the class and, most importantly, have fun!
*              
*Email me with questions, comments, or bug reports at dan@deecodameeko.com
* url: http://deecodameeko.com/code/
*
*Disclaimer: 
*Copyright (C) 2005  Dan Cochran
*
*This program is free software; you can redistribute it and/or
*modify it under the terms of the GNU General Public License
*as published by the Free Software Foundation; either version 2
*of the License, or (at your option) any later version.
*
*This program is distributed in the hope that it will be useful,
*but WITHOUT ANY WARRANTY; without even the implied warranty of
*MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
*GNU General Public License for more details.
*
*You should have received a copy of the GNU General Public License
*along with this program; if not, write to the Free Software
*Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
******************************************************************************************************************/


/**
 * Event-driven RSS 2.0 parser built on PHP's expat-style XML extension.
 *
 * The class registers itself (or another RSSParser-like object) as the
 * start-element / end-element / character-data handler of an XML parser and
 * collects channel, item, image, textInput, rvw and rating data.
 *
 * Finished records are published into $GLOBALS ('channel', 'items', 'image',
 * 'rvw', 'rating', 'textinput') and the get_*() methods read them back from
 * there. Because that storage is global, it is shared by every instance and
 * 'items', 'rvw' and 'rating' keep growing across successive parse() calls
 * unless the caller clears them.
 *
 * Element names are compared in upper case because the XML extension folds
 * names to upper case by default; for the same reason the keys of attribute
 * arrays (enclosure, ent, cloud) are upper case.
 */
class RSSParser{

    // Context flags: true while the corresponding element is open.
    var $insideitem = false;
    var $insideimage = false;
    var $insidervw = false;
    var $insiderating = false;
    var $insidetext = false;
    var $url = "";
    var $enclosure = '';
    var $ent = array();

    // Name of the element whose text is currently being read; '' between elements.
    var $tag = '';
    // Incremented once per chunk of item-title text; not used elsewhere in this class.
    var $i = 0;
    // True from the moment a <category> opens until its first chunk of text arrives.
    var $newCategory = false;

    // Channel-level accumulators.
    var $cTitle, $cLink, $cDesc, $cLanguage, $cCopyright, $cManageEditor, $cWebmaster, $cLastBuild, $cRating, $cDocs, $cGenerator, $cPubDate;
    var $cCategory = array();

    // <image> accumulators.
    var $imTitle, $imUrl, $imLink, $imWidth, $imHeight;

    // <item> accumulators.
    var $iTitle, $iLink, $iDesc, $iAuthor, $iComments, $iEnclosure, $iGuid, $iPudDate, $iSource, $iCloud;
    var $iCategory = array();
    // Never written by the handlers; declared because earlier versions created them dynamically.
    var $guid = '';
    var $dcLink = '';

    // rvw accumulators.
    var $rvLink;
    var $rvmin, $rvmax, $rvvalue;

    // Dublin Core accumulators ($dcmin, $dcmax and $dcvalue are unused but kept declared).
    var $dcIdentifier, $dcType, $dcTitle, $dcCreator, $dcPublisher, $dcDate, $dcmin, $dcmax, $dcvalue, $dcContributor, $dcFormat,
        $dcSubject, $dcSource, $dcLanguage, $dcRelation, $dcCoverage, $dcRights, $dcDescription;

    var $contenEncoded, $wfwComment;

    // <textInput> accumulators.
    var $tTitle, $tLink, $tDesc, $tName;

    // Most recently completed record of each kind, keyed by kind.
    var $result;

    /**
     * Read and parse a feed.
     *
     * @param string      $url        Anything fopen() can read (file name or URL).
     * @param object|null $rss_parser Object whose startElement/endElement/characterData
     *                                methods receive the parse events; defaults to $this.
     * @return bool True when the whole document was parsed without an XML error,
     *              false when the source could not be opened or the XML was rejected.
     */
    function parse($url, $rss_parser = null){
        if ($rss_parser === null) {
            $rss_parser = $this;
        }

        $xml_parser = xml_parser_create();
        // Callable arrays replace xml_set_object() + method-name strings: they need
        // no call-time pass-by-reference and are the non-deprecated form on PHP 8.4+.
        xml_set_element_handler(
            $xml_parser,
            array($rss_parser, "startElement"),
            array($rss_parser, "endElement")
        );
        xml_set_character_data_handler($xml_parser, array($rss_parser, "characterData"));

        $ok = false;
        $fp = fopen($url, "r");
        if ($fp) {
            $ok = true;
            while (!feof($fp)) {
                $data = fread($fp, 4096);
                // Compare strictly: a chunk consisting of "0" is valid data, and an
                // empty read without EOF (e.g. a timeout) must not spin forever.
                if ($data === false || $data === '') {
                    break;
                }
                if (!xml_parse($xml_parser, $data, false)) {
                    $ok = false;
                    break;
                }
            }
            // Always tell the parser the document is complete so it can flush and validate.
            if ($ok && !xml_parse($xml_parser, '', true)) {
                $ok = false;
            }
            fclose($fp);
        }
        else{
            echo  "error, could not open $url\n";
        }

        // Only needed where parsers are resources; the call does nothing on PHP 8+.
        if (version_compare(PHP_VERSION, '8.0.0', '<')) {
            xml_parser_free($xml_parser);
        }

        return $ok;
    }

    /** @return array|null Channel record, or null when no channel has been parsed. */
    function get_channel(){
        return isset($GLOBALS['channel']) ? $GLOBALS['channel'] : null;
    }

    /** @return array|null List of item records, or null when no item has been parsed. */
    function get_items(){
        return isset($GLOBALS['items']) ? $GLOBALS['items'] : null;
    }

    /** @return array|null Image record, or null when the feed had no image. */
    function get_image(){
        if(isset($GLOBALS['image'])){
            return($GLOBALS['image']);
        }
    }

    /** @return array|null List of rvw records, or null when none were parsed. */
    function get_rvw(){
        if(isset($GLOBALS['rvw'])){
            return($GLOBALS['rvw']);
        }
    }

    /** @return array|null List of rating records, or null when none were parsed. */
    function get_rating(){
        if(isset($GLOBALS['rating'])){
            return($GLOBALS['rating']);
        }
    }

    /** @return array|null textInput record, or null when the feed had none. */
    function get_textinput(){
        if(isset($GLOBALS['textinput'])){
            return($GLOBALS['textinput']);
        }
    }

    /**
     * Start-element handler: records the open element, switches context flags
     * and captures the attribute arrays of enclosure / ent:topic / ent:cloud.
     *
     * @param mixed  $parser  XML parser handle (unused).
     * @param string $tagName Upper-cased element name.
     * @param array  $attrs   Element attributes.
     */
    function startElement($parser, $tagName, $attrs) {

        $this->tag = $tagName;

        switch ($tagName) {
            case "ITEM":
                $this->insideitem = true;
                break;
            case "IMAGE":
                $this->insideimage = true;
                break;
            case "RVW:ITEM":
                $this->insidervw = true;
                break;
            case "RVW:RATING":
                $this->insiderating = true;
                break;
            case 'TEXTINPUT':
                $this->insidetext = true;
                break;
            case 'CATEGORY':
                // The next chunk of category text starts a new list entry.
                $this->newCategory = true;
                break;
            case "ENCLOSURE":
                $this->enclosure = $attrs;
                break;
            case 'ENT:TOPIC':
                $this->ent = $attrs;
                break;
            case 'ENT:CLOUD':
                $this->iCloud = $attrs;
                break;
        }
    }

    /**
     * Character-data handler: appends text to the accumulator that belongs to
     * the open element in every active context. The parser may deliver one
     * element's text in several chunks, hence the appending.
     *
     * @param mixed  $parser XML parser handle (unused).
     * @param string $data   Chunk of text.
     */
    function characterData($parser, $data) {
        // Context => (element name => accumulator property).
        static $fields = array(
            'item' => array(
                'TITLE'           => 'iTitle',
                'DESCRIPTION'     => 'iDesc',
                'LINK'            => 'iLink',
                'AUTHOR'          => 'iAuthor',
                'COMMENTS'        => 'iComments',
                'PUBDATE'         => 'iPudDate',
                'SOURCE'          => 'iSource',
                'GUID'            => 'iGuid',
                'DC:CONTRIBUTOR'  => 'dcContributor',
                'DC:FORMAT'       => 'dcFormat',
                'DC:RELATION'     => 'dcRelation',
                'DC:COVERAGE'     => 'dcCoverage',
                'DC:IDENTIFIER'   => 'dcIdentifier',
                'DC:TYPE'         => 'dcType',
                'DC:TITLE'        => 'dcTitle',
                'DC:CREATOR'      => 'dcCreator',
                'DC:PUBLISHER'    => 'dcPublisher',
                'DC:DATE'         => 'dcDate',
                'DC:SUBJECT'      => 'dcSubject',
                'DC:SOURCE'       => 'dcSource',
                'DC:LANGUAGE'     => 'dcLanguage',
                'DC:RIGHTS'       => 'dcRights',
                'DC:DESCRIPTION'  => 'dcDescription',
                'CONTENT:ENCODED' => 'contenEncoded',
                'WFW:COMMENTRSS'  => 'wfwComment',
            ),
            'image' => array(
                'TITLE'  => 'imTitle',
                'URL'    => 'imUrl',
                'LINK'   => 'imLink',
                'WIDTH'  => 'imWidth',
                'HEIGHT' => 'imHeight',
            ),
            'rvw' => array(
                'RVW:LINK' => 'rvLink',
            ),
            'rating' => array(
                'RVW:MINIMUM' => 'rvmin',
                'RVW:MAXIMUM' => 'rvmax',
                'RVW:VALUE'   => 'rvvalue',
            ),
            'textinput' => array(
                'TITLE'       => 'tTitle',
                'DESCRIPTION' => 'tDesc',
                'LINK'        => 'tLink',
                'NAME'        => 'tName',
            ),
            'channel' => array(
                'TITLE'          => 'cTitle',
                'LINK'           => 'cLink',
                'DESCRIPTION'    => 'cDesc',
                'LANGUAGE'       => 'cLanguage',
                'COPYRIGHT'      => 'cCopyright',
                'MANAGINGEDITOR' => 'cManageEditor',
                'WEBMASTER'      => 'cWebmaster',
                'LASTBUILDDATE'  => 'cLastBuild',
                'GENERATOR'      => 'cGenerator',
                'RATING'         => 'cRating',
                'DOCS'           => 'cDocs',
                'PUBDATE'        => 'cPubDate',
            ),
        );

        $tag = $this->tag;
        if ($tag === '') {
            // Text between elements (indentation, newlines) belongs to no field.
            return;
        }

        $contexts = array();
        if ($this->insideitem)   { $contexts[] = 'item'; }
        if ($this->insideimage)  { $contexts[] = 'image'; }
        if ($this->insidervw)    { $contexts[] = 'rvw'; }
        if ($this->insiderating) { $contexts[] = 'rating'; }
        if ($this->insidetext)   { $contexts[] = 'textinput'; }
        if (!$contexts) {
            // Outside every nested structure the text belongs to the channel itself.
            $contexts[] = 'channel';
        }

        foreach ($contexts as $context) {
            if ($tag === 'CATEGORY' && $context === 'item') {
                $this->appendCategory($this->iCategory, $data);
            } elseif ($tag === 'CATEGORY' && $context === 'channel') {
                $this->appendCategory($this->cCategory, $data);
            } elseif (isset($fields[$context][$tag])) {
                $property = $fields[$context][$tag];
                $this->$property .= $data;
            }
        }

        if ($this->insideitem && $tag === 'TITLE') {
            $this->i++;
        }
    }

    /**
     * End-element handler: when a structure closes, snapshots its accumulators
     * into $this->result, publishes the record to $GLOBALS and resets the
     * accumulators for the next occurrence.
     *
     * @param mixed  $parser  XML parser handle (unused).
     * @param string $tagName Upper-cased element name.
     */
    function endElement($parser, $tagName) {

        // Anything that follows a closing tag is not part of the closed element.
        $this->tag = '';

        switch ($tagName) {
            case "ITEM":
                $this->result['item'] = array(
                    "title"           => $this->iTitle,
                    "description"     => $this->iDesc,
                    "link"            => $this->iLink,
                    "author"          => $this->iAuthor,
                    'category'        => $this->iCategory,
                    'comments'        => $this->iComments,
                    'pubdate'         => $this->iPudDate,
                    "source"          => $this->iSource,
                    'enclosure'       => $this->enclosure,
                    'guid'            => $this->iGuid,
                    'ent'             => $this->ent,
                    'cloud'           => $this->iCloud,
                    'dc:identifier'   => $this->dcIdentifier,
                    'dc:type'         => $this->dcType,
                    'dc:title'        => $this->dcTitle,
                    'dc:creator'      => $this->dcCreator,
                    'dc:publisher'    => $this->dcPublisher,
                    'dc:date'         => $this->dcDate,
                    'dc:contributor'  => $this->dcContributor,
                    'dc:format'       => $this->dcFormat,
                    'dc:source'       => $this->dcSource,
                    'dc:language'     => $this->dcLanguage,
                    'dc:relation'     => $this->dcRelation,
                    'dc:coverage'     => $this->dcCoverage,
                    'dc:rights'       => $this->dcRights,
                    'dc:desc'         => $this->dcDescription,
                    'dc:subject'      => $this->dcSubject,
                    'content:encoded' => $this->contenEncoded,
                    'wfw'             => $this->wfwComment,
                );
                $GLOBALS['items'][] = $this->result['item'];

                $this->resetFields(array(
                    'iTitle', 'iDesc', 'iLink', 'iAuthor', 'iComments', 'iPudDate', 'iSource',
                    'enclosure', 'iGuid',
                    'dcIdentifier', 'dcType', 'dcTitle', 'dcCreator', 'dcPublisher', 'dcDate',
                    'dcContributor', 'dcFormat', 'dcSource', 'dcLanguage', 'dcRelation',
                    'dcCoverage', 'dcRights', 'dcDescription', 'dcSubject',
                    'contenEncoded', 'wfwComment',
                ));
                $this->iCategory = array();
                $this->ent = array();
                $this->iCloud = array();
                $this->insideitem = false;
                break;

            case 'IMAGE':
                $this->result['image'] = array(
                    'title'  => $this->imTitle,
                    'url'    => $this->imUrl,
                    'link'   => $this->imLink,
                    'width'  => $this->imWidth,
                    'height' => $this->imHeight,
                );
                $GLOBALS['image'] = $this->result['image'];

                $this->resetFields(array('imTitle', 'imUrl', 'imLink', 'imWidth', 'imHeight'));
                $this->insideimage = false;
                break;

            case 'RVW:ITEM':
                $this->result['rvw'] = array('link' => $this->rvLink);
                // Append: every reviewed item gets its own entry.
                $GLOBALS['rvw'][] = $this->result['rvw'];

                $this->rvLink = '';
                $this->insidervw = false;
                break;

            case 'RVW:RATING':
                $this->result['rating'] = array(
                    'min'   => $this->rvmin,
                    'max'   => $this->rvmax,
                    'value' => $this->rvvalue,
                );
                $GLOBALS['rating'][] = $this->result['rating'];

                $this->resetFields(array('rvmin', 'rvmax', 'rvvalue'));
                $this->insiderating = false;
                break;

            case 'CHANNEL':
                $this->result['channel'] = array(
                    'title'     => $this->cTitle,
                    'link'      => $this->cLink,
                    'desc'      => $this->cDesc,
                    'lang'      => $this->cLanguage,
                    'copy'      => $this->cCopyright,
                    'editor'    => $this->cManageEditor,
                    'webmaster' => $this->cWebmaster,
                    'lastbuild' => $this->cLastBuild,
                    "generator" => $this->cGenerator,
                    'rating'    => $this->cRating,
                    'docs'      => $this->cDocs,
                    'category'  => $this->cCategory,   // one entry per <category>
                    'pubDate'   => $this->cPubDate,
                );
                $GLOBALS['channel'] = $this->result['channel'];

                $this->resetFields(array(
                    'cTitle', 'cLink', 'cDesc', 'cLanguage', 'cCopyright', 'cManageEditor',
                    'cWebmaster', 'cLastBuild', 'cGenerator', 'cRating', 'cDocs', 'cPubDate',
                ));
                $this->cCategory = array();
                break;

            case 'TEXTINPUT':
                $this->result['textinput'] = array(
                    'title'       => $this->tTitle,
                    'name'        => $this->tName,
                    'link'        => $this->tLink,
                    'description' => $this->tDesc,
                );
                $GLOBALS['textinput'] = $this->result['textinput'];

                $this->resetFields(array('tTitle', 'tDesc', 'tLink', 'tName'));
                $this->insidetext = false;
                break;
        }
    }

    /**
     * Internal helper: add category text to a category list, starting a new
     * entry for the first chunk of each <category> and extending that entry
     * for any further chunks.
     *
     * @param array  $list Category list to modify (passed by reference).
     * @param string $data Chunk of text.
     */
    function appendCategory(&$list, $data) {
        if ($this->newCategory || !$list) {
            $list[] = $data;
            $this->newCategory = false;
        } else {
            $list[count($list) - 1] .= $data;
        }
    }

    /**
     * Internal helper: set each named property back to the empty string.
     *
     * @param array $names Property names.
     */
    function resetFields($names) {
        foreach ($names as $name) {
            $this->$name = '';
        }
    }

}//end class

// Sample usage of the class
/*
$rs = new RSSParser(); //create a new instance
$rs->parse($url_or_filename, $rs);
$rss['channel'] = $rs->get_channel();
$rss['items'] = $rs->get_items();
$rss['image'] = $rs->get_image();
$rss['rvws'] = $rs->get_rvw();
$rss['ratings'] = $rs->get_rating();
$rss['textinput'] = $rs->get_textinput();

print_r($rss["channel"]);
print_r($rss["items"]);*/
?>