PHPBuilder - Detik.Usable



RSS Twitter
Snippets Http

Detik.Usable

by: dody wijaya
|
January 12, 2004

Version: 2.037

Type: Full Script

Category: HTTP

License: GNU General Public License

Description: Grabs news from Detikcom (www.detik.com), Indonesia's foremost online news site, and reformats it into a more usable design. It can also act as a proxy for other detik.usable nodes, supports caching, and is easy to extend with other functions.



<?php
// Full open tag: the short "<?" form only works when short_open_tag is enabled.
$app['name'] = "detik.usable";
$app['version'] = "2.037";

//  detik.usable: a fast-download detik.com
//	Author: dody suria wijaya - dswsh@plasa.com
//	License: THIS IS A PUBLIC DOMAIN CODE (you may even change the author -- see "Configuration"). 
//	Term of Usage: BY USING THIS SCRIPT, YOU TAKE FULL RESPONSIBILITY OF ANY LEGAL ACTION THAT MAY BE TAKEN. 

// CONFIGURATION
$wp_author = "<a href=mailto:dswsh@plasa.com>dody suria wijaya</a>";
$app['proxy_mode'] = FALSE;		//set this to TRUE to get data from other detik.usable nodes
$app['proxy_url'] = ""; 		//set this to other detik.usable node
$app['ads'] = TRUE;	//set this to TRUE to display advertisement
$app['hosted_by'] = get_current_user();//date ("F d Y H:i:s", getlastmod());
$app['zlib_support'] = extension_loaded('zlib');
$app['update_url'] = "http://detik.usable.web.id/detikusable-latest.php.txt";
$app['cache'] = TRUE;

//	VARIABLE DEFINITIONS

// Indonesian day and month names, indexed by date("w") and date("n").
$hari = array('Minggu','Senin','Selasa','Rabu','Kamis',"Jum'at","Sabtu");
$bulan = array('','Januari','Februari','Maret','April','Mei','Juni','Juli','Agustus','September','Oktober','November','Desember');
// Human-readable timestamp; the "WIB" suffix is hard-coded, so the server clock
// is assumed to run in that timezone.
$tgl_lengkap = $hari[date("w")].",&nbsp;".date("j")."&nbsp;".$bulan[date("n")].date(" Y")."&#151;".date("H:i")." WIB";

// SCRIPT_NAME is preferred over PHP_SELF: PHP_SELF carries request-supplied
// path info and is written unescaped into the generated HTML.
$self = isset($_SERVER['SCRIPT_NAME']) ? $_SERVER['SCRIPT_NAME'] : $_SERVER['PHP_SELF'];

// Request parameters. Missing ones become NULL (as before, but without the
// undefined-index warning), so the later isset($url) mode check still works.
$x = isset($_REQUEST['x']) ? $_REQUEST['x'] : null;
$url = isset($_REQUEST['url']) ? $_REQUEST['url'] : null;
$as_node = isset($_REQUEST['as_node']) ? $_REQUEST['as_node'] : null;	//client request 1 to ask for serialized news array (default: compressed)
$uncompressed = isset($_REQUEST['uc']) ? $_REQUEST['uc'] : null;	//client set this to 1 when requesting uncompressed stream
$query_string = isset($_SERVER['QUERY_STRING']) ? $_SERVER['QUERY_STRING'] : "";
$hostname = "www.detik.com";
$no = isset($_REQUEST['no']) ? $_REQUEST['no'] : null;

// OUTPUT TEMPLATES
// All templates below are double-quoted strings, so every plain $variable in
// them is interpolated once, right here, when the string is built.

// Page header (title + CSS) for a plain article page.
// NOTE(review): the name is spelled "$header_ouput"; it is not referenced in the
// part of the file visible here -- confirm whether it is still used anywhere.
$header_ouput = "<html><head><title>detik.usable: berita cepat ($tgl_lengkap)</title><style>body{font-family:verdana;}.o{font-size:11pt;}.p{font-size:13pt;}h1{font-family:georgia;font-size:18pt;align:center;}.s{color:#991111;font-weight:bold;}</style></head><body bgcolor=#ffffff>";
// Page header (title + CSS) echoed first by news_list_view() and news_detail_view().
$list_header_output = "<html><head><title>detik.usable: berita cepat ($tgl_lengkap)</title> <style>body{font-family:verdana,arial;}.i{font-size:10pt;font-weight:bold;font-family:verdana;}.j{font-family:arial;font-size:12pt;font-weight:bold;}.u{font-size:10pt;}.s{color:#991111;font-weight:bold;}a{text-decoration:none;}a:hover{text-decoration:underline;} .button {font-size:10pt;background:#D6E7EF;border-bottom:1px solid #104A7B;border-right:1px solid #104A7B;border-left: 1px solid #AFC4D5;border-top:1px solid #AFC4D5;color:#000066;margin:2;}.d{font-size:smaller;color:#555}#footer {font-size:smaller;clear:both;border:none;background:#e3ebe2;margin-top:20px;padding-left:30px;padding-top:10px;padding-bottom: 10px;}#footer a:link{color:#666666;} #footer a:active,.footer a:hover{color:#006486;}#footer a:visited{color:#949494;}</style></head><body bgcolor=#ffffff>";
// Banner shown above the news list: home link, a REFRESH link back to the
// current request ($self + query string) and the current date/time.
$list_top_output = "<h3><a href=http://dsw.gesit.com/?id=du title=Home target=_top><span style=color:#991111;>detik</span>.<span style=color:#119911;>usable</span></a> @ <a href=".$self."?".$query_string." class=button>REFRESH</a><br>$tgl_lengkap</h3>";
// Status fragments ("Yeah"/"Nope") embedded into the footer below.
$temp_pm = "Using Proxy: "; if ($app['proxy_mode']) $temp_pm .= 'Yeah'; else $temp_pm .= 'Nope';
$temp_zlib = "Compression Support: "; if ($app['zlib_support']) $temp_zlib .= 'Yeah'; else $temp_zlib .= 'Nope';
// Footer template. \$temp_orig and \$temp_stream are escaped, so they stay in
// the string as literal "$temp_orig" / "$temp_stream" placeholders; the view
// functions fill them in later by running this string through eval().
$list_footer = "<div id=footer><a href=mailto:dswsh@plasa.com target=_top>author</a> | <a href=\$temp_orig target=_top>Original page</a> | Generated by <a href=http://dsw.gesit.com/?id=du target=_top>{$app['name']} v{$app['version']}</a><BR><small>Host: {$app['hosted_by']} | $temp_pm | $temp_zlib | \$temp_stream</div></body></html>";
// Explanation shown when Detikcom cannot be reached.
// NOTE(review): $errstr and $errno are interpolated here, at definition time,
// and nothing above assigns them, so the message ends in " ()" -- confirm intent.
$error_cant_open = "<p>Unable to connect to Detikcom's server. This can be caused by this problems: <ul> <li>This webserver's IP has been blocked by Detikcom <li>Your webserver is behind firewall <li>Your PHP's setting has disabled socket connection-related functions <li>Detikcom is being swarmed by huge requests and really really busy <li>Detikcom's URL/port has been changed </ul> <p>What ever is the caused, I may not able to help you with this. Thank you. <p><a href=http://www.detik.com>Visit the original detik.com</a> $errstr ($errno)<br>";
// Two-column frameset: left frame "c" loads the news list (x=i), right frame
// "m" loads the welcome page (x=w); a non-frame link is offered as fallback.
$frameset_output = "<html><head><title>detik.usable: berita cepat ($tgl_lengkap)</title></head> <frameset cols=\"50%,*\"> <frame name=c target=m src=\"$self?x=i\"> <frame name=m target=_top src=\"$self?x=w\"> <noframes> <body>Looks like u need the <a href=$self?no=frame>non-frame version</a>.</body> </noframes> </frameset></html>";
// Welcome page body (user-facing text is in Indonesian).
// NOTE(review): $new_features is not assigned anywhere above this line, so it
// presumably expands to an empty string -- confirm.
$welcomepage_output = "<center><h1><a href=http://dsw.gesit.com title=\"detik.usable home\" target=_top style=text-decoration:none;> <span style=color:#991111;>detik</span>.<span style=color:#119911;>usable</span></a>: berita <i>cepat</i></h1> <p>Version {$app['version']}<p>dipersembahkan oleh $wp_author</p><p>Produk dari <a href=mailto:dswsh@plasa.com>dsw s/h</a></p></center> <hr><p>Situs ini ditujukan untuk mendemonstrasikan 'look and feel' dari potensi sebuah situs berita yg usable: cepat, bersih, dan mudah digunakan.<p>Bagi yang ingin nge-detik.usable, bisa copy n paste <a href=\"$self?x=s\" target=_top>source code situs ini</a> (public domain dan cuman 1 file) dan pasang di hosting apapun yg mendukung php, <b>untuk keperluan anda sendiri</b><ul><li><a href=$self?no=frame target=_top>Non-framed version</a> untuk pembenci frame<li><!--<a href=$self?au=1>Check update</a> versi terbaru--><li><a href=$self?cm=1>Pengaturan Cache</a></ul> $new_features</body></html>";

// Shared state used by the functions below: the open stream ($fp), the debug
// log entries ($log) and the parsed news array ($news).
// NOTE(review): a "global" statement at file scope presumably only matters when
// this file is included from inside a function -- confirm it is needed.
global $fp,$log,$news;

//	FUNCTIONS

	// Appends one HTML-escaped entry, prefixed with "<li>", to the global
	// debug log that dump_log() prints later.
	//   $string  text of the log entry
	function add_log ($string)
	{
		global $log;

		$entry = "<li>" . htmlspecialchars($string);
		$log[] = $entry;
	}

	// Prints the collected debug log inside a pink box, followed by a
	// print_r() dump of the global $news array, then terminates the script.
	// Callers never regain control after calling this.
	function dump_log ()
	{
		global $log,$news;

		$entries = implode("\r\n",$log);
		echo "<div style='border:thin solid #ffaaaa;background-color:#ffcccc;font-size:x-small;'><ul>" . $entries . "</ul></div>";
		echo "<pre>" . print_r($news, true) . "</pre>";
		exit;
	}

	// Reads the global stream $fp line by line and returns the lines found
	// between two marker lines.
	//   $pattern_start  PCRE pattern; lines are skipped up to and including the
	//                   first line that matches it
	//   $pattern_end    PCRE pattern; collecting stops at the first line that
	//                   matches it (that line is not included)
	// Returns the collected lines as one string; "" when nothing was collected
	// (previously NULL, because the accumulator was never initialised).
	function newsdetail_fetch ($pattern_start,$pattern_end)
	{
		global $fp;

		$buffers = "";

		// Skip the non-content part so later regex matching has less to scan.
		while (!feof($fp))
		{
			$buffer = fgets($fp, 65536);
			// fgets() returns FALSE on a read error or timeout; feof() may then
			// never become TRUE, so stop instead of looping forever.
			if ($buffer === false) break;
			if (preg_match($pattern_start,$buffer)) break;
		}

		// Collect lines until the end marker shows up.
		while (!feof($fp))
		{
			$buffer = fgets($fp, 65536);
			if ($buffer === false) break;
			if (preg_match($pattern_end,$buffer)) break;
			$buffers .= $buffer;
		}

		return $buffers;
	}
	
	// Opens a TCP connection to $host:$port (30 second timeout) and stores the
	// stream in the global $fp.
	// Returns TRUE on success. On failure the problem is logged and dump_log()
	// is called, which ends the script, so the FALSE return is effectively
	// never seen by the caller.
	function socket_open ($host,$port)
	{
		global $fp,$error_cant_open;

		$timeout = 30; //seconds
		add_log("sock_open: $host/$port ($timeout s timeout)");
		$fp = @fsockopen($host, $port, $errno, $errstr, $timeout);

		if ($fp)
		{
			add_log("sock_open: connected");
			return TRUE;
		}

		add_log("sock_open: can't connect");
		if ($errno != 0)
		{
			// A non-zero errno: the connect() attempt itself failed.
			add_log("sock_open: problem trying to connect (hostname notfound, blocked, downed, busy, or timeout)");
			write_error("$errno \"$errstr\".");
		}
		else
		{
			// errno 0: the failure happened before connecting (DNS / socket setup).
			add_log("sock_open: problem before connect (dns/socket)");
		}
		dump_log();

		return FALSE;
	}
	
	// Sends an HTTP/1.0 GET request for $location over the global stream $fp.
	//   $hostname  value of the Host: header
	//   $location  request path (with query string, if any)
	// Returns TRUE when the request was written, FALSE when the write failed.
	function sock_send_request ($hostname,$location)
	{
		global $fp;

		$http_req = "GET $location HTTP/1.0\r\nHost: $hostname\r\nReferer: http://www.detik.com\r\nConnection:close\r\n\r\n";
		add_log("http_req: $http_req");

		// fputs() signals failure with FALSE, not -1; the old "== -1" test could
		// never detect a failed write.
		$written = fputs($fp, $http_req);
		if ($written === false)
		{
			add_log("http_req: can't send");
			return FALSE;
		}

		add_log("http_req: sent");
		return TRUE;
	}
	
	// Reads the HTTP response header from the global stream $fp, up to the
	// blank line that separates it from the body.
	// Returns the raw header text when it contains "200 OK". Otherwise an error
	// is printed and dump_log() ends the script (the FALSE return is kept for
	// completeness).
	function sock_recv_header ()
	{
		global $fp;

		$buffers = "";

		add_log("http_resp_header: receiving...");
		while (!feof($fp))
		{
			$buffer = fgets($fp, 65536);
			// FALSE means read error/timeout; stop rather than spin on feof().
			if ($buffer === false) break;
			// Blank line ends the header; also accept a bare LF terminator.
			if (rtrim($buffer, "\r\n") === "") break;
			$buffers .= $buffer;
		}
		add_log("http_resp_header: $buffers");

		//validate buffer
		if (!preg_match("/200 OK/",$buffers))
		{
			write_error("Invalid HTTP Response");
			dump_log();
			return false;
		}

		add_log("http_resp_header: 200 OK");
		return $buffers;
	}
	
	// Prints $string as a red "ERROR:" paragraph and raises the global $error
	// flag, which dump_buffer() checks before printing anything.
	//   $string  message to show; it is emitted as-is, without escaping
	function write_error ($string)
	{
		global $error;

		print "<p><font color=red><b>ERROR:</b><!--begin-->" . $string . "<!--end--></font>";
		$error = TRUE;
	}
	
	// Prints $buffers inside a <pre> "core dump" section, but only when the
	// global $error flag has been set (see write_error()).
	//   $buffers  raw text to dump
	//   $title    optional label appended to the "Core dump" heading
	function dump_buffer ($buffers,$title = "")
	{
		global $error;

		if (!$error)
		{
			return;
		}

		echo "<p>Core dump $title...</p>\n<pre><!--Start Dump-->\n", $buffers, "\n<!--Stop Dump--></pre>\n";
	}
	
	// Formats the time elapsed since the Unix timestamp $date_c as a short
	// Indonesian phrase: "<n> detik" (seconds) below one minute, "<n> menit"
	// (rounded minutes) below one hour, otherwise "<n.n> jam" (hours, one
	// decimal).
	function str_time_delta ($date_c)
	{
		$elapsed = time() - $date_c;

		if ($elapsed >= 3600)
		{
			return sprintf("%01.1f",$elapsed/3600) . " jam";
		}
		if ($elapsed >= 60)
		{
			return round($elapsed/60) . " menit";
		}
		return $elapsed . " detik";
	}
	
	// Collects advertisement links from a fetched page into $news['ads'].
	//   $buffers_orig  raw HTML of the page (taken by reference, only read)
	// Only anchors whose href points at http://ad.detik.com/link are kept. Each
	// entry is array('url' => ..., 'name' => ...); the name is the anchor text
	// or, when that is empty, a name derived from the ".ad" part of the link.
	// Does nothing when $app['ads'] is off. When the page contains no anchor at
	// all, the failure is logged and dump_log() ends the script.
	function ads_parse(&$buffers_orig)
	{
		global $app,$news;

		if (empty($app['ads']))
		{
			return;
		}

		//		get all ad links
		$regex_ads = "|<a([^>]*)>(.*?)</a>|is";
		if (!preg_match_all($regex_ads,$buffers_orig,$ads_res,PREG_SET_ORDER))
		{
			add_log("parser: ads: 1: fail");
			dump_log();
			return;
		}

		add_log("parser: ads: 1: success");
		foreach ($ads_res as $anchor)
		{
			// Anchors without a double-quoted href cannot be ad links; skip them
			// instead of reading a capture group that does not exist.
			if (!preg_match("|href=\"([^\"]*)\"|is",$anchor[1],$url_res)) continue;

			// only URLs on the ad.detik host are taken
			if (!preg_match("|http://ad\.detik\.com/link|is",$url_res[1])) continue;

			$name = trim(strip_tags($anchor[2]));
			if ($name == "" and preg_match("|/[^\-]*-([^/]*)\.ad|i",$anchor[1],$adsname_res))
			{
				//get name from url
				$name = $adsname_res[1];
			}

			$news['ads'][] = array('url' => $url_res[1], 'name' => $name);
		}
	}
	
	// Prints the right-aligned "Iklan" (advertisement) box for $news['ads'].
	//   $news  news array; $news['ads'] is a list of array('url','name')
	// Prints nothing when ads are switched off in $app['ads'], or when
	// $news['ads'] is missing, empty or not an array (previously a missing key
	// raised a warning and an empty list rendered an empty box).
	function ads_view(&$news)
	{
		global $app;

		if (empty($app['ads']) or empty($news['ads']) or !is_array($news['ads']))
		{
			return;
		}

		echo "<table align=right bgcolor=#B4D0DC border=0 cellspacing=0 width=100><tr><td><table border=0 cellpadding=3 cellspacing=0 width=100%><tr><td bgcolor=#ECF8FF>";
		echo "<p class=u><span class=i>Iklan</span>";
		foreach ($news['ads'] as $ads)
		{
			$url = isset($ads['url']) ? $ads['url'] : "";
			$desc = isset($ads['name']) ? (string)$ads['name'] : "";
			// Keep the box narrow: cut long names to 10 bytes and mark the cut.
			if (strlen($desc)>10) $desc = substr($desc,0,10)."&gt;";
			if ($desc == "") $desc = "Iklan";
			echo "<br><a href=\"$url\" target=m>$desc</a>";
		}
		echo "</td></tr></table></td></tr></table>";
	}
	
	// Private helper of news_list_view(): renders "(HH:MM) <link>" for one news
	// item.
	//   $item    array with optional 'url', 'title', 'subtitle', 'date' keys
	//   $self    script path used as the link base
	//   $target  extra anchor attribute text ("" or " target=m")
	//   $class   CSS class of the span that wraps the link
	function _news_list_item_html ($item,$self,$target,$class)
	{
		$subtitle = (isset($item['subtitle']) and $item['subtitle'] != "") ? $item['subtitle']." - " : "";
		$title = isset($item['title']) ? $item['title'] : "";
		// The article URL travels as a query parameter, so it must be encoded;
		// otherwise any "&" inside it would cut the value short.
		$href = $self."?url=".urlencode(isset($item['url']) ? $item['url'] : "");
		$time = date('H:i', isset($item['date']) ? (int)$item['date'] : time());

		return "<span class=d>($time)</span> <span class=$class><a href=\"$href\"$target>$subtitle$title</a></span>";
	}

	// Prints the complete news list page: header, banner, ad box, headlines
	// (with summary and "... lalu" = "... ago" age), previous news, per-topic
	// news and the footer.
	//   $news  parsed news array with optional 'headline', 'prevnews', 'topic'
	//          and 'ads' sections; missing sections are skipped
	function news_list_view(&$news)
	{
		// $self was missing from this list before, so every link was built from
		// an undefined variable.
		global $list_header_output,$list_top_output,$list_footer,$app,$no,$self;
		global $stream_compress,$location,$hostname;

		// In the framed layout, links open in the right-hand frame "m".
		$target = ($no == "frame") ? "" : " target=m";

		echo $list_header_output;
		echo $list_top_output;

		ads_view($news);

		//		view headlines
		if (isset($news['headline']) and is_array($news['headline']))
		{
			foreach ($news['headline'] as $headline)
			{
				// Age comes from the stored timestamp. The old code re-parsed the
				// "H:i" text, which always lands on today's date and is wrong for
				// items from another day.
				$age = str_time_delta(isset($headline['date']) ? (int)$headline['date'] : time());
				$summary = isset($headline['summary']) ? $headline['summary'] : "";
				echo "<p>"._news_list_item_html($headline,$self,$target,"j")." <span class=d>[$age lalu]</span>";
				echo "<br><span class=u>$summary</span>";
			}
		}

		//		view prevnews
		echo "<br>";
		if (isset($news['prevnews']) and is_array($news['prevnews']))
		{
			foreach ($news['prevnews'] as $headline)
			{
				echo "<br>"._news_list_item_html($headline,$self,$target,"i");
			}
		}

		//		view topic news
		if (isset($news['topic']) and is_array($news['topic']))
		{
			foreach ($news['topic'] as $topic)
			{
				$topic_title = isset($topic['title']) ? $topic['title'] : "";
				echo "<p><span class=i>$topic_title</span>";
				if (!isset($topic['news']) or !is_array($topic['news'])) continue;
				foreach ($topic['news'] as $headline)
				{
					echo "<BR>"._news_list_item_html($headline,$self,$target,"i");
				}
			}
		}

		//		view footer
		// $location normally starts with "/", so trim it to avoid "host//path".
		$temp_orig = "http://$hostname/".ltrim((string)$location,"/");
		if (!$app['proxy_mode']) $temp_stream = "Stream: N/A";
		elseif ($stream_compress) $temp_stream = "Stream: Compressed";
		else $temp_stream = "Stream: Uncompressed";

		// The footer template carries literal "$temp_orig" / "$temp_stream"
		// placeholders. Plain substitution replaces the former eval(), which
		// executed the template as PHP source and overwrote the global template.
		echo strtr($list_footer, array('$temp_orig' => $temp_orig, '$temp_stream' => $temp_stream));
	}

	// Prints one article page: header, title line, reporter, ad box, article
	// body and footer.
	//   $news  parsed article array ('subtitle', 'title', 'reporter', 'content',
	//          optional 'ads'); $news['content'] is rewritten in place
	function news_detail_view(&$news)
	{
		// $self was missing from this list before, so rewritten links were built
		// from an undefined variable.
		global $list_header_output,$list_footer,$url,$app,$self,$stream_compress;

		$content = isset($news['content']) ? $news['content'] : "";
		//specialized first paragraph
		$content = preg_replace("|<B>(.*?)<P>|is","<span style=font-size:larger><B>\\1</span><P>",$content);
		//fix related-news urls so they are served through this script
		// "\${1}" keeps the back-reference unambiguous; $self is escaped because
		// "\" and "$" are special inside a replacement string.
		$content = preg_replace("|<a href=(\"?)http://www.detik.com|is","<a href=\${1}".addcslashes($self,'\\$')."?url=http://www.detik.com",$content);
		$news['content'] = $content;

		$subtitle = isset($news['subtitle']) ? $news['subtitle'] : "";
		$title = isset($news['title']) ? $news['title'] : "";
		$reporter = isset($news['reporter']) ? $news['reporter'] : "";

		echo $list_header_output;
		echo "<h3>$subtitle $title</h3>";
		echo "<p class=u>$reporter</p>";
		ads_view($news);
		echo "<span class=u>".$content."</span>";

		//		view footer
		// $url comes straight from the request, so it is escaped and quoted
		// before it is written into the "Original page" href.
		$temp_orig = '"'.htmlspecialchars((string)$url, ENT_QUOTES).'"';
		if (!$app['proxy_mode']) $temp_stream = "Stream: N/A";
		elseif ($stream_compress) $temp_stream = "Stream: Compressed";
		else $temp_stream = "Stream: Uncompressed";

		// Plain placeholder substitution instead of eval() on the template.
		echo strtr($list_footer, array('$temp_orig' => $temp_orig, '$temp_stream' => $temp_stream));
	}


//	START
//	Chooses the operating mode ($detikusable_mode) from the request parameters:
//	  url=...            -> article page    (news_detail / news_detail_from_node)
//	  x=i or no=frame    -> news list       (news_list / news_list_from_node)
//	  x=w                -> welcome_page
//	  x=s                -> source_code
//	  au=...             -> auto_update
//	  cm=...             -> cache_management
//	  anything else      -> frame_set
//	The "_from_node" variants are picked when $app['proxy_mode'] is on.
	add_log("{$app['name']} v{$app['version']} starting up from "
		.(isset($_SERVER['SERVER_ADDR']) ? $_SERVER['SERVER_ADDR'] : "")."/"
		.(isset($_SERVER['SERVER_PORT']) ? $_SERVER['SERVER_PORT'] : ""));

	// ob_end_flush() raises a notice when no output buffer is active.
	if (ob_get_level() > 0) ob_end_flush();

	if (isset($url))
	{
		$detikusable_mode = $app['proxy_mode'] ? 'news_detail_from_node' : 'news_detail';
	}
	elseif ($x=="i" or $no=="frame")
	{
		// from node: fetch a ready-to-view serialized array from another
		// detik.usable node; otherwise fetch raw html from detik and parse it
		$detikusable_mode = $app['proxy_mode'] ? 'news_list_from_node' : 'news_list';
	}
	elseif ($x=="w")
	{
		$detikusable_mode = 'welcome_page';
	}
	elseif ($x=="s")
	{
		$detikusable_mode = 'source_code';
	}
	elseif (!empty($_REQUEST['au']))
	{
		$detikusable_mode = 'auto_update';
	}
	elseif (!empty($_REQUEST['cm']))
	{
		$detikusable_mode = 'cache_management';
	}
	else
	{
		$detikusable_mode = 'frame_set';
	}
	add_log("mode: $detikusable_mode");

// DETIK.USABLE IN --NEWS DETAIL MODE--
// Fetches one article (request parameter "url"), extracts title, subtitle,
// reporter and content with regular expressions, stores the result in the
// file cache and either prints it as a page or, when as_node is set, emits it
// as a serialized (optionally gz-compressed) array for another node.
//
// NOTE(review): the host to connect to is taken from the request URL and the
// cache key is only the URL's basename, so a URL on another host can be fetched
// and cached under the same name as a real article. Consider restricting
// $hostname to detik.com hosts -- confirm with the other modes first.
if ($detikusable_mode == 'news_detail') {
	$url = $_REQUEST['url'];
	$news_from_cache = FALSE;
	$buffers = "";
	$filename = "";

	// Absolute URL: split into host and path. Otherwise treat the value as a
	// path below /peristiwa on the default host.
	if (preg_match("/http:\/\/([^\/]*)(\/.*)/",$url,$result))
	{
		$hostname = $result[1];
		$location = $result[2];
	}
	else
	{
		$location = "/peristiwa".$url;
	}

	if ($app['cache'])	//check if already in cache
	{
		$urls = parse_url($url);
		// An empty basename would give the path "cache/", which is a directory.
		if (is_array($urls) and isset($urls['path']) and basename($urls['path']) != "")
		{
			$filename = 'cache/'.basename($urls['path']);
		}
		if ($filename != "" and is_file($filename))
		{
			$buffer = file_get_contents($filename);
			// The cache file is written by this script (see below). Anything that
			// does not decode to an array is treated as a miss and fetched again.
			$cached = ($buffer === false) ? false : unserialize($buffer);
			if (is_array($cached))
			{
				$news = $cached;
				$news_from_cache = TRUE;
			}
		}
	}

	// socket_open() is only attempted on a cache miss.
	if (!$news_from_cache and socket_open($hostname,80))
	{
		sock_send_request ($hostname,$location);

		sock_recv_header ();

		//	recv all response body
		while (!feof ($fp))
		{
			$buffer = fgets($fp, 65536);
			if ($buffer === false) break;	// read error/timeout: stop instead of spinning
			$buffers .= $buffer;
		}
		fclose($fp);

		$buffers_orig = $buffers;

		// Narrow the page down to the article area.
		$regex_start = "<blockquote>";
		$regex_end = "<!-- FORM";
		$regex_1 = "|$regex_start(.*?)$regex_end|is";

		if (!preg_match($regex_1,$buffers,$result))
		{
			add_log("parser: newsdetail: 1: fail ($regex_1)");
			add_log("parser: $buffers");
		}
		else
		{
			add_log("parser: newsdetail: 1: success");
			$buffers = $result[1];
		}

		if (preg_match("/berita-foto/",$url)) // this channel is different enough to need its own patterns
		{
			// field => pattern; a miss is logged and, if the error flag is set,
			// the buffer is dumped
			$photo_patterns = array(
				'title'    => "|<FONT size=5>(.*?)</font>|is",
				'reporter' => "|<BR><FONT color=#ff0000 size=2>(.*?)</font>|is",
				'content'  => '|<P align="Justify">(.*)|is',
			);
			foreach ($photo_patterns as $field => $regex)
			{
				if (!preg_match($regex,$buffers,$res))
				{
					add_log("parser: newsdetail: $field: fail ($regex)");
					dump_buffer ($buffers);
				}
				else
				{
					add_log("parser: newsdetail: $field: success");
					$news[$field] = $res[1];
				}
			}

			//		'recondition' urls in content so they point back at this script
			if (isset($news['content']))
			{
				$news['content'] = preg_replace('|<a href=(.?)http://www.detik.com/|',"<a href=\\1$self?url=http://www.detik.com/",$news['content']);
			}
		}
		else
		{
			//		sub-title (optional)
			$regex = "|<font class=.?subjudulberita.?>(.*?)</font>|is";
			if (!preg_match($regex,$buffers,$res))
			{
				add_log("parser: newsdetail: subtitle: fail ($regex)");
			}
			else
			{
				add_log("parser: newsdetail: subtitle: success");
				$news['subtitle'] = $res[1];
			}

			//		title (required: a miss ends the script via dump_log)
			$regex = "|<font class=.?judulberita.?>(.*?)</font>.*$|is";
			if (!preg_match($regex,$buffers,$res))
			{
				add_log("parser: newsdetail: title: fail ($regex)");
				dump_log();
			}
			else
			{
				add_log("parser: newsdetail: title: success");
				$news['title'] = $res[1];
			}

			//		reporter (optional)
			$regex = "|<font class=.?textreporter.?>(.*?)</font>|is";
			if (!preg_match($regex,$buffers,$res))
			{
				add_log("parser: newsdetail: reporter: fail ($regex)");
			}
			else
			{
				add_log("parser: newsdetail: reporter: success");
				$news['reporter'] = $res[1];
			}

			//		content (required); ends at </font> or end of string
			$regex_start = "<font class=.?textberita.?>";
			$regex_end = '(?:<\/font>|$)';
			$regex = "/$regex_start(.*?)$regex_end/is";
			if (!preg_match($regex,$buffers,$res))
			{
				add_log("parser: newsdetail: content: fail ($regex)");
				write_error($buffers);
				dump_log();
			}
			else
			{
				add_log("parser: newsdetail: content: success");
				$news['content'] = $res[1];
			}

			//clean html
			$news['reporter'] = isset($news['reporter']) ? strip_tags($news['reporter'],'<b></b><i></i>') : "";
			$news['content'] = isset($news['content']) ? strip_tags($news['content'],'<b></b><i></i><a></a><p></p><br>') : "";
		}

		ads_parse($buffers_orig);
	}

	// Save the serialized array to the cache, but only a real parse result:
	// caching an empty value would make the article unreadable from then on.
	if ($app['cache'] and !$news_from_cache and $filename != "" and !empty($news) and is_array($news))
	{
		if (!file_exists('cache')) mkdir('cache',0755);
		$fp = fopen($filename,'w');
		if ($fp)
		{
			fwrite($fp,serialize($news));
			fclose($fp);
		}
	}

	if ($as_node)
	{
		// set_magic_quotes_runtime() was removed in PHP 7; call it only where it
		// still exists ("@" hides the deprecation notice on PHP 5.3-5.6).
		if (function_exists('set_magic_quotes_runtime')) @set_magic_quotes_runtime(0); //to avoid null char be converted to \0
		$news_serial = serialize($news);
		if (!$app['zlib_support'] or $uncompressed) echo $news_serial;
		else echo gzcompress($news_serial);
	}
	else
	{
		news_detail_view($news);
	}
}

// DETIK.USABLE IN --NEWS LIST MODE--
// Fetches the Detikcom front page and parses it into $news:
//   $news['prevnews'][]  up to 7 earlier items (url, date, subtitle, title)
//   $news['headline'][]  up to 5 headlines (url, subtitle, title, summary, date)
//   $news['topic'][]     topic title plus its list of news (title, url, date)
//   $news['ads'][]       ad links (filled by ads_parse)
// The result is printed as the list page or, when as_node is set, emitted as
// a serialized (optionally gz-compressed) array for another node.
if ($detikusable_mode == 'news_list')
{
	$location = "/index.htm";
	$buffers = "";

	if (socket_open($hostname,80))
	{
		sock_send_request($hostname,$location);

		sock_recv_header();

		//	recv all response body
		while (!feof ($fp))
		{
			$buffer = fgets($fp, 65536);
			if ($buffer === false) break;	// read error/timeout: stop instead of spinning
			$buffers .= $buffer;
		}
		fclose($fp);

		$buffers_orig = $buffers;

		//	narrowing-in to "prevnews" content
		add_log("parser: prevnews: start");
		$regex_prevnews = "/=.nmkanal(.*?)<IMG(.*)/s";
		unset($result);
		if (!preg_match($regex_prevnews,$buffers,$result))
		{
			add_log("parser: prevnews: fail ($regex_prevnews)");
			add_log("parser: $buffers");
		}
		else
		{
			add_log("parser: prevnews: success");
			$pn_buf = $result[1];
			$buffers = $result[2];	// continue parsing after the prevnews area

			$regex_prevnews_all = "/(\d+\/\d+\/\d+.*?) WIB.*?<A href=\"([^\"]*)\"[^>]*>(.*?)<\/A>/is";
			unset($result);
			if (!preg_match_all($regex_prevnews_all,$pn_buf,$result))
			{
				add_log("parser: prevnews: all: fail $regex_prevnews_all");
				dump_log();
			}
			else
			{
				add_log("parser: prevnews: all: success");

				// At most 7 items, never more than were actually matched (the
				// fixed bound used to create empty junk entries).
				$count_prevnews = min(7, count($result[0]));
				for ($i = 0; $i < $count_prevnews; $i++)
				{
					$url = $result[2][$i];
					$date = $url;	//the date is parsed from the url
					$title_temp = $result[3][$i];

					//	prevnews->date (.../YYYYMMDD-HHMMSS.shtml)
					$regex_prevnews_date = "/\/(\d\d\d\d)(\d\d)(\d\d)\-(\d\d)(\d\d)(\d\d)\.shtml$/i";
					if (!preg_match($regex_prevnews_date,$date,$date_res))
					{
						add_log("parser: prevnews: date: fail");
					}
					else
					{
						add_log("parser: prevnews: date: success");
						$tgl = $date_res;
						$news['prevnews'][$i]['date'] = mktime($tgl[4],$tgl[5],$tgl[6],$tgl[2],$tgl[3],$tgl[1]);
					}

					//	prevnews->url
					//		make it an absolute url
					if (!preg_match("/http:\/\//",$url))
					{
						add_log("parser: prevnews($i): url: add absolute url");
						$url = "http://www.detik.com".$url;
					}
					//		if link formatted like ...?url=http://.... retrieve the param value instead
					if (preg_match("/\?url=(.*)/",$url,$url_res))
					{
						add_log("parser: prevnews($i): url: get from param");
						$url = $url_res[1];
					}
					$news['prevnews'][$i]['url'] = $url;

					//	prevnews->subtitle
					$regex_prevnews_subtitle = "/nonhlsubJudul.>(.*?)<\/span>/";
					if (!preg_match($regex_prevnews_subtitle,$title_temp,$subtitle_res))
					{
						add_log("parser: prevnews($i): no-subtitle");
					}
					else
					{
						add_log("parser: prevnews($i): has subtitle");
						$news['prevnews'][$i]['subtitle'] = $subtitle_res[1];
					}

					//	prevnews->title
					$regex_prevnews_title = "/nonhlJudul.>(.*)/";
					if (!preg_match($regex_prevnews_title,$title_temp,$title_res))
					{
						add_log("parser: prevnews($i): no-title ($regex_prevnews_title)");
					}
					else
					{
						add_log("parser: prevnews($i): has title");
						$news['prevnews'][$i]['title'] = $title_res[1];
					}
				}
			}
		}

		//	narrowing-in to headline news content
		add_log("parser: headline: start");
		$regex_headline = '|(<span class="tanggal">.*?)</td(.*)|is';
		if (!preg_match($regex_headline,$buffers,$result))
		{
			add_log("parser: headline: fail ($regex_headline) - $buffers");
			dump_log();
		}
		else
		{
			add_log("parser: headline: success");
			$hl_buf = $result[1];
			$buffers = $result[2];	// continue parsing after the headline area

			$regex_headline_all = '|tanggal.>[^,]*,(.*?) WIB<.*?<A href="([^"]*)".*?parent.>(.*?<span class="summary">.*?</span>)|is';
			if (!preg_match_all($regex_headline_all,$hl_buf,$result))
			{
				add_log("parser: headline: all: fail ($regex_headline_all)");
				dump_log();
			}
			else
			{
				add_log("parser: headline: all: success");

				// At most 5 headlines, never more than were matched. The fixed
				// bound used to hit the fatal "title: fail" branch whenever the
				// page carried fewer than 5 headlines.
				$count_headline = min(5, count($result[0]));
				for ($i = 0; $i < $count_headline; $i++)
				{
					$date = $result[1][$i];
					$url = $result[2][$i];
					$title = $result[3][$i];

					//	headline->url
					//		make it an absolute url
					if (!preg_match("/http:\/\//",$url))
					{
						add_log("parser: headline($i): url: add absolute url");
						$url = "http://www.detik.com".$url;
					}
					//		if link formatted like ...?url=http://.... retrieve the param value instead
					if (preg_match("/\?url=(.*)/",$url,$url_res))
					{
						add_log("parser: headline($i): url: get from param");
						$url = $url_res[1];
					}
					$news['headline'][$i]['url'] = $url;

					//	headline->subtitle (optional)
					$regex_headline_subtitle = "/subjudul.>(.*?)<\/span/is";
					if (!preg_match($regex_headline_subtitle,$title,$subtitle_res))
					{
						add_log("parser: headline($i): subtitle: fail");
					}
					else
					{
						add_log("parser: headline($i): subtitle: success");
						$news['headline'][$i]['subtitle'] = $subtitle_res[1];
					}

					//	headline->title (required)
					$regex_headline_title = "/strJudul.>(.*?)<\/span/is";
					if (!preg_match($regex_headline_title,$title,$title_res))
					{
						add_log("parser: headline($i): title: fail ($regex_headline_title)");
						dump_log();
					}
					else
					{
						// log text used to say "subtitle" here (copy/paste slip)
						add_log("parser: headline($i): title: success");
						$news['headline'][$i]['title'] = $title_res[1];
					}

					//	headline->summary (required)
					$regex_headline_summary = "/summary.>(.*?)<\/span/s";
					if (!preg_match($regex_headline_summary,$title,$summary_res))
					{
						add_log("parser: headline($i): summary: fail ($regex_headline_summary)");
						dump_log();
					}
					else
					{
						add_log("parser: headline($i): summary: success");
						$news['headline'][$i]['summary'] = $summary_res[1];
					}

					//	headline->date: swap "d/m/" to "m/d/" so strtotime() reads it correctly
					$date = preg_replace('/([0-9]*)\/([0-9]*)\//','\\2/\\1/', $date);
					$news['headline'][$i]['date'] = strtotime($date);
				}
			}
		}

		//	narrowing-in to topic news content
		add_log("parser: topic: start");
		add_log("parser: topic: all");
		$regex_topic_all = "/<cfoutput>(.*?)<\/cfoutput>(.*?)<\/table>/si";
		if (!preg_match_all($regex_topic_all,$buffers,$result))
		{
			add_log("parser: topic: fail ($regex_topic_all)");
			dump_log();
		}
		else
		{
			add_log("parser: topic: success");
			$tp_buff = $result;
			$count_topic = count($tp_buff[1]);
			for ($i = 0; $i < $count_topic; $i++)
			{
				//	topic->title
				$news['topic'][$i]['title'] = $tp_buff[1][$i];

				//	news items listed under this topic
				$regex_topic_detail = "/90%\">(.*?)<a href=\"([^\"]*)\".*?\"judulhlbawah\">(.*?)<\/font>/is";
				if (!preg_match_all($regex_topic_detail,$tp_buff[2][$i],$tpdetail_buff))
				{
					add_log("parser: topic($i): detail: fail");
					dump_log();
				}
				else
				{
					add_log("parser: topic($i): detail: success");

					$titles = $tpdetail_buff[3];
					$urls = $tpdetail_buff[2];
					$dates = $urls; //date will be parsed from url

					$count_news = count($tpdetail_buff[1]);
					for ($j = 0; $j < $count_news; $j++)
					{
						//	topic->title->title
						$news['topic'][$i]['news'][$j]['title'] = $titles[$j];

						//	topic->title->url
						$regex_topic_url = "/\?url=(.*)/";
						if (preg_match($regex_topic_url,$urls[$j],$urls_res))
						{
							add_log("parser: topic($i): detail($j): url: success");
							$news['topic'][$i]['news'][$j]['url'] = $urls_res[1];
						}
						else
						{
							//second try: is this a plain http link?
							$regex_topic_url = "|^http://|";
							if (!preg_match($regex_topic_url,$urls[$j],$urls_res))
							{
								add_log("parser: topic($i): detail($j): url: fail");
							}
							else
							{
								add_log("parser: topic($i): detail($j): url: success (2nd try)");
								$news['topic'][$i]['news'][$j]['url'] = $urls[$j];
							}
						}

						//	topic->title->date (.../YYYYMMDD-HHMMSS.shtml)
						$regex_headline_date = "/\/(\d\d\d\d)(\d\d)(\d\d)\-(\d\d)(\d\d)(\d\d)\.shtml$/i";
						if (!preg_match($regex_headline_date,$dates[$j],$tpdetail_res))
						{
							add_log("parser: topic($i): detail($j): date: fail");
						}
						else
						{
							add_log("parser: topic($i): detail($j): date: success");
							$tgl = $tpdetail_res;
							$news['topic'][$i]['news'][$j]['date'] = mktime($tgl[4],$tgl[5],$tgl[6],$tgl[2],$tgl[3],$tgl[1]);
						}
					}
				}
			}
		}

		ads_parse($buffers_orig);
	}

	if ($as_node)
	{
		// set_magic_quotes_runtime() was removed in PHP 7; call it only where it
		// still exists ("@" hides the deprecation notice on PHP 5.3-5.6).
		if (function_exists('set_magic_quotes_runtime')) @set_magic_quotes_runtime(0); //to avoid null char be converted to \0
		$news_serial = serialize($news);
		if (!$app['zlib_support'] or $uncompressed) echo $news_serial;
		else echo gzcompress($news_serial);
	}
	else
	{
		news_list_view($news);
	}
}

//	MODE: news_list_from_node
//	Downloads the serialized news array from another detik.usable node
//	($app['proxy_url']), optionally gz-uncompresses it, unserializes it and
//	hands the resulting array to news_list_view().
if ($detikusable_mode == 'news_list_from_node')
{
	$app['proxy_url'] .= "?x=i&as_node=1";
	if (!$app['zlib_support'])	$app['proxy_url'] .= "&uc=1";	//ask uncompressed stream if i don't support zlib library

	//	A single fread() on a network stream returns at most one chunk, so
	//	keep reading until EOF or until the original 100000-byte cap is hit.
	$max_bytes = 100000;
	$buffer = "";
	$fp = fopen($app['proxy_url'],'r');
	if ($fp)
	{
		while (!feof($fp) && strlen($buffer) < $max_bytes)
		{
			$chunk = fread($fp,$max_bytes - strlen($buffer));
			if ($chunk === false || $chunk === "") break;	//read error or timeout: stop instead of spinning
			$buffer .= $chunk;
		}
		fclose($fp);
	}

	if ($buffer == "")
	{
		write_error('newslist: from node: Unable to download from node');
	}
	else
	{
		$buffer_orig = $buffer;
		if ($app['zlib_support'])
		{
			$buffer = @gzuncompress($buffer);
			if ($buffer === false)
			{
				//let's assume it's not gzcompressed and fall back to the raw download
				add_log('newslist: from node: unable to uncompress data');
				$buffer = $buffer_orig;
			}
			else
			{
				$stream_compress = TRUE;
			}
		}

		//	NOTE(review): unserialize() runs on data supplied by a remote node;
		//	only point $app['proxy_url'] at nodes you trust.
		$buffer = unserialize($buffer);
		if ($buffer === false)	//strict check: an empty array is valid data, not a failure
		{
			write_error('newslist: from node: Unable to unserialize data');
			exit;
		}

		if (!is_array($buffer))
		{
			write_error("newslist: from node: Data is not formatted correctly: X{$buffer}X");
			exit;
		}

		news_list_view($buffer);

	}
}

//	MODE: news_detail_from_node
//	Downloads the serialized news-detail array for $url from another
//	detik.usable node ($app['proxy_url']), optionally gz-uncompresses it,
//	unserializes it and hands the resulting array to news_detail_view().
if ($detikusable_mode == 'news_detail_from_node')
{
	//	$url comes from the request; encode it so characters such as '&', '#'
	//	or spaces cannot break or extend the query string sent to the node.
	$app['proxy_url'] .= "?url=".urlencode($url)."&as_node=1";
	if (!$app['zlib_support'])	$app['proxy_url'] .= "&uc=1";	//ask uncompressed stream if i don't support zlib library

	//	A single fread() on a network stream returns at most one chunk, so
	//	keep reading until EOF or until the original 100000-byte cap is hit.
	$max_bytes = 100000;
	$buffer = "";
	$fp = fopen($app['proxy_url'],'r');
	if ($fp)
	{
		while (!feof($fp) && strlen($buffer) < $max_bytes)
		{
			$chunk = fread($fp,$max_bytes - strlen($buffer));
			if ($chunk === false || $chunk === "") break;	//read error or timeout: stop instead of spinning
			$buffer .= $chunk;
		}
		fclose($fp);
	}

	if ($buffer == "")
	{
		write_error('newsdetail: from node: Unable to download from node');
	}
	else
	{
		$buffer_orig = $buffer;

		if ($app['zlib_support'])
		{
			$buffer = @gzuncompress($buffer);
			if ($buffer === false)
			{
				//let's assume it's not gzcompressed and fall back to the raw download
				add_log('newsdetail: from node: unable to uncompress data');
				$buffer = $buffer_orig;
			}
			else
			{
				$stream_compress = TRUE;
			}
		}

		//	NOTE(review): unserialize() runs on data supplied by a remote node;
		//	only point $app['proxy_url'] at nodes you trust.
		$buffer = unserialize($buffer);
		if ($buffer === false)	//strict check: an empty array is valid data, not a failure
		{
			write_error('newsdetail: from node: Unable to unserialize data');
			exit;
		}

		if (!is_array($buffer))
		{
			write_error('newsdetail: from node: Data is not formatted correctly');
			exit;
		}
		news_detail_view($buffer);
	}
}

//	MODE: welcome_page -- print the two prepared output buffers in order.
//	NOTE(review): `$header_ouput` is spelled exactly as in the original; confirm
//	it matches the name used where the buffer is filled.
if ('welcome_page' == $detikusable_mode)
{
	print $header_ouput;
	print $welcomepage_output;
}

//	MODE: source_code -- display this script's own syntax-highlighted source.
if ($detikusable_mode == 'source_code')
{
	//	__FILE__ is always the running script. The previous DOCUMENT_ROOT +
	//	basename(PHP_SELF) guess pointed at the wrong (or another) file when
	//	the script was installed in a sub-directory or called with path info.
	$loc = __FILE__;
	show_source($loc);
}

//	MODE: frame_set -- print the prepared frameset markup.
if ('frame_set' == $detikusable_mode)
{
	print $frameset_output;
}

//	MODE: auto_update
//	Without "commit": read the published script at $app['update_url'], extract
//	its $app['version'] value and show it next to the running version.
//	With "commit": download the published script and overwrite the running
//	script file ($_SERVER['SCRIPT_FILENAME']) with it.
//	NOTE(review): the downloaded code is written over this script as-is; the
//	update URL must be a source you trust.
if ($detikusable_mode == 'auto_update')
{
	//	pattern locating the version assignment inside the published script
	$regex_version = '/\$app\[\'version\'\]\s*=\s*"([^"]*)"/i';

	if (empty($_REQUEST['commit']))
	{
		//compare version
		$remote_version = "0";	//reported when the remote version cannot be determined
		$fp = fopen($app['update_url'],'r');
		if ($fp)	//guard: feof() on a failed handle would never become true
		{
			while(!feof($fp))
			{
				$buffer = fgets($fp,1024);
				if ($buffer === false) break;
				if (preg_match($regex_version,$buffer,$remote_res))
				{
					$remote_version = $remote_res[1];
					break;
				}
			}
			fclose($fp);
		}

		//	the version string comes from a remote file; escape it before printing
		$remote_version_html = htmlspecialchars($remote_version);

		echo $list_header_output;
		echo "<h4>Check versi terbaru</h4>";
		echo "<ul><li>Versi detik.usable ini: <b>{$app['version']}</b><li>Versi detik.usable terbaru: <b>$remote_version_html</b></ul>";
		if ($remote_version > $app['version']) echo "<p><form method=get action=$self><input type=hidden name=au value=1><input type=hidden name=commit value=1><input type=submit value=\"Update ke $remote_version_html\"></form>";
		else echo "<p>detik.usable ini sudah versi terbaru.";
		echo "<p><a href=$self?x=w>Back to welcome page</a>";
	}
	else
	{
		$buffer = "";
		$fp = fopen($app['update_url'],'r');
		if ($fp)	//guard: feof() on a failed handle would never become true
		{
			while(!feof($fp))
			{
				$chunk = fread($fp,1024);
				if ($chunk === false || $chunk === "") break;	//read error or timeout
				$buffer .= $chunk;
			}
			fclose($fp);
		}

		if ($buffer == "")
		{
			write_error("auto_update: Unable to get latest version at {$app['update_url']}");
			echo "<p><a href=$self?x=w>Back to welcome page</a>";
		}
		else if (!preg_match($regex_version,$buffer))
		{
			//	an error page or a truncated download must not replace the working script
			write_error("auto_update: File at {$app['update_url']} does not look like detik.usable, update aborted");
			echo "<p><a href=$self?x=w>Back to welcome page</a>";
		}
		else
		{
			$target = $_SERVER['SCRIPT_FILENAME'];
			$written = false;
			$fp = fopen($target,'w');
			if ($fp)
			{
				$written = fwrite($fp,$buffer);
				fclose($fp);
			}

			if ($written === false)
			{
				//	do not claim success when the script file could not be written
				write_error("auto_update: Unable to write new version to $target");
				echo "<p><a href=$self?x=w>Back to welcome page</a>";
			}
			else
			{
				echo $list_header_output;
				echo "<h4>Update Berhasil</h4>";
				echo "<p><a href=$self target=_top>Reload detik.usable</a>";
			}
		}
	}
}

//	MODE: cache_management
//	Without "commit": sum the sizes of the files in the "cache" directory and
//	show the total, with a button to empty the cache when it is not empty.
//	With "commit": delete every file in the "cache" directory.
if ($detikusable_mode == 'cache_management')
{
	if (empty($_REQUEST['commit']))
	{
		$dirsize = 0;
		$dh = opendir('cache');
		if ($dh)
		{
			//	compare against false so an entry named "0" does not end the loop
			while (($filename = readdir($dh)) !== false)
			{
				if ($filename == "." || $filename == "..") continue;
				$path = 'cache/'.$filename;
				if (is_file($path)) $dirsize += filesize($path);	//only regular files count as cache data
			}
			closedir($dh);
		}
		$cache_size = round($dirsize/1024,2);
		echo $list_header_output;
		echo "<h4>Pengaturan Cache</h4>";
		echo "Total space yang digunakan cache: ".$cache_size." KB";
		if ($cache_size > 0) echo "<p><form method=get action=$self><input type=hidden name=cm value=1><input type=hidden name=commit value=1><input type=submit value=\"Kosongkan Cache\"></form>";
		echo "<p><a href=$self?x=w>Back to welcome page</a>";
	}
	else
	{
		$dh = opendir('cache');
		if ($dh)
		{
			//	compare against false so an entry named "0" does not end the loop
			while (($filename = readdir($dh)) !== false)
			{
				if ($filename == "." || $filename == "..") continue;
				$path = 'cache/'.$filename;
				if (is_file($path)) @unlink($path);	//best effort: a file that cannot be removed is skipped
			}
			closedir($dh);
		}
		echo $list_header_output;
		echo "<h4>Cache telah dikosongkan</h4>";
		echo "<p><a href=$self?x=w>Back to welcome page</a>";

	}
}

Comment and Contribute

Your comment has been submitted and is pending approval.

Author:
dody wijaya

Comment:



Comment:

(Maximum characters: 1200). You have characters left.