PHPBuilder - HTTP XML web search interface



RSS Twitter
Snippets Other

HTTP XML web search interface

by: SearchHippo
|
April 20, 2001

Version: 1.0

Type: Full Script

Category: Other

License: Other

Description: This simple snippet lets you interface with the SearchHippo web search engine's HTTP XML API. PHP must be compiled with expat (XML parser) support to use this snippet. You can layer almost any presentation around the results that come back. The latest version will always be available at http://www.searchhippo.com/partner.php. This is very similar to how one might use SOAP.



<?php
////////////////////////////////////////////////////////////////////////
//
// Copyright (c) 2001, SearchHippo.com 
// info@searchhippo.com
// http://www.searchhippo.com/
//
// Note: use of this code indicates agreement to our attribution terms
// Please see http://www.searchhippo.com/partner.php
//
// Rev 1.0 - 4/20/2K+1
//
// Feel free to hack this up however you like.  The only requirements are:
//   1) You must retain this notice
//   2) You must retain attribution (see bottom of file) to searchhippo.
//   3) You must retain the redirect through our servers for clickthroughs
//
// This page should be called with 'q' as a parameter passed in through
// the url that contains the query request.  For example, if you save this
// page as "searchhippo.php", you might call 
// http://[your-domain]/searchhippo.php?q=searchhippo
// where, again, q holds the search criteria.
//
// Have fun, enjoy, and distribute freely!
//
////////////////////////////////////////////////////////////////////////


// expat start-tag callback.
//
// Captures the attributes of the elements this script cares about and
// records which element is currently open:
//   RESULTS     -> $qrystatus ["count" | "head" | "tail" | "nexturl" | "prevurl"]
//   RECORD      -> $recs [$recctr] ["id" | "timestamp" | "size"]
//   SEARCHHIPPO -> $data_version
// $curstate is always set to the element name so that characterDataHandler
// knows which element the following text belongs to.
//
// Parameters:
//   $parser          - the xml parser invoking the callback (not used here)
//   $element_name    - name of the element being opened
//   $element_attribs - associative array of the element's attributes
//
// Names are matched in upper case; this presumably relies on the parser's
// case-folding option being left enabled -- confirm if parser options are
// ever changed.
//
// An attribute that is absent from the tag is stored as an empty string
// instead of being read blindly, which used to raise an undefined-index
// notice (and a null-to-trim() deprecation on newer PHP versions).
function startElementHandler ($parser, $element_name, $element_attribs)
{
  global $recctr;
  global $curstate;
  global $recs;
  global $qrystatus;
  global $data_version;

  if (!is_array ($element_attribs)) { $element_attribs = array (); }

  if ($element_name === "RESULTS") {
    $wanted = array (
      "count"   => "COUNT",
      "head"    => "HEAD",
      "tail"    => "TAIL",
      "nexturl" => "NEXTURL",
      "prevurl" => "PREVURL");
    foreach ($wanted as $key => $attrib) {
      $qrystatus [$key] = isset ($element_attribs [$attrib])
        ? trim ($element_attribs [$attrib]) : "";
    }
  }

  if ($element_name === "RECORD") {
    $wanted = array (
      "id"        => "ID",
      "timestamp" => "TIMESTAMP",
      "size"      => "SIZE");
    foreach ($wanted as $key => $attrib) {
      $recs [$recctr] [$key] = isset ($element_attribs [$attrib])
        ? trim ($element_attribs [$attrib]) : "";
    }
  }

  if ($element_name === "SEARCHHIPPO") {
    $data_version = isset ($element_attribs ["VERSION"])
      ? trim ($element_attribs ["VERSION"]) : "";
  }

  $curstate = $element_name;
}

// expat end-tag callback.
//
// Closing a RECORD element advances the record counter, so the next record's
// fields are written to a new slot of $recs.  Closing any element clears
// $curstate, which makes characterDataHandler ignore text until another
// element is opened.
//
// Parameters:
//   $parser       - the xml parser invoking the callback (not used here)
//   $element_name - name of the element being closed
function endElementHandler ($parser, $element_name)
{
  global $recctr, $curstate, $recs;

  if ("RECORD" == $element_name) {
    ++$recctr;
  }

  $curstate = "";
}

// expat character-data callback.
//
// Stores the text of the URL, TITLE and DESCR elements into
// $recs [$recctr] ["url" | "title" | "descr"]; text seen while any other
// element (or none) is current is ignored.
//
// Parameters:
//   $parser - the xml parser invoking the callback (not used here)
//   $data   - one piece of character data
//
// The parser may deliver the text of a single element in several calls
// (for instance around entity references, line breaks, or where the input
// was split into read chunks).  Assigning each piece directly would keep
// only the last piece, so the raw pieces are accumulated and the stored
// value is always the trimmed concatenation of everything seen so far.
//
// Limitation: the handler cannot see element boundaries, so two elements of
// the same name inside one record are concatenated rather than overwritten.
function characterDataHandler ($parser, $data)
{
  global $recctr;
  global $curstate;
  global $recs;

  // Untrimmed text collected so far for the record being filled,
  // keyed by record index and then by field name.
  static $pending = array ();

  if ($curstate === "URL") { $field = "url"; }
  else if ($curstate === "TITLE") { $field = "title"; }
  else if ($curstate === "DESCR") { $field = "descr"; }
  else { return; }

  // Moving on to another record: earlier buffers are no longer needed.
  if (!isset ($pending [$recctr])) {
    $pending = array ($recctr => array ());
  }

  // A field not yet present in $recs is starting afresh, so any buffered
  // text for it is stale (e.g. $recs was reset for another parse).
  if (!isset ($recs [$recctr] [$field]) || !isset ($pending [$recctr] [$field])) {
    $pending [$recctr] [$field] = "";
  }

  $pending [$recctr] [$field] .= $data;
  $recs [$recctr] [$field] = trim ($pending [$recctr] [$field]);
}



// global vars (shared with the expat handler functions)

$recctr = 0;             // number of records retrieved
$in_item_tag = 0;        // flag for expat parsing (not read anywhere in this script)
$curstate = '';          // for xpath state
$recs = array ();        // returned data results
$qrystatus = array ();   // returned data status
$data_version = '';      // version of results page

$xml_parser = xml_parser_create();
if (!($xml_parser)) {
  die ("can't get xml parser!");
}

xml_set_element_handler ($xml_parser,
  "startElementHandler",
  "endElementHandler");

xml_set_character_data_handler ($xml_parser,
  "characterDataHandler");

// Read the request parameters explicitly from the query string instead of
// relying on register_globals, which no longer exists in current PHP.
// Anything that is not a plain string (e.g. ?q[]=x) is treated as empty.
$q = (isset ($_GET ["q"]) && is_string ($_GET ["q"])) ? $_GET ["q"] : '';
$i = (isset ($_GET ["i"]) && is_string ($_GET ["i"])) ? $_GET ["i"] : '';
$c = (isset ($_GET ["c"]) && is_string ($_GET ["c"])) ? $_GET ["c"] : '';

// Only undo magic quotes where they can actually have been applied;
// stripping slashes unconditionally would mangle queries containing
// backslashes on installations without magic quotes.
if (version_compare (PHP_VERSION, '5.4.0', '<') && get_magic_quotes_gpc ()) {
  $q = stripslashes ($q);
}

$qenc = urlencode ($q);   // q is query
$ienc = urlencode ($i);   // i is 'initial count' for paging through results
$cenc = urlencode ($c);   // c is 'count' for how many results to retrieve.

// Flags used for everything written into the page.  ENT_QUOTES is required
// because several values end up inside double-quoted attributes.
$esc_flags = ENT_QUOTES | (defined ('ENT_SUBSTITUTE') ? ENT_SUBSTITUTE : 0);

// Fetch the result document and feed it to the parser chunk by chunk.
// (Opening an http:// URL with fopen needs URL wrappers to be enabled.)
$xml_error = '';
$fp = fopen (
    "http://www.searchhippo.com/qxml?q=$qenc&c=$cenc&i=$ienc", "r");
if ($fp) {
  do {
    $chunk = fread ($fp, 4096);
    if ($chunk === false) { $chunk = ''; }

    // An empty read is treated as the end of the document, so the loop
    // always terminates and the parser is always told about the last chunk.
    $is_final = ($chunk === '') || feof ($fp);

    if (!xml_parse ($xml_parser, $chunk, $is_final)) {
      $xml_error = xml_error_string (xml_get_error_code ($xml_parser));
      break;
    }
  } while (!$is_final);

  fclose ($fp);
}

// Only resource-style parsers need to be freed explicitly.
if (is_resource ($xml_parser)) {
  xml_parser_free ($xml_parser);
}


//
// Now all of our data has been read from the url and parsed.  We now
// have populated the two global arrays, $recs and $qrystatus.
// Additionally, $recctr will have the number of results we retrieved
// (i.e. the size of the $recs array).
//

if ($xml_error !== '') {
  printf ("<!-- xml parse error: %s -->",
    htmlspecialchars ((string) $xml_error, $esc_flags));
}

if ($recctr > 0) {

  if ($data_version != "1.0") {
    printf ("<!-- data_version mismatch! Got %s, expected 1.0! -->",
      htmlspecialchars ($data_version, $esc_flags));
  }

  // Paging information; absent values fall back to harmless defaults.
  $head    = isset ($qrystatus ["head"])    ? (int) $qrystatus ["head"] : 0;
  $tail    = isset ($qrystatus ["tail"])    ? (int) $qrystatus ["tail"] : 0;
  $count   = isset ($qrystatus ["count"])   ? $qrystatus ["count"]      : '';
  $prevurl = isset ($qrystatus ["prevurl"]) ? $qrystatus ["prevurl"]    : '';
  $nexturl = isset ($qrystatus ["nexturl"]) ? $qrystatus ["nexturl"]    : '';

  // PHP_SELF can carry attacker-supplied path info, so it is escaped too.
  $self = htmlspecialchars (
    isset ($_SERVER ["PHP_SELF"]) ? $_SERVER ["PHP_SELF"] : '', $esc_flags);

  // Opens the block that is closed after the pager below.
  echo "<CENTER>";

  if ($prevurl !== '') {
// Modify slightly for the 'local' page: keep only the query string of the
// remote paging url and attach it to this page.
    printf ("[ <A HREF=\"%s%s\">Prev</A> ]",
      $self, htmlspecialchars ((string) strchr ($prevurl, "?"), $esc_flags));
  }

  printf (" Results %d - %d of %s ",
    $head,
    $tail,
    htmlspecialchars ($count, $esc_flags));

  if ($nexturl !== '') {
// Modify slightly for the 'local' page:
    printf ("[ <A HREF=\"%s%s\">Next</A> ]",
      $self, htmlspecialchars ((string) strchr ($nexturl, "?"), $esc_flags));
  }

  echo "</CENTER><P>";


// Here we are going to loop over all the items in the $recs array and
// output as necessary.
  for ($x = 0; $x < $recctr; $x++) {
    $rec   = isset ($recs [$x]) ? $recs [$x] : array ();
    $url   = isset ($rec ["url"])   ? $rec ["url"]   : '';
    $title = isset ($rec ["title"]) ? $rec ["title"] : '';
    $descr = isset ($rec ["descr"]) ? $rec ["descr"] : '';

    // Long titles/descriptions are cut and marked with an ellipsis.
    if (strlen ($title) > 64) {
      $ShowTitle = substr ($title, 0, 61) . "...";
    } else $ShowTitle = $title;

    if (strlen ($descr) > 192) {
      $ShowDescr = substr ($descr, 0, 189) . "...";
    } else $ShowDescr = $descr;

    // The displayed url is whatever follows the first three characters of
    // the query string (the "?u=" of the redirect url); urls without a
    // query string are displayed unchanged.
    $Tmp = strchr ($url, "?");
    if ($Tmp) {
      $DisplayURL = (string) substr ($Tmp, 3);
    }
    else $DisplayURL = $url;

    if ($ShowTitle === '') $ShowTitle = $DisplayURL;

// Optionally, one might remove the "http://r.searchhippo.com/r?u=" piece
// from the URL *displayed*.  Please keep the redirect there for the actual
// hyperlink, because we use this information in determining which
// sites are clicked on the most, which is one of the variables in our
// ranking mechanism.


// Outputting some HTML below
/*******************************************************/

    // Titles, descriptions and urls originate from third-party pages, so
    // they are escaped before being written into the page.
    // NOTE(review): this assumes the service returns plain text rather than
    // ready-made HTML markup -- confirm against the API.
    printf ("<FONT SIZE=\"2\">%d. </FONT><A HREF=\"%s\"><FONT SIZE=\"2\"><B>%s</B></FONT></A><BR>",
      $x + $head,
      htmlspecialchars ($url, $esc_flags),
      htmlspecialchars ($ShowTitle, $esc_flags));
    if (strlen ($ShowDescr) > 0) {
      printf ("<FONT SIZE=\"2\">%s</FONT><BR>",
        htmlspecialchars ($ShowDescr, $esc_flags));
    }
    printf ("<FONT SIZE=\"2\" COLOR=\"#117711\">%s</FONT><BR>\n",
      htmlspecialchars ($DisplayURL, $esc_flags));


    printf("<HR>\n");
/*******************************************************/
  }

} else {
  // The query comes straight from the request and must never be echoed raw.
  printf ("<CENTER><FONT SIZE=\"2\" COLOR=\"#CC1111\">Sorry, no results available for <B>\"%s\"</B></FONT></CENTER>\n",
    htmlspecialchars ($q, $esc_flags));
}


// You *MUST* leave SearchHippo attribution!
//
// The footer is written as one string: the markup lines below joined by
// newlines, with an empty first and last entry so that the output starts
// and ends with a line break.

echo implode ("\n", array (
  '',
  '<CENTER>',
  '<IMG SRC="http://www.searchhippo.com/pics/hippo_25.gif" HEIGHT="25" WIDTH="27">',
  'Powered by:',
  '<A HREF="http://www.searchhippo.com/">',
  '<FONT SIZE="+1">SearchHippo.com</FONT></A>',
  '<IMG SRC="http://www.searchhippo.com/pics/hippo_25.gif" HEIGHT="25" WIDTH="27">',
  '</CENTER>',
  ''));

?>

Comment and Contribute

Your comment has been submitted and is pending approval.

Author:
SearchHippo

Comment:



Comment:

(Maximum characters: 1200). You have characters left.