Version: 0.9
Type: Function
Category: HTML
License: GNU General Public License
Description: First pass at a simple HTML parser. Give it a chunk of HTML, and it will call your callback functions for every tag it encounters and for every run of text (including comments) between tags.
# Written March 2001 by Nathaniel Hekman
# Comments/bugs to nate@hekman.net
#
# html_parse will parse a chunk of HTML code, reporting every tag and
# every run of non-tag text to caller-supplied callbacks, in document order.
#
#   callable $tagFn  : function to call for each tag. This function
#                      is called for *every* tag, opening and closing
#                      (a closing tag arrives with its leading '/',
#                      e.g. "/b"). It would be easy to change this to
#                      call a different function for open and close tags.
#                      The tagFn has these arguments:
#                        function tagFn($tag, $attribs)
#                        string $tag     : the tag
#                        string $attribs : all the attributes, as one long
#                                          string ('' when the tag has none);
#                                          wouldn't be hard to change this
#                                          to an assoc array
#   callable $dataFn : function to call for data (non-tag). This function
#                      is called for all the data in between tags, including
#                      comments and any text before the first tag. A comment
#                      is delivered whole, together with whatever text
#                      follows its "-->" up to the next '<'.
#                      The dataFn has these arguments:
#                        function dataFn($data)
#                        string $data : the text
#   string $data     : the html to parse
#
# Nothing is returned; all results are delivered through the callbacks.
function html_parse($tagFn, $dataFn, $data) {
    # Split on '<'. The first piece is whatever precedes the first '<';
    # every later piece begins with a tag (or a comment opener) followed
    # by everything up to the next '<'.
    $pieces = explode('<', (string) $data);

    # Text before the first '<' is plain data and must never be handed
    # to the tag callback.
    $leading = array_shift($pieces);
    if (strlen($leading)) {
        $dataFn($leading);
    }

    $inComment = false;
    $commentText = '';

    foreach ($pieces as $piece) {
        # If we're not in a comment block, check if one starts here.
        if (!$inComment && substr(ltrim($piece), 0, 3) == "!--") {
            $inComment = true;
            $commentText = '';
        }

        # Inside a comment a '<' does not start a tag, so keep gluing the
        # pieces back together (restoring the '<' that explode() removed)
        # until one of them contains the terminator.
        if ($inComment) {
            $commentText .= '<' . $piece;
            if (strpos($piece, '-->') !== false) {
                $inComment = false;
                $dataFn($commentText);
            }
            continue;
        }

        # Otherwise split on the first '>' to separate the tag from the
        # data. A piece with no '>' is treated as a tag with no data.
        $parts = explode('>', $piece, 2);
        $tag = trim($parts[0]);
        $text = isset($parts[1]) ? $parts[1] : '';

        # If there's a tag, call the tag function. The name is separated
        # from the attributes by any whitespace (space, tab, newline).
        # I could split the attributes into an associative array, but I
        # don't need that functionality now.
        if (strlen($tag)) {
            $tagParts = preg_split('/\s+/', $tag, 2);
            $attribs = isset($tagParts[1]) ? $tagParts[1] : '';
            $tagFn($tagParts[0], $attribs);
        }

        # If there's data, call the data function.
        if (strlen($text)) {
            $dataFn($text);
        }
    }

    # A comment that was never closed would otherwise vanish silently;
    # hand over what was collected so no input is lost.
    if ($inComment) {
        $dataFn($commentText);
    }
}