Version: 1.0a
Type: Class
Category: Other
License: GNU General Public License
Description: PHP Microsoft DBX File Reader reads files in the mailbox format used by Outlook Express.
<?
/*********************************************************************************************
Name: ms_dbx_reader.class.inc.php
Author: Antony Raijekov a.k.a Zeos (dev@strategma.bg) Bulgaria/Sofia
Version: 1.0
Description: Microsoft DBX file reader /such as Outlook Express Mailbox database files/
Date: 9/27/2002 2:53 GMT +2
License: GPL
Note: Please if you use this class send me an e-mail, just for information 10x
I test this class with 15 folders. ~ 10MB - 319 mails, the class processed them for 0.6 sec :)
*********************************************************************************************/
// CLASS IMPLEMENTATION
/********************************************************************************************
* @description MS DBX file reader /e.g. Outlook Express DBX file reader/
* @author Zeos <rj_@mail.bg>
* @version 1.0a
* @copyright GPL license
* @access public
*******************************************************************************************/
class ms_dbx_reader
{
    // Reads a Microsoft DBX file (Outlook Express mailbox) and collects every
    // message it can reach through the index structure into $this->mails.
    // All multi-byte integers are decoded as little-endian ('V' / 'v') so the
    // result does not depend on the byte order of the machine running PHP.

    /** Path of the DBX file handed to the constructor ('' after clear()). */
    public $fname = null;

    /** Parsed messages; every entry is array('info' => array, 'content' => string|false). */
    public $mails = array();

    /** When true, progress text is printed while the file is being parsed. */
    public $debug = false;

    /** Positions of index nodes already visited by readIndex() (position => true). */
    public $tmp = array();

    /**
     * Resets the reader: forgets the file name, the parsed mails and the
     * list of visited index nodes.
     */
    public function clear()
    {
        $this->fname = '';
        $this->mails = array();
        $this->tmp = array();
    }

    /**
     * Opens $fname and parses it right away; the result is left in $this->mails.
     *
     * A file that cannot be opened is silently ignored ($this->mails stays empty).
     * Structural verification failures inside the file terminate the script via die().
     *
     * @param string $fname path of the DBX file
     * @param bool   $debug print progress information while parsing
     */
    public function __construct($fname, $debug = false)
    {
        $this->debug = $debug;
        $this->fname = $fname;

        // fopen('') throws on PHP 8 even with @, so reject an empty path up front
        if ((string) $fname === '') {
            return;
        }
        $fp = @fopen($fname, 'rb');
        if (!$fp) {
            return;
        }

        // the dword at 0xC4 is used as the number of items stored in the file
        fseek($fp, 0xC4);
        $count = $this->readStreamDword($fp);

        if ($this->debug) print "Processing {$count} message(s) in [".basename($fname)."]......";

        if ($count > 0) {
            // the dword at 0x30 points to the first index node
            fseek($fp, 0x30);
            $this->readIndex($fp, $this->readStreamDword($fp));
        }

        if ($this->debug) print 'done<br>';
        fclose($fp);
    }

    /**
     * Legacy PHP 4 style entry point, kept so existing code that calls it
     * explicitly keeps working. PHP 8 no longer treats a method named like the
     * class as the constructor, hence the real work lives in __construct().
     *
     * @param string $fname path of the DBX file
     * @param bool   $debug print progress information while parsing
     */
    public function ms_dbx_reader($fname, $debug = false)
    {
        $this->__construct($fname, $debug);
    }

    /**
     * Reads one little-endian 32-bit value from the current stream position.
     *
     * @return int|null the value, or null when fewer than 4 bytes could be read
     */
    private function readStreamDword($fp)
    {
        $raw = fread($fp, 4);
        if ($raw === false || strlen($raw) < 4) {
            return null;
        }
        $v = unpack('Vvalue', $raw);
        return $v['value'];
    }

    /**
     * Decodes one little-endian 32-bit value stored at byte offset $pos of $buf.
     *
     * @return int|null the value, or null when $buf does not hold 4 bytes at $pos
     */
    private function dwordAt($buf, $pos)
    {
        $chunk = (string) substr($buf, $pos, 4);
        if (strlen($chunk) < 4) {
            return null;
        }
        $v = unpack('Vvalue', $chunk);
        return $v['value'];
    }

    /**
     * Helper: extracts the NUL-terminated string that starts at offset $pos of $buf.
     *
     * @param string $buf binary buffer (taken by reference, never modified)
     * @param int    $pos offset of the first character
     * @return string the text before the terminator; '' when the string is empty,
     *                when $pos lies outside the buffer or when no terminator follows
     */
    public function readstring(&$buf, $pos)
    {
        $tail = (string) substr($buf, $pos);
        $len = strpos($tail, chr(0));
        // both "not found" (false) and "terminator first" (0) yield an empty string
        if (!$len) {
            return '';
        }
        return substr($tail, 0, $len);
    }

    /**
     * Reassembles the raw message that starts at $position by following the
     * chain of 528-byte records (16 header bytes + up to 512 payload bytes).
     *
     * @param resource $fp       open DBX file
     * @param int      $position file offset of the first record
     * @return string|false the message text, or false when $position is not
     *                      positive or the file reports no items
     */
    public function ReadMessage($fp, $position)
    {
        if (!($position > 0)) {
            return false;
        }

        // same counter the constructor reads, decoded with the same width
        fseek($fp, 0xC4);
        $itemsCount = $this->readStreamDword($fp);
        if (!($itemsCount > 0)) {
            return false;
        }

        $msg = '';
        $part = 0;
        $seen = array();
        fseek($fp, $position);
        while (!feof($fp)) {
            // a chain that points back to a record already read would never end
            if (isset($seen[$position])) break;
            $seen[$position] = true;

            $part++;
            $s = fread($fp, 528);
            if ($s === false || strlen($s) == 0) break;
            if (strlen($s) < 16) die("Read $part part of message verify error");

            $hdr = unpack('VFilePos/VUnknown/VItemSize/VNextItem', substr($s, 0, 16));
            if ($hdr['FilePos'] != $position) die("Read $part part of message verify error");

            // Take the payload straight from the record. Unpacking it as a
            // 511-byte field (as done previously) lost the last byte whenever
            // a record was filled completely.
            $size = $hdr['ItemSize'];
            if ($size < 0 || $size > 512) {
                $size = 512;
            }
            $msg .= (string) substr($s, 16, $size);

            $position = $hdr['NextItem'];
            if ($position == 0) break;
            fseek($fp, $position);
        }
        return $msg;
    }

    /**
     * Reads the message info record stored at $position.
     *
     * The record is a 12-byte header, a table of 4-byte entries (low byte =
     * field type, upper 24 bits = either the value itself or an offset into
     * the data area) and the data area. Only the field types listed below are
     * decoded; the keys appear in the order the entries occur in the file.
     *
     * @param resource $fp       open DBX file
     * @param int      $position file offset of the record
     * @return array decoded fields, keyed by MsgFlags, Sent, position, MessageID,
     *               Subject, From_reply, References, Newsgroup, From, Reply_To,
     *               Received, Receipt, Account, AccountID, Msg, size (as present)
     */
    public function ReadMessageInfo($fp, $position)
    {
        fseek($fp, $position);
        $raw = fread($fp, 12);
        if ($raw === false || strlen($raw) < 12) die('Message Info verify error');
        $msg_header = unpack('Vposition/VDataLength/vHeaderLength/vFlagCount', $raw);
        if ($msg_header['position'] != $position) die('Message Info verify error');

        $flags = ($msg_header['FlagCount'] & 0xFF);
        $flagsSize = 4 * $flags;
        $dataSize = $msg_header['DataLength'] - $flagsSize;
        // fread() rejects a length below 1 on PHP 8, so skip empty sections
        $FlagsBuffer = ($flagsSize > 0) ? (string) fread($fp, $flagsSize) : '';
        $DataBuffer = ($dataSize > 0) ? (string) fread($fp, $dataSize) : '';

        // field type => key, value is a NUL-terminated string in the data area
        $stringFields = array(
            0x7  => 'MessageID',
            0x8  => 'Subject',
            0x9  => 'From_reply',
            0xA  => 'References',
            0xB  => 'Newsgroup',
            0xD  => 'From',
            0xE  => 'Reply_To',
            0x13 => 'Receipt',
            0x1A => 'Account',
        );
        // field type => key, value is a 32-bit number in the data area
        $dwordFields = array(
            0x2  => 'Sent',
            0x4  => 'position',
            0x12 => 'Received',
        );
        // field type => key, value is carried in the upper 24 bits of the entry itself
        $directFields = array(
            0x80 => 'Msg',
            0x81 => 'MsgFlags',
            0x84 => 'position',
            0x91 => 'size',
        );

        $message_info = array();
        for ($i = 0; $i < $flags; $i++) {
            $f = $this->dwordAt($FlagsBuffer, $i * 4);
            if ($f === null) break; // table shorter than announced
            $type = $f & 0xFF;
            $value = ($f >> 8) & 0xFFFFFF;

            if ($type == 0x1) {
                // message flags: three bytes in the data area, least significant first
                $bytes = (string) substr($DataBuffer, $value, 3);
                $msgFlags = 0;
                for ($b = 0; $b < strlen($bytes); $b++) {
                    $msgFlags += ord($bytes[$b]) << (8 * $b);
                }
                $message_info['MsgFlags'] = $msgFlags;
            } elseif (isset($stringFields[$type])) {
                $message_info[$stringFields[$type]] = $this->readstring($DataBuffer, $value);
            } elseif (isset($dwordFields[$type])) {
                $message_info[$dwordFields[$type]] = $this->dwordAt($DataBuffer, $value);
            } elseif ($type == 0x1B) {
                $message_info['AccountID'] = intval($this->readstring($DataBuffer, $value));
            } elseif (isset($directFields[$type])) {
                $message_info[$directFields[$type]] = $value;
            }
        }
        return $message_info;
    } // end func ReadMessageInfo

    /**
     * Walks the index node at $position: first the node referenced by
     * NextIndex, then the one referenced by PrevIndex, then the node's own
     * entries (each entry's child node right after the entry). Every message
     * found is appended to $this->mails. Visited nodes are remembered in
     * $this->tmp so that no node is processed twice.
     *
     * @param resource $fp       open DBX file
     * @param int      $position file offset of the index node
     */
    public function readIndex($fp, $position)
    {
        fseek($fp, $position);
        $raw = fread($fp, 24);
        if ($raw === false || strlen($raw) < 24) die('Verify error');
        $index_header = unpack('VFilePos/VUnknown1/VPrevIndex/VNextIndex/VCount/VUnknown', $raw);
        if ($index_header['FilePos'] != $position) die('Verify error');

        $this->tmp[$position] = true; // push it into list of processed items

        if ($index_header['NextIndex'] > 0 && empty($this->tmp[$index_header['NextIndex']])) {
            $this->readIndex($fp, $index_header['NextIndex']);
        }
        if ($index_header['PrevIndex'] > 0 && empty($this->tmp[$index_header['PrevIndex']])) {
            $this->readIndex($fp, $index_header['PrevIndex']);
        }

        $icount = $index_header['Count'] >> 8;
        if ($icount > 0) {
            // the recursive calls above moved the file pointer, so seek again
            fseek($fp, $index_header['FilePos'] + 24);
            $buf = (string) fread($fp, 12 * $icount);
            // never walk past what was actually read
            $available = (int) floor(strlen($buf) / 12);
            if ($icount > $available) {
                $icount = $available;
            }
            // compare the file name only, so that a path to folders.dbx is recognised too
            $isFolderFile = (strtolower(basename((string) $this->fname)) == 'folders.dbx');

            for ($i = 0; $i < $icount; $i++) {
                $IndexItem = unpack('VHeaderPos/VChildIndex/VUnknown', substr($buf, $i * 12, 12));
                if ($IndexItem['HeaderPos'] > 0) {
                    if ($isFolderFile) {
                        print 'Read folder not implemented in v1.0a<br>';
                    } else {
                        $mail = array();
                        $mail['info'] = $this->ReadMessageInfo($fp, $IndexItem['HeaderPos']);
                        // an info record without a position yields content === false
                        $msgPos = isset($mail['info']['position']) ? $mail['info']['position'] : 0;
                        $mail['content'] = $this->ReadMessage($fp, $msgPos);
                        $this->mails[] = $mail;
                    }
                }
                if ($IndexItem['ChildIndex'] > 0 && empty($this->tmp[$IndexItem['ChildIndex']])) {
                    $this->readIndex($fp, $IndexItem['ChildIndex']);
                }
            } //end for
        } //end if
    } //end func readIndex

    /**
     * Debug helper: turns a MsgFlags bit field into a human readable list.
     *
     * @param int $x message flags as stored in $mail['info']['MsgFlags']
     * @return string one "NAME\n<br>" line per flag that is set, '' when none is
     */
    public function decode_flags($x)
    {
        $x = (int) $x;
        // listed in the order the lines are emitted
        $labels = array(
            0x800    => 'NEWS MESSAGE', // to verify
            0x100    => 'DOWNLOAD LATER',
            0x1      => 'DOWNLOADED',
            0x80     => 'READED',
            0x20     => 'MARKED',
            0x4000   => 'ATTACHMENTS',
            0x80000  => 'REPLY',
            0x400000 => 'INSPECT CONVERSATION',
            0x800000 => 'IGNORE CONVERSATION',
        );
        $decoded_flags = '';
        foreach ($labels as $mask => $label) {
            if (($x & $mask) != 0) {
                $decoded_flags .= $label."\n<br>";
            }
        }
        return $decoded_flags;
    }
} // end class ms_dbx_reader
/********************************************************************************************
* Description: Example of use ms_dbx_reader class
* Date: 9/27/2002
* Author: Zeos <dev@strategma.bg>
********************************************************************************************/
//create the reader object; its constructor parses the DBX file and stores the mails in $mailbox -> mails
$mailbox = new ms_dbx_reader('./data/else.dbx',true);
//take the mail at index 1 (the second one that was parsed); fall back to an empty
//record when the file could not be read or holds fewer mails
$mail = isset($mailbox -> mails[1]) ? $mailbox -> mails[1] : array('info' => array(), 'content' => '');
//prepare mail info for friendly display
//the page is served as windows-1251, so escape with that charset: with the default
//(UTF-8) htmlspecialchars() returns an empty string for bytes that are not valid UTF-8
$mail_info = '';
foreach ($mail['info'] as $k => $v) $mail_info .= "$k = ".htmlspecialchars((string) $v, ENT_QUOTES, 'Windows-1251')."\n<br>";
$mail_flags = $mailbox -> decode_flags(isset($mail['info']['MsgFlags']) ? $mail['info']['MsgFlags'] : 0);
//prepare raw mail for friendly display (content is false when the message could not be read)
$raw_mail = (string) $mail['content'];
$mail_size = strlen($raw_mail);
$mail_content = nl2br(htmlspecialchars($raw_mail, ENT_QUOTES, 'Windows-1251'));
//just dump them
?>
<html>
<head>
<title>MS DBX file reader class by Zeos [dev@strategma.bg]</title>
<meta NAME="Author" CONTENT="Zeos">
<meta NAME="Keywords" CONTENT="MS Outlook DBX reader">
<meta NAME="Description" CONTENT="Microsoft DBX file reader (such as Outlook Express Mailbox database files)">
<meta http-equiv="Content-Type" content="text/html; charset=windows-1251">
<!-- just for a nice view :) -->
<STYLE>
BODY, TD, TR, TH, P {
font-family: Arial;
font-size: 14px;
color: Black;
}
</STYLE>
</head>
<body>
<hr>
<!-- display message information stored in the DBX file -->
<?=$mail_info?>
<!-- display OE message flags -->
flags:<br>
<BLOCKQUOTE><?=$mail_flags?></BLOCKQUOTE>
raw mail size: <?=$mail_size?> bytes<br>
<hr>
<!-- display MIME message as-is -->
<?=$mail_content?>
</body>
</html>