Version: 1.0

Type: Function

Category: Other

License: Mozilla Public License

Description: Function that takes a string in CSV format and splits it into an array



/**
 * Split a single CSV record into an array of its component fields.
 *
 * ================================================
 * Copyright 2002, StepTwo Designs Pty Ltd
 * Freely available to use. Please leave this
 * credit in place. Please notify bwhite@steptwo.com.au
 * if you find any problems.
 * ================================================
 *
 * CSV is complicated by the ways that double quotes and commas
 * interact. The simplest way to describe what this function does is
 * to give some examples of fields it can handle:
 *
 *   Field                    Result
 *    ,,          =======>     ''
 *    ,"",        =======>     ''
 *    ,abc,       =======>     'abc'
 *    ,"a,b,c",   =======>     'a,b,c'
 *    ,"a,"",c",  =======>     'a,",c'
 *    ,a 2" pipe, =======>     'a 2" pipe'
 *
 * Notes:
 *  - Trailing CR/LF characters are stripped before splitting.
 *  - null, false and the empty string yield an empty array; any other
 *    value is cast to a string, so the record "0" yields array('0').
 *  - If the record ends while a quoted field is still open (an odd
 *    number of '"' characters remains), that trailing field is
 *    discarded.
 *
 * @param string $str One CSV record, with or without its line ending.
 *
 * @return array List of field values with the CSV quoting removed.
 */
function CSVSplit( $str )
{
    $bits = array();

    // Only a missing or empty record produces no fields. A loose
    // truthiness test here would also reject the valid record "0".
    if ( $str === null || $str === false || $str === '' )
    {
        return $bits;
    }

    // Remove any line end markers
    $str = rtrim( (string) $str, "\r\n" );

    // The guts of the way this function works is by counting the
    // number of '"' characters that have been collected so far in a
    // CSV field - if that is zero or an even number then the field is
    // complete.
    //
    // This is, however, broken by cases such as
    //
    //   ,Pipe is 2.5" in diameter,
    //
    // To get around this, the first thing to do is to replace any '"'
    // that isn't next to a ',' or the start or end of the line or
    // another '"' with '""'. Lookbehind/lookahead are used so that the
    // neighbouring characters are not consumed by the match; otherwise
    // two stray quotes one character apart (x"y"z) would leave the
    // second one undoubled and unbalance the count.
    $str = preg_replace( '/(?<=[^",])"(?=[^",])/', '""', $str );

    $quottot = 0;   // '"' characters seen since the current field began
    $lastbit = '';  // text accumulated for the current field

    foreach ( explode( ',', $str ) as $bit )
    {
        $quottot += substr_count( $bit, '"' );
        $lastbit .= $bit;

        if ( ( $quottot % 2 ) !== 0 )
        {
            // Odd number of quotes: we are still inside a quoted field,
            // so the comma we split on was data. Put it back.
            $lastbit .= ',';
            continue;
        }

        if ( $lastbit === '""' )
        {
            // Special case: a field that is just "" is an empty string.
            // The generic replacement below would turn it into '"'.
            $bits[] = '';
        }
        else
        {
            // Replace "" with ", and delete single "'s
            $bits[] = preg_replace( '/"("?)/', '$1', $lastbit );
        }

        $quottot = 0;
        $lastbit = '';
    }

    return $bits;
}