Date: 10/28/98
- Next message: ssb: "[PHP-DEV] CVS update: php3"
- Previous message: ssb: "[PHP-DEV] CVS update: php3/doc/functions"
- Next in thread: shane: "[PHP-DEV] CVS update: php3/functions"
- Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]
Date: Wednesday October 28, 1998 @ 0:27
Author: ssb
Update of /repository/php3/functions
In directory asf:/u2/tmp/cvs-serv15755/functions
Modified Files:
xml.c php3_xml.h
Log Message:
added utf8_encode() and utf8_decode()
Index: php3/functions/xml.c
diff -c php3/functions/xml.c:1.13 php3/functions/xml.c:1.14
*** php3/functions/xml.c:1.13 Tue Oct 27 22:35:49 1998
--- php3/functions/xml.c Wed Oct 28 00:27:47 1998
***************
*** 27,33 ****
+----------------------------------------------------------------------+
*/
! /* $Id: xml.c,v 1.13 1998/10/28 03:35:49 ssb Exp $ */
#define IS_EXT_MODULE
#if COMPILE_DL
# if PHP_31
--- 27,33 ----
+----------------------------------------------------------------------+
*/
! /* $Id: xml.c,v 1.14 1998/10/28 05:27:47 ssb Exp $ */
#define IS_EXT_MODULE
#if COMPILE_DL
# if PHP_31
***************
*** 60,66 ****
*/
/* Long-term TODO list:
- * - Support other character sets than ISO-8859-1
* - Fix the expat library so you can install your own memory manager
* functions
*/
--- 60,65 ----
***************
*** 111,119 ****
static char *php3i_pvalstrcpy(pval *);
static void xml_destroy_parser(xml_parser *);
static void xml_set_handler(char **, pval *);
inline static char xml_decode_iso_8859_1(unsigned short);
inline static char xml_decode_us_ascii(unsigned short);
! static char *xml_treat_utf8(const XML_Char *, int, int *, const XML_Char *);
static pval *xml_call_handler(xml_parser *, char *, int, pval **);
static pval *php3i_longpval(long);
static pval *php3i_stringpval(const char *);
--- 110,121 ----
static char *php3i_pvalstrcpy(pval *);
static void xml_destroy_parser(xml_parser *);
static void xml_set_handler(char **, pval *);
+ inline static unsigned short xml_encode_iso_8859_1(unsigned char);
inline static char xml_decode_iso_8859_1(unsigned short);
+ inline static unsigned short xml_encode_us_ascii(char);
inline static char xml_decode_us_ascii(unsigned short);
! static XML_Char *xml_utf8_encode(const char *, int, int *, const XML_Char *);
! static char *xml_utf8_decode(const XML_Char *, int, int *, const XML_Char *);
static pval *xml_call_handler(xml_parser *, char *, int, pval **);
static pval *php3i_longpval(long);
static pval *php3i_stringpval(const char *);
***************
*** 150,155 ****
--- 152,159 ----
PHP_FE(xml_parser_free, NULL)
PHP_FE(xml_parser_set_option, NULL)
PHP_FE(xml_parser_get_option, NULL)
+ PHP_FE(utf8_encode, NULL)
+ PHP_FE(utf8_decode, NULL)
{NULL, NULL, NULL}
};
***************
*** 168,177 ****
* the encoding is currently done internally by expat/xmltok.
*/
xml_encoding xml_encodings[] = {
! { "ISO-8859-1", xml_decode_iso_8859_1, NULL },
! { "US-ASCII", xml_decode_us_ascii, NULL },
! { "UTF-8", NULL, NULL },
! { NULL, NULL, NULL }
};
/* }}} */
--- 172,181 ----
* the encoding is currently done internally by expat/xmltok.
*/
xml_encoding xml_encodings[] = {
! { "ISO-8859-1", xml_decode_iso_8859_1, xml_encode_iso_8859_1 },
! { "US-ASCII", xml_decode_us_ascii, xml_encode_us_ascii },
! { "UTF-8", NULL, NULL },
! { NULL, NULL, NULL }
};
/* }}} */
***************
*** 395,400 ****
--- 399,414 ----
}
/* }}} */
+ /* {{{ xml_encode_iso_8859_1() */
+
+ inline static unsigned short
+ xml_encode_iso_8859_1(unsigned char c)
+ {
+ php3_printf("c=%d ", c);
+ return (unsigned short)c;
+ }
+
+ /* }}} */
/* {{{ xml_decode_iso_8859_1() */
inline static char
***************
*** 404,409 ****
--- 418,432 ----
}
/* }}} */
+ /* {{{ xml_encode_us_ascii() */
+
+ inline static unsigned short
+ xml_encode_us_ascii(char c)
+ {
+ return (unsigned short)c;
+ }
+
+ /* }}} */
/* {{{ xml_decode_us_ascii() */
inline static char
***************
*** 413,418 ****
--- 436,443 ----
}
/* }}} */
+ /* {{{ xml_get_encoding() */
+
static xml_encoding *
xml_get_encoding(const XML_Char *name)
{
***************
*** 425,434 ****
enc++;
}
return NULL;
}
! /* {{{ xml_treat_utf8 */
static char *
! xml_treat_utf8(const XML_Char *s, int len, int *newlen, const XML_Char *encoding)
{
int pos = len;
char *newbuf = emalloc(len);
--- 450,515 ----
enc++;
}
return NULL;
+ }
+
+ /* }}} */
+ /* {{{ xml_utf8_encode */
+ static XML_Char *
+ xml_utf8_encode(const char *s, int len, int *newlen, const XML_Char *encoding)
+ {
+ int pos = len;
+ char *newbuf;
+ unsigned short c;
+ unsigned short (*encoder)(char) = NULL;
+ xml_encoding *enc = xml_get_encoding(encoding);
+
+ *newlen = 0;
+ if (enc) {
+ encoder = enc->encoding_function;
+ } else {
+ /* If the target encoding was unknown, fail */
+ return NULL;
+ }
+ if (encoder == NULL) {
+ /* If no encoder function was specified, return the data as-is.
+ */
+ newbuf = emalloc(len);
+ memcpy(newbuf, s, len);
+ *newlen = len;
+ return newbuf;
+ }
+ /* This is the theoretical max (will never get beyond len * 2 as long
+ * as we are converting from single-byte characters, though) */
+ newbuf = emalloc(len * 4);
+ while (pos > 0) {
+ c = encoder ? encoder((unsigned char)(*s)) : (unsigned short)(*s);
+ if (c < 0x80) {
+ newbuf[(*newlen)++] = c;
+ } else if (c < 0x800) {
+ newbuf[(*newlen)++] = (0xc0 | (c >> 6));
+ newbuf[(*newlen)++] = (0x80 | (c & 0x3f));
+ } else if (c < 0x10000) {
+ newbuf[(*newlen)++] = (0xe0 | (c >> 12));
+ newbuf[(*newlen)++] = (0xc0 | ((c >> 6) & 0x3f));
+ newbuf[(*newlen)++] = (0x80 | (c & 0x3f));
+ } else if (c < 0x200000) {
+ newbuf[(*newlen)++] = (0xf0 | (c >> 18));
+ newbuf[(*newlen)++] = (0xe0 | ((c >> 12) & 0x3f));
+ newbuf[(*newlen)++] = (0xc0 | ((c >> 6) & 0x3f));
+ newbuf[(*newlen)++] = (0x80 | (c & 0x3f));
+ }
+ pos--;
+ s++;
+ }
+ if (*newlen < len * 4) {
+ newbuf = erealloc(newbuf, *newlen);
+ }
+ return newbuf;
}
! /* }}} */
! /* {{{ xml_utf8_decode */
static char *
! xml_utf8_decode(const XML_Char *s, int len, int *newlen, const XML_Char *encoding)
{
int pos = len;
char *newbuf = emalloc(len);
***************
*** 449,459 ****
return newbuf;
}
while (pos > 0) {
- #ifdef XML_UNICODE /* defined if we are using 16-bit Unicode characters */
- c = (*s);
- len--;
- s++;
- #else
c = (unsigned char)(*s);
if (c >= 0xf0) { /* four bytes encoded, 21 bits */
c = ((s[0]&7)<<18) | ((s[1]&63)<<12) | ((s[2]&63)<<6) | (s[3]&63);
--- 530,535 ----
***************
*** 471,477 ****
s++;
pos--;
}
- #endif
newbuf[*newlen] = decoder ? decoder(c) : c;
++*newlen;
}
--- 547,552 ----
***************
*** 521,527 ****
len = php3i_xmlcharlen(s);
}
ret->type = IS_STRING;
! ret->value.str.val = xml_treat_utf8(s, len, &ret->value.str.len, encoding);
return ret;
}
--- 596,602 ----
len = php3i_xmlcharlen(s);
}
ret->type = IS_STRING;
! ret->value.str.val = xml_utf8_decode(s, len, &ret->value.str.len, encoding);
return ret;
}
***************
*** 1216,1221 ****
--- 1291,1338 ----
break;
}
RETVAL_FALSE;
+ }
+
+ /* }}} */
+ /* {{{ string utf8_encode(string data) */
+
+ PHP_FUNCTION(utf8_encode)
+ {
+ pval *arg;
+ XML_Char *encoded;
+ int len;
+ XML_TLS_VARS;
+
+ if (ARG_COUNT(ht) != 1 || getParameters(ht, 1, &arg) == FAILURE) {
+ WRONG_PARAM_COUNT;
+ }
+ convert_to_string(arg);
+ encoded = xml_utf8_encode(arg->value.str.val, arg->value.str.len, &len, "ISO-8859-1");
+ if (encoded == NULL) {
+ RETURN_FALSE;
+ }
+ RETVAL_STRINGL(encoded, len, 0);
+ }
+
+ /* }}} */
+ /* {{{ string utf8_decode(string data) */
+
+ PHP_FUNCTION(utf8_decode)
+ {
+ pval *arg;
+ XML_Char *decoded;
+ int len;
+ XML_TLS_VARS;
+
+ if (ARG_COUNT(ht) != 1 || getParameters(ht, 1, &arg) == FAILURE) {
+ WRONG_PARAM_COUNT;
+ }
+ convert_to_string(arg);
+ decoded = xml_utf8_decode(arg->value.str.val, arg->value.str.len, &len, "ISO-8859-1");
+ if (decoded == NULL) {
+ RETURN_FALSE;
+ }
+ RETVAL_STRINGL(decoded, len, 0);
}
/* }}} */
Index: php3/functions/php3_xml.h
diff -c php3/functions/php3_xml.h:1.6 php3/functions/php3_xml.h:1.7
*** php3/functions/php3_xml.h:1.6 Tue Oct 27 22:35:50 1998
--- php3/functions/php3_xml.h Wed Oct 28 00:27:47 1998
***************
*** 37,42 ****
--- 37,46 ----
#include <xml/xmltok.h>
#include <xml/xmlparse.h>
+ #ifdef XML_UNICODE
+ # error "UTF-16 Unicode support not implemented!"
+ #endif
+
typedef struct {
int le_xml_parser;
XML_Char *default_encoding;
***************
*** 96,101 ****
--- 100,107 ----
PHP_FUNCTION(xml_parser_free);
PHP_FUNCTION(xml_parser_set_option);
PHP_FUNCTION(xml_parser_get_option);
+ PHP_FUNCTION(utf8_encode);
+ PHP_FUNCTION(utf8_decode);
#else /* !HAVE_LIBEXPAT */
--
PHP Development Mailing List http://www.php.net/
To unsubscribe, send an empty message to php-dev-unsubscribe <email protected>
For help: php-dev-help <email protected>
- Next message: ssb: "[PHP-DEV] CVS update: php3"
- Previous message: ssb: "[PHP-DEV] CVS update: php3/doc/functions"
- Next in thread: shane: "[PHP-DEV] CVS update: php3/functions"
- Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]

