Date: 10/27/98
- Next message: ssb: "[PHP-DEV] CVS update: php3/doc"
- Previous message: ssb: "[PHP-DEV] CVS update: php3/doc/functions"
- Next in thread: ssb: "[PHP-DEV] CVS update: php3/functions"
- Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]
Date: Tuesday October 27, 1998 @ 22:35
Author: ssb
Update of /repository/php3/functions
In directory asf:/u2/tmp/cvs-serv14955/functions
Modified Files:
xml.c php3_xml.h
Log Message:
Unicode support (still no UTF-16, of course)
Index: php3/functions/xml.c
diff -c php3/functions/xml.c:1.12 php3/functions/xml.c:1.13
*** php3/functions/xml.c:1.12 Tue Oct 27 17:42:53 1998
--- php3/functions/xml.c Tue Oct 27 22:35:49 1998
***************
*** 27,33 ****
+----------------------------------------------------------------------+
*/
! /* $Id: xml.c,v 1.12 1998/10/27 22:42:53 ssb Exp $ */
#define IS_EXT_MODULE
#if COMPILE_DL
# if PHP_31
--- 27,33 ----
+----------------------------------------------------------------------+
*/
! /* $Id: xml.c,v 1.13 1998/10/28 03:35:49 ssb Exp $ */
#define IS_EXT_MODULE
#if COMPILE_DL
# if PHP_31
***************
*** 111,121 ****
static char *php3i_pvalstrcpy(pval *);
static void xml_destroy_parser(xml_parser *);
static void xml_set_handler(char **, pval *);
! static char *xml_utf8_decode(const XML_Char *, int, int *);
static pval *xml_call_handler(xml_parser *, char *, int, pval **);
static pval *php3i_longpval(long);
static pval *php3i_stringpval(const char *);
! static pval *php3i_xmlcharpval(const XML_Char *, int);
static int php3i_xmlcharlen(const XML_Char *);
void php3i_xml_startElementHandler(void *, const char *, const char **);
--- 111,123 ----
static char *php3i_pvalstrcpy(pval *);
static void xml_destroy_parser(xml_parser *);
static void xml_set_handler(char **, pval *);
! inline static char xml_decode_iso_8859_1(unsigned short);
! inline static char xml_decode_us_ascii(unsigned short);
! static char *xml_treat_utf8(const XML_Char *, int, int *, const XML_Char *);
static pval *xml_call_handler(xml_parser *, char *, int, pval **);
static pval *php3i_longpval(long);
static pval *php3i_stringpval(const char *);
! static pval *php3i_xmlcharpval(const XML_Char *, int, const XML_Char *);
static int php3i_xmlcharlen(const XML_Char *);
void php3i_xml_startElementHandler(void *, const char *, const char **);
***************
*** 162,167 ****
--- 164,179 ----
STANDARD_MODULE_PROPERTIES
};
+ /* All the encoding functions are set to NULL right now, since all
+ * the encoding is currently done internally by expat/xmltok.
+ */
+ xml_encoding xml_encodings[] = {
+ { "ISO-8859-1", xml_decode_iso_8859_1, NULL },
+ { "US-ASCII", xml_decode_us_ascii, NULL },
+ { "UTF-8", NULL, NULL },
+ { NULL, NULL, NULL }
+ };
+
/* }}} */
/* {{{ startup, shutdown and info functions */
***************
*** 217,222 ****
--- 229,235 ----
REGISTER_LONG_CONSTANT("XML_ERROR_EXTERNAL_ENTITY_HANDLING", XML_ERROR_EXTERNAL_ENTITY_HANDLING, CONST_CS|CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("XML_OPTION_CASE_FOLDING", PHP3_XML_OPTION_CASE_FOLDING, CONST_CS|CONST_PERSISTENT);
+ REGISTER_LONG_CONSTANT("XML_OPTION_TARGET_ENCODING", PHP3_XML_OPTION_TARGET_ENCODING, CONST_CS|CONST_PERSISTENT);
return SUCCESS;
}
***************
*** 381,398 ****
return NULL;
}
/* }}} */
! /* {{{ xml_utf8_decode */
static char *
! xml_utf8_decode(const XML_Char *s, int len, int *newlen)
{
! int c, pos = len;
char *newbuf = emalloc(len);
*newlen = 0;
!
while (pos > 0) {
! #ifdef XML_UNICODE /* defined if we are using multi-byte characters */
c = (*s);
len--;
s++;
--- 394,455 ----
return NULL;
}
+ /* }}} */
+ /* {{{ xml_decode_iso_8859_1() */
+
+ inline static char
+ xml_decode_iso_8859_1(unsigned short c)
+ {
+ return (char)(c > 0xff ? '?' : c);
+ }
+
/* }}} */
! /* {{{ xml_decode_us_ascii() */
!
! inline static char
! xml_decode_us_ascii(unsigned short c)
! {
! return (char)(c > 0x7f ? '?' : c);
! }
!
! /* }}} */
! static xml_encoding *
! xml_get_encoding(const XML_Char *name)
! {
! xml_encoding *enc = &xml_encodings[0];
!
! while (enc && enc->name) {
! if (strcasecmp(name, enc->name) == 0) {
! return enc;
! }
! enc++;
! }
! return NULL;
! }
! /* {{{ xml_treat_utf8 */
static char *
! xml_treat_utf8(const XML_Char *s, int len, int *newlen, const XML_Char *encoding)
{
! int pos = len;
char *newbuf = emalloc(len);
+ unsigned short c;
+ char (*decoder)(unsigned short) = NULL;
+ xml_encoding *enc = xml_get_encoding(encoding);
*newlen = 0;
! if (enc) {
! decoder = enc->decoding_function;
! }
! if (decoder == NULL) {
! /* If the target encoding was unknown, or no decoder function
! * was specified, return the UTF-8-encoded data as-is.
! */
! memcpy(newbuf, s, len);
! *newlen = len;
! return newbuf;
! }
while (pos > 0) {
! #ifdef XML_UNICODE /* defined if we are using 16-bit Unicode characters */
c = (*s);
len--;
s++;
***************
*** 415,424 ****
pos--;
}
#endif
! if (c > 0xff) {
! c = '?';
! }
! newbuf[*newlen] = c;
++*newlen;
}
if (*newlen < len) {
--- 472,478 ----
pos--;
}
#endif
! newbuf[*newlen] = decoder ? decoder(c) : c;
++*newlen;
}
if (*newlen < len) {
***************
*** 455,461 ****
/* }}} */
/* {{{ php3i_xmlcharpval() */
! static pval *php3i_xmlcharpval(const XML_Char *s, int len)
{
pval *ret = emalloc(sizeof(pval));
--- 509,515 ----
/* }}} */
/* {{{ php3i_xmlcharpval() */
! static pval *php3i_xmlcharpval(const XML_Char *s, int len, const XML_Char *encoding)
{
pval *ret = emalloc(sizeof(pval));
***************
*** 467,473 ****
len = php3i_xmlcharlen(s);
}
ret->type = IS_STRING;
! ret->value.str.val = xml_utf8_decode(s, len, &ret->value.str.len);
return ret;
}
--- 521,527 ----
len = php3i_xmlcharlen(s);
}
ret->type = IS_STRING;
! ret->value.str.val = xml_treat_utf8(s, len, &ret->value.str.len, encoding);
return ret;
}
***************
*** 577,583 ****
pval *retval, *args[2];
args[0] = php3i_longpval(parser->index);
! args[1] = php3i_xmlcharpval(s, len);
if ((retval = xml_call_handler(parser, parser->characterDataHandler, 2, args))) {
php3tls_pval_destructor(retval);
efree(retval);
--- 631,637 ----
pval *retval, *args[2];
args[0] = php3i_longpval(parser->index);
! args[1] = php3i_xmlcharpval(s, len, parser->target_encoding);
if ((retval = xml_call_handler(parser, parser->characterDataHandler, 2, args))) {
php3tls_pval_destructor(retval);
efree(retval);
***************
*** 599,606 ****
pval *retval, *args[3];
args[0] = php3i_longpval(parser->index);
! args[1] = php3i_xmlcharpval(target, 0);
! args[2] = php3i_xmlcharpval(data, 0);
if ((retval = xml_call_handler(parser, parser->processingInstructionHandler, 3, args))) {
php3tls_pval_destructor(retval);
efree(retval);
--- 653,660 ----
pval *retval, *args[3];
args[0] = php3i_longpval(parser->index);
! args[1] = php3i_xmlcharpval(target, 0, parser->target_encoding);
! args[2] = php3i_xmlcharpval(data, 0, parser->target_encoding);
if ((retval = xml_call_handler(parser, parser->processingInstructionHandler, 3, args))) {
php3tls_pval_destructor(retval);
efree(retval);
***************
*** 620,626 ****
pval *retval, *args[2];
args[0] = php3i_longpval(parser->index);
! args[1] = php3i_xmlcharpval(s, len);
if ((retval = xml_call_handler(parser, parser->defaultHandler, 2, args))) {
php3tls_pval_destructor(retval);
efree(retval);
--- 674,680 ----
pval *retval, *args[2];
args[0] = php3i_longpval(parser->index);
! args[1] = php3i_xmlcharpval(s, len, parser->target_encoding);
if ((retval = xml_call_handler(parser, parser->defaultHandler, 2, args))) {
php3tls_pval_destructor(retval);
efree(retval);
***************
*** 645,655 ****
pval *retval, *args[5];
args[0] = php3i_longpval(parser->index);
! args[1] = php3i_xmlcharpval(entityName, 0);
! args[2] = php3i_xmlcharpval(base, 0);
! args[3] = php3i_xmlcharpval(systemId, 0);
! args[4] = php3i_xmlcharpval(publicId, 0);
! args[5] = php3i_xmlcharpval(notationName, 0);
if ((retval = xml_call_handler(parser, parser->unparsedEntityDeclHandler, 6, args))) {
php3tls_pval_destructor(retval);
efree(retval);
--- 699,709 ----
pval *retval, *args[5];
args[0] = php3i_longpval(parser->index);
! args[1] = php3i_xmlcharpval(entityName, 0, parser->target_encoding);
! args[2] = php3i_xmlcharpval(base, 0, parser->target_encoding);
! args[3] = php3i_xmlcharpval(systemId, 0, parser->target_encoding);
! args[4] = php3i_xmlcharpval(publicId, 0, parser->target_encoding);
! args[5] = php3i_xmlcharpval(notationName, 0, parser->target_encoding);
if ((retval = xml_call_handler(parser, parser->unparsedEntityDeclHandler, 6, args))) {
php3tls_pval_destructor(retval);
efree(retval);
***************
*** 674,683 ****
pval *retval, *args[5];
args[0] = php3i_longpval(parser->index);
! args[1] = php3i_xmlcharpval(notationName, 0);
! args[2] = php3i_xmlcharpval(base, 0);
! args[3] = php3i_xmlcharpval(systemId, 0);
! args[4] = php3i_xmlcharpval(publicId, 0);
if ((retval = xml_call_handler(parser, parser->notationDeclHandler, 5, args))) {
php3tls_pval_destructor(retval);
efree(retval);
--- 728,737 ----
pval *retval, *args[5];
args[0] = php3i_longpval(parser->index);
! args[1] = php3i_xmlcharpval(notationName, 0, parser->target_encoding);
! args[2] = php3i_xmlcharpval(base, 0, parser->target_encoding);
! args[3] = php3i_xmlcharpval(systemId, 0, parser->target_encoding);
! args[4] = php3i_xmlcharpval(publicId, 0, parser->target_encoding);
if ((retval = xml_call_handler(parser, parser->notationDeclHandler, 5, args))) {
php3tls_pval_destructor(retval);
efree(retval);
***************
*** 703,712 ****
pval *retval, *args[5];
args[0] = php3i_longpval(parser->index);
! args[1] = php3i_xmlcharpval(openEntityNames, 0);
! args[2] = php3i_xmlcharpval(base, 0);
! args[3] = php3i_xmlcharpval(systemId, 0);
! args[4] = php3i_xmlcharpval(publicId, 0);
if ((retval = xml_call_handler(parser, parser->externalEntityRefHandler, 5, args))) {
convert_to_long(retval);
ret = retval->value.lval;
--- 757,766 ----
pval *retval, *args[5];
args[0] = php3i_longpval(parser->index);
! args[1] = php3i_xmlcharpval(openEntityNames, 0, parser->target_encoding);
! args[2] = php3i_xmlcharpval(base, 0, parser->target_encoding);
! args[3] = php3i_xmlcharpval(systemId, 0, parser->target_encoding);
! args[4] = php3i_xmlcharpval(publicId, 0, parser->target_encoding);
if ((retval = xml_call_handler(parser, parser->externalEntityRefHandler, 5, args))) {
convert_to_long(retval);
ret = retval->value.lval;
***************
*** 743,748 ****
--- 797,805 ----
if (argc == 1) {
convert_to_string(encodingArg);
+ /* The supported encoding types are hardcoded here because
+ * we are limited to the encodings supported by expat/xmltok.
+ */
if (strncasecmp(encodingArg->value.str.val, "ISO-8859-1",
encodingArg->value.str.len) == 0) {
encoding = "ISO-8859-1";
***************
*** 752,760 ****
} else if (strncasecmp(encodingArg->value.str.val, "US-ASCII",
encodingArg->value.str.len) == 0) {
encoding = "US-ASCII";
! } else if (strncasecmp(encodingArg->value.str.val, "UTF-16",
! encodingArg->value.str.len) == 0) {
! php3_error(E_WARNING, "%s: unsupported encoding", thisfunc);
RETURN_FALSE;
}
} else {
--- 809,817 ----
} else if (strncasecmp(encodingArg->value.str.val, "US-ASCII",
encodingArg->value.str.len) == 0) {
encoding = "US-ASCII";
! } else { /* UTF-16 not supported */
! php3_error(E_WARNING, "%s: unsupported source encoding \"%s\"",
! thisfunc, encodingArg->value.str.val);
RETURN_FALSE;
}
} else {
***************
*** 763,768 ****
--- 820,826 ----
parser = ecalloc(sizeof(xml_parser), 1);
parser->parser = XML_ParserCreate(encoding);
+ parser->target_encoding = encoding;
XML_SetUserData(parser->parser, parser);
XML_SetElementHandler(parser->parser, php3i_xml_startElementHandler, php3i_xml_endElementHandler);
XML_SetCharacterDataHandler(parser->parser, php3i_xml_characterDataHandler);
***************
*** 1108,1113 ****
--- 1166,1181 ----
convert_to_long(val);
parser->case_folding = val->value.lval;
break;
+ case PHP3_XML_OPTION_TARGET_ENCODING: {
+ xml_encoding *enc = xml_get_encoding(val->value.str.val);
+ if (enc == NULL) {
+ php3_error(E_WARNING, "%s: unsupported target encoding \"%s\"",
+ thisfunc, val->value.str.val);
+ RETURN_FALSE;
+ }
+ parser->target_encoding = enc->name;
+ break;
+ }
default:
php3_error(E_WARNING, "%s: unknown option", thisfunc);
RETURN_FALSE;
***************
*** 1138,1143 ****
--- 1206,1214 ----
switch (opt->value.lval) {
case PHP3_XML_OPTION_CASE_FOLDING:
RETURN_LONG(parser->case_folding);
+ break;
+ case PHP3_XML_OPTION_TARGET_ENCODING:
+ RETURN_STRING(parser->target_encoding, 1);
break;
default:
php3_error(E_WARNING, "%s: unknown option", thisfunc);
Index: php3/functions/php3_xml.h
diff -c php3/functions/php3_xml.h:1.5 php3/functions/php3_xml.h:1.6
*** php3/functions/php3_xml.h:1.5 Tue Oct 27 14:56:41 1998
--- php3/functions/php3_xml.h Tue Oct 27 22:35:50 1998
***************
*** 46,51 ****
--- 46,52 ----
int index;
int case_folding;
XML_Parser parser;
+ XML_Char *target_encoding;
char *startElementHandler;
char *endElementHandler;
char *characterDataHandler;
***************
*** 58,68 ****
XML_Char *baseURI;
} xml_parser;
extern php3_module_entry xml_module_entry;
# define xml_module_ptr &xml_module_entry
enum php3_xml_option {
! PHP3_XML_OPTION_CASE_FOLDING
};
# define RETURN_OUT_OF_MEMORY \
--- 59,78 ----
XML_Char *baseURI;
} xml_parser;
+
+ typedef struct {
+ XML_Char *name;
+ char (*decoding_function)(unsigned short);
+ unsigned short (*encoding_function)(char);
+ } xml_encoding;
+
+
extern php3_module_entry xml_module_entry;
# define xml_module_ptr &xml_module_entry
enum php3_xml_option {
! PHP3_XML_OPTION_CASE_FOLDING,
! PHP3_XML_OPTION_TARGET_ENCODING
};
# define RETURN_OUT_OF_MEMORY \
-- PHP Development Mailing List (http://www.php.net/). To unsubscribe, send an empty message to php-dev-unsubscribe <email protected>. For help, write to php-dev-help <email protected>.
- Next message: ssb: "[PHP-DEV] CVS update: php3/doc"
- Previous message: ssb: "[PHP-DEV] CVS update: php3/doc/functions"
- Next in thread: ssb: "[PHP-DEV] CVS update: php3/functions"
- Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]

