[PHP-DEV] CVS update: php3/functions From: ssb (php-dev <email protected>)
Date: 10/27/98

Date: Tuesday October 27, 1998 @ 22:35
Author: ssb

Update of /repository/php3/functions
In directory asf:/u2/tmp/cvs-serv14955/functions

Modified Files:
        xml.c php3_xml.h
Log Message:
Unicode support (still no UTF-16, of course)
Index: php3/functions/xml.c
diff -c php3/functions/xml.c:1.12 php3/functions/xml.c:1.13
*** php3/functions/xml.c:1.12 Tue Oct 27 17:42:53 1998
--- php3/functions/xml.c Tue Oct 27 22:35:49 1998
***************
*** 27,33 ****
     +----------------------------------------------------------------------+
   */
  
! /* $Id: xml.c,v 1.12 1998/10/27 22:42:53 ssb Exp $ */
  #define IS_EXT_MODULE
  #if COMPILE_DL
  # if PHP_31
--- 27,33 ----
     +----------------------------------------------------------------------+
   */
  
! /* $Id: xml.c,v 1.13 1998/10/28 03:35:49 ssb Exp $ */
  #define IS_EXT_MODULE
  #if COMPILE_DL
  # if PHP_31
***************
*** 111,121 ****
  static char *php3i_pvalstrcpy(pval *);
  static void xml_destroy_parser(xml_parser *);
  static void xml_set_handler(char **, pval *);
! static char *xml_utf8_decode(const XML_Char *, int, int *);
  static pval *xml_call_handler(xml_parser *, char *, int, pval **);
  static pval *php3i_longpval(long);
  static pval *php3i_stringpval(const char *);
! static pval *php3i_xmlcharpval(const XML_Char *, int);
  static int php3i_xmlcharlen(const XML_Char *);
  
  void php3i_xml_startElementHandler(void *, const char *, const char **);
--- 111,123 ----
  static char *php3i_pvalstrcpy(pval *);
  static void xml_destroy_parser(xml_parser *);
  static void xml_set_handler(char **, pval *);
! inline static char xml_decode_iso_8859_1(unsigned short);
! inline static char xml_decode_us_ascii(unsigned short);
! static char *xml_treat_utf8(const XML_Char *, int, int *, const XML_Char *);
  static pval *xml_call_handler(xml_parser *, char *, int, pval **);
  static pval *php3i_longpval(long);
  static pval *php3i_stringpval(const char *);
! static pval *php3i_xmlcharpval(const XML_Char *, int, const XML_Char *);
  static int php3i_xmlcharlen(const XML_Char *);
  
  void php3i_xml_startElementHandler(void *, const char *, const char **);
***************
*** 162,167 ****
--- 164,179 ----
      STANDARD_MODULE_PROPERTIES
  };
  
+ /* All the encoding functions (third field) are set to NULL right now,
+ * since all the encoding is currently done internally by expat/xmltok.
+ */
+ xml_encoding xml_encodings[] = { /* target encodings, looked up by name in xml_get_encoding() */
+ { "ISO-8859-1", xml_decode_iso_8859_1, NULL }, /* fields: name, decoding function, encoding function */
+ { "US-ASCII", xml_decode_us_ascii, NULL },
+ { "UTF-8", NULL, NULL }, /* no decoder: xml_treat_utf8() then copies its input through unchanged */
+ { NULL, NULL, NULL } /* NULL name terminates the table; the lookup loop stops here */
+ };
+
  /* }}} */
  /* {{{ startup, shutdown and info functions */
  
***************
*** 217,222 ****
--- 229,235 ----
          REGISTER_LONG_CONSTANT("XML_ERROR_EXTERNAL_ENTITY_HANDLING", XML_ERROR_EXTERNAL_ENTITY_HANDLING, CONST_CS|CONST_PERSISTENT);
  
          REGISTER_LONG_CONSTANT("XML_OPTION_CASE_FOLDING", PHP3_XML_OPTION_CASE_FOLDING, CONST_CS|CONST_PERSISTENT);
+ REGISTER_LONG_CONSTANT("XML_OPTION_TARGET_ENCODING", PHP3_XML_OPTION_TARGET_ENCODING, CONST_CS|CONST_PERSISTENT);
  
          return SUCCESS;
  }
***************
*** 381,398 ****
          return NULL;
  }
  
  /* }}} */
! /* {{{ xml_utf8_decode */
  static char *
! xml_utf8_decode(const XML_Char *s, int len, int *newlen)
  {
! int c, pos = len;
          char *newbuf = emalloc(len);
  
          *newlen = 0;
!
          while (pos > 0) {
! #ifdef XML_UNICODE /* defined if we are using multi-byte characters */
                  c = (*s);
                  len--;
                  s++;
--- 394,455 ----
          return NULL;
  }
  
+ /* }}} */
+ /* {{{ xml_decode_iso_8859_1() */
+
+ inline static char /* Reduce one 16-bit character value to a single char for ISO-8859-1 output. */
+ xml_decode_iso_8859_1(unsigned short c) /* c: character value to convert */
+ {
+ return (char)(c > 0xff ? '?' : c); /* values above 0xff become a question mark; the rest are cast as-is */
+ }
+
  /* }}} */
! /* {{{ xml_decode_us_ascii() */
!
! inline static char /* Reduce one 16-bit character value to a single char for US-ASCII output. */
! xml_decode_us_ascii(unsigned short c) /* c: character value to convert */
! {
! return (char)(c > 0x7f ? '?' : c); /* values above 0x7f become a question mark; the rest are cast as-is */
! }
!
! /* }}} */
! static xml_encoding * /* Find the xml_encodings[] entry with the given name; returns NULL if there is none. */
! xml_get_encoding(const XML_Char *name) /* NOTE(review): name goes straight to strcasecmp() with no NULL check -- confirm callers never pass NULL */
! {
! xml_encoding *enc = &xml_encodings[0]; /* start at the first table row */
!
! while (enc && enc->name) { /* the row whose name is NULL marks the end of the table */
! if (strcasecmp(name, enc->name) == 0) { /* case-insensitive compare; presumably XML_Char is plain char here -- TODO confirm for XML_UNICODE builds */
! return enc; /* pointer into the table itself, not a copy */
! }
! enc++;
! }
! return NULL; /* no row matched */
! }
! /* {{{ xml_treat_utf8 */
  static char *
! xml_treat_utf8(const XML_Char *s, int len, int *newlen, const XML_Char *encoding)
  {
! int pos = len;
          char *newbuf = emalloc(len);
+ unsigned short c;
+ char (*decoder)(unsigned short) = NULL;
+ xml_encoding *enc = xml_get_encoding(encoding);
  
          *newlen = 0;
! if (enc) {
! decoder = enc->decoding_function;
! }
! if (decoder == NULL) {
! /* If the target encoding was unknown, or no decoder function
! * was specified, return the UTF-8-encoded data as-is.
! */
! memcpy(newbuf, s, len);
! *newlen = len;
! return newbuf;
! }
          while (pos > 0) {
! #ifdef XML_UNICODE /* defined if we are using 16-bit Unicode characters */
                  c = (*s);
                  len--;
                  s++;
***************
*** 415,424 ****
                          pos--;
                  }
  #endif
! if (c > 0xff) {
! c = '?';
! }
! newbuf[*newlen] = c;
                  ++*newlen;
      }
          if (*newlen < len) {
--- 472,478 ----
                          pos--;
                  }
  #endif
! newbuf[*newlen] = decoder ? decoder(c) : c;
                  ++*newlen;
      }
          if (*newlen < len) {
***************
*** 455,461 ****
  /* }}} */
      /* {{{ php3i_xmlcharpval() */
  
! static pval *php3i_xmlcharpval(const XML_Char *s, int len)
  {
          pval *ret = emalloc(sizeof(pval));
  
--- 509,515 ----
  /* }}} */
      /* {{{ php3i_xmlcharpval() */
  
! static pval *php3i_xmlcharpval(const XML_Char *s, int len, const XML_Char *encoding)
  {
          pval *ret = emalloc(sizeof(pval));
  
***************
*** 467,473 ****
                  len = php3i_xmlcharlen(s);
          }
          ret->type = IS_STRING;
! ret->value.str.val = xml_utf8_decode(s, len, &ret->value.str.len);
          return ret;
  }
  
--- 521,527 ----
                  len = php3i_xmlcharlen(s);
          }
          ret->type = IS_STRING;
! ret->value.str.val = xml_treat_utf8(s, len, &ret->value.str.len, encoding);
          return ret;
  }
  
***************
*** 577,583 ****
                  pval *retval, *args[2];
  
                  args[0] = php3i_longpval(parser->index);
! args[1] = php3i_xmlcharpval(s, len);
                  if ((retval = xml_call_handler(parser, parser->characterDataHandler, 2, args))) {
                          php3tls_pval_destructor(retval);
                          efree(retval);
--- 631,637 ----
                  pval *retval, *args[2];
  
                  args[0] = php3i_longpval(parser->index);
! args[1] = php3i_xmlcharpval(s, len, parser->target_encoding);
                  if ((retval = xml_call_handler(parser, parser->characterDataHandler, 2, args))) {
                          php3tls_pval_destructor(retval);
                          efree(retval);
***************
*** 599,606 ****
                  pval *retval, *args[3];
  
                  args[0] = php3i_longpval(parser->index);
! args[1] = php3i_xmlcharpval(target, 0);
! args[2] = php3i_xmlcharpval(data, 0);
                  if ((retval = xml_call_handler(parser, parser->processingInstructionHandler, 3, args))) {
                          php3tls_pval_destructor(retval);
                          efree(retval);
--- 653,660 ----
                  pval *retval, *args[3];
  
                  args[0] = php3i_longpval(parser->index);
! args[1] = php3i_xmlcharpval(target, 0, parser->target_encoding);
! args[2] = php3i_xmlcharpval(data, 0, parser->target_encoding);
                  if ((retval = xml_call_handler(parser, parser->processingInstructionHandler, 3, args))) {
                          php3tls_pval_destructor(retval);
                          efree(retval);
***************
*** 620,626 ****
                  pval *retval, *args[2];
  
                  args[0] = php3i_longpval(parser->index);
! args[1] = php3i_xmlcharpval(s, len);
                  if ((retval = xml_call_handler(parser, parser->defaultHandler, 2, args))) {
                          php3tls_pval_destructor(retval);
                          efree(retval);
--- 674,680 ----
                  pval *retval, *args[2];
  
                  args[0] = php3i_longpval(parser->index);
! args[1] = php3i_xmlcharpval(s, len, parser->target_encoding);
                  if ((retval = xml_call_handler(parser, parser->defaultHandler, 2, args))) {
                          php3tls_pval_destructor(retval);
                          efree(retval);
***************
*** 645,655 ****
                  pval *retval, *args[5];
  
                  args[0] = php3i_longpval(parser->index);
! args[1] = php3i_xmlcharpval(entityName, 0);
! args[2] = php3i_xmlcharpval(base, 0);
! args[3] = php3i_xmlcharpval(systemId, 0);
! args[4] = php3i_xmlcharpval(publicId, 0);
! args[5] = php3i_xmlcharpval(notationName, 0);
                  if ((retval = xml_call_handler(parser, parser->unparsedEntityDeclHandler, 6, args))) {
                          php3tls_pval_destructor(retval);
                          efree(retval);
--- 699,709 ----
                  pval *retval, *args[5];
  
                  args[0] = php3i_longpval(parser->index);
! args[1] = php3i_xmlcharpval(entityName, 0, parser->target_encoding);
! args[2] = php3i_xmlcharpval(base, 0, parser->target_encoding);
! args[3] = php3i_xmlcharpval(systemId, 0, parser->target_encoding);
! args[4] = php3i_xmlcharpval(publicId, 0, parser->target_encoding);
! args[5] = php3i_xmlcharpval(notationName, 0, parser->target_encoding);
                  if ((retval = xml_call_handler(parser, parser->unparsedEntityDeclHandler, 6, args))) {
                          php3tls_pval_destructor(retval);
                          efree(retval);
***************
*** 674,683 ****
                  pval *retval, *args[5];
  
                  args[0] = php3i_longpval(parser->index);
! args[1] = php3i_xmlcharpval(notationName, 0);
! args[2] = php3i_xmlcharpval(base, 0);
! args[3] = php3i_xmlcharpval(systemId, 0);
! args[4] = php3i_xmlcharpval(publicId, 0);
                  if ((retval = xml_call_handler(parser, parser->notationDeclHandler, 5, args))) {
                          php3tls_pval_destructor(retval);
                          efree(retval);
--- 728,737 ----
                  pval *retval, *args[5];
  
                  args[0] = php3i_longpval(parser->index);
! args[1] = php3i_xmlcharpval(notationName, 0, parser->target_encoding);
! args[2] = php3i_xmlcharpval(base, 0, parser->target_encoding);
! args[3] = php3i_xmlcharpval(systemId, 0, parser->target_encoding);
! args[4] = php3i_xmlcharpval(publicId, 0, parser->target_encoding);
                  if ((retval = xml_call_handler(parser, parser->notationDeclHandler, 5, args))) {
                          php3tls_pval_destructor(retval);
                          efree(retval);
***************
*** 703,712 ****
                  pval *retval, *args[5];
  
                  args[0] = php3i_longpval(parser->index);
! args[1] = php3i_xmlcharpval(openEntityNames, 0);
! args[2] = php3i_xmlcharpval(base, 0);
! args[3] = php3i_xmlcharpval(systemId, 0);
! args[4] = php3i_xmlcharpval(publicId, 0);
                  if ((retval = xml_call_handler(parser, parser->externalEntityRefHandler, 5, args))) {
                          convert_to_long(retval);
                          ret = retval->value.lval;
--- 757,766 ----
                  pval *retval, *args[5];
  
                  args[0] = php3i_longpval(parser->index);
! args[1] = php3i_xmlcharpval(openEntityNames, 0, parser->target_encoding);
! args[2] = php3i_xmlcharpval(base, 0, parser->target_encoding);
! args[3] = php3i_xmlcharpval(systemId, 0, parser->target_encoding);
! args[4] = php3i_xmlcharpval(publicId, 0, parser->target_encoding);
                  if ((retval = xml_call_handler(parser, parser->externalEntityRefHandler, 5, args))) {
                          convert_to_long(retval);
                          ret = retval->value.lval;
***************
*** 743,748 ****
--- 797,805 ----
  
          if (argc == 1) {
                  convert_to_string(encodingArg);
+ /* The supported encoding types are hardcoded here because
+ * we are limited to the encodings supported by expat/xmltok.
+ */
                  if (strncasecmp(encodingArg->value.str.val, "ISO-8859-1",
                                                  encodingArg->value.str.len) == 0) {
                          encoding = "ISO-8859-1";
***************
*** 752,760 ****
                  } else if (strncasecmp(encodingArg->value.str.val, "US-ASCII",
                                                  encodingArg->value.str.len) == 0) {
                          encoding = "US-ASCII";
! } else if (strncasecmp(encodingArg->value.str.val, "UTF-16",
! encodingArg->value.str.len) == 0) {
! php3_error(E_WARNING, "%s: unsupported encoding", thisfunc);
                          RETURN_FALSE;
                  }
          } else {
--- 809,817 ----
                  } else if (strncasecmp(encodingArg->value.str.val, "US-ASCII",
                                                  encodingArg->value.str.len) == 0) {
                          encoding = "US-ASCII";
! } else { /* UTF-16 not supported */
! php3_error(E_WARNING, "%s: unsupported source encoding \"%s\"",
! thisfunc, encodingArg->value.str.val);
                          RETURN_FALSE;
                  }
          } else {
***************
*** 763,768 ****
--- 820,826 ----
  
          parser = ecalloc(sizeof(xml_parser), 1);
          parser->parser = XML_ParserCreate(encoding);
+ parser->target_encoding = encoding;
          XML_SetUserData(parser->parser, parser);
          XML_SetElementHandler(parser->parser, php3i_xml_startElementHandler, php3i_xml_endElementHandler);
          XML_SetCharacterDataHandler(parser->parser, php3i_xml_characterDataHandler);
***************
*** 1108,1113 ****
--- 1166,1181 ----
                          convert_to_long(val);
                          parser->case_folding = val->value.lval;
                          break;
+ case PHP3_XML_OPTION_TARGET_ENCODING: { /* choose the encoding name later passed to php3i_xmlcharpval() by the handlers */
+ xml_encoding *enc = xml_get_encoding(val->value.str.val); /* NOTE(review): str.val is read with no convert_to_string(val) first, while the case above calls convert_to_long(val) -- confirm val is always a string here */
+ if (enc == NULL) {
+ php3_error(E_WARNING, "%s: unsupported target encoding \"%s\"",
+ thisfunc, val->value.str.val);
+ RETURN_FALSE;
+ }
+ parser->target_encoding = enc->name; /* stores the table entry's name pointer rather than the caller's string */
+ break;
+ }
                  default:
                          php3_error(E_WARNING, "%s: unknown option", thisfunc);
                          RETURN_FALSE;
***************
*** 1138,1143 ****
--- 1206,1214 ----
          switch (opt->value.lval) {
                  case PHP3_XML_OPTION_CASE_FOLDING:
                          RETURN_LONG(parser->case_folding);
+ break;
+ case PHP3_XML_OPTION_TARGET_ENCODING:
+ RETURN_STRING(parser->target_encoding, 1);
                          break;
                  default:
                          php3_error(E_WARNING, "%s: unknown option", thisfunc);
Index: php3/functions/php3_xml.h
diff -c php3/functions/php3_xml.h:1.5 php3/functions/php3_xml.h:1.6
*** php3/functions/php3_xml.h:1.5 Tue Oct 27 14:56:41 1998
--- php3/functions/php3_xml.h Tue Oct 27 22:35:50 1998
***************
*** 46,51 ****
--- 46,52 ----
          int index;
          int case_folding;
          XML_Parser parser;
+ XML_Char *target_encoding;
          char *startElementHandler;
          char *endElementHandler;
          char *characterDataHandler;
***************
*** 58,68 ****
          XML_Char *baseURI;
  } xml_parser;
  
  extern php3_module_entry xml_module_entry;
  # define xml_module_ptr &xml_module_entry
  
  enum php3_xml_option {
! PHP3_XML_OPTION_CASE_FOLDING
  };
  
  # define RETURN_OUT_OF_MEMORY \
--- 59,78 ----
          XML_Char *baseURI;
  } xml_parser;
  
+
+ typedef struct { /* One row of the xml_encodings[] table defined in xml.c. */
+ XML_Char *name; /* encoding name such as "ISO-8859-1"; NULL in the terminating row */
+ char (*decoding_function)(unsigned short); /* converts one 16-bit character value to a char; NULL means pass data through untouched */
+ unsigned short (*encoding_function)(char); /* presumably the reverse conversion; NULL in every row of the table in this revision */
+ } xml_encoding;
+
+
  extern php3_module_entry xml_module_entry;
  # define xml_module_ptr &xml_module_entry
  
  enum php3_xml_option { /* Option ids handled by the option set/get switch statements in xml.c. */
! PHP3_XML_OPTION_CASE_FOLDING, /* stored as a long in parser->case_folding; registered as XML_OPTION_CASE_FOLDING */
! PHP3_XML_OPTION_TARGET_ENCODING /* stored as an encoding name in parser->target_encoding; registered as XML_OPTION_TARGET_ENCODING */
  };
  
  # define RETURN_OUT_OF_MEMORY \

--
PHP Development Mailing List   http://www.php.net/
To unsubscribe send an empty message to php-dev-unsubscribe <email protected>
For help: php-dev-help <email protected>