[PHP-DEV] CVS update: php3/functions From: ssb (php-dev <email protected>)
Date: 10/28/98

Date: Wednesday October 28, 1998 @ 0:27
Author: ssb

Update of /repository/php3/functions
In directory asf:/u2/tmp/cvs-serv15755/functions

Modified Files:
        xml.c php3_xml.h
Log Message:
added utf8_encode() and utf8_decode()
Index: php3/functions/xml.c
diff -c php3/functions/xml.c:1.13 php3/functions/xml.c:1.14
*** php3/functions/xml.c:1.13 Tue Oct 27 22:35:49 1998
--- php3/functions/xml.c Wed Oct 28 00:27:47 1998
***************
*** 27,33 ****
     +----------------------------------------------------------------------+
   */
  
! /* $Id: xml.c,v 1.13 1998/10/28 03:35:49 ssb Exp $ */
  #define IS_EXT_MODULE
  #if COMPILE_DL
  # if PHP_31
--- 27,33 ----
     +----------------------------------------------------------------------+
   */
  
! /* $Id: xml.c,v 1.14 1998/10/28 05:27:47 ssb Exp $ */
  #define IS_EXT_MODULE
  #if COMPILE_DL
  # if PHP_31
***************
*** 60,66 ****
   */
  
  /* Long-term TODO list:
- * - Support other character sets than ISO-8859-1
   * - Fix the expat library so you can install your own memory manager
   * functions
   */
--- 60,65 ----
***************
*** 111,119 ****
  static char *php3i_pvalstrcpy(pval *);
  static void xml_destroy_parser(xml_parser *);
  static void xml_set_handler(char **, pval *);
  inline static char xml_decode_iso_8859_1(unsigned short);
  inline static char xml_decode_us_ascii(unsigned short);
! static char *xml_treat_utf8(const XML_Char *, int, int *, const XML_Char *);
  static pval *xml_call_handler(xml_parser *, char *, int, pval **);
  static pval *php3i_longpval(long);
  static pval *php3i_stringpval(const char *);
--- 110,121 ----
  static char *php3i_pvalstrcpy(pval *);
  static void xml_destroy_parser(xml_parser *);
  static void xml_set_handler(char **, pval *);
+ inline static unsigned short xml_encode_iso_8859_1(unsigned char);
  inline static char xml_decode_iso_8859_1(unsigned short);
+ inline static unsigned short xml_encode_us_ascii(char);
  inline static char xml_decode_us_ascii(unsigned short);
! static XML_Char *xml_utf8_encode(const char *, int, int *, const XML_Char *);
! static char *xml_utf8_decode(const XML_Char *, int, int *, const XML_Char *);
  static pval *xml_call_handler(xml_parser *, char *, int, pval **);
  static pval *php3i_longpval(long);
  static pval *php3i_stringpval(const char *);
***************
*** 150,155 ****
--- 152,159 ----
      PHP_FE(xml_parser_free, NULL)
          PHP_FE(xml_parser_set_option, NULL)
          PHP_FE(xml_parser_get_option, NULL)
+ PHP_FE(utf8_encode, NULL)
+ PHP_FE(utf8_decode, NULL)
      {NULL, NULL, NULL}
  };
  
***************
*** 168,177 ****
   * the encoding is currently done internally by expat/xmltok.
   */
  xml_encoding xml_encodings[] = {
! { "ISO-8859-1", xml_decode_iso_8859_1, NULL },
! { "US-ASCII", xml_decode_us_ascii, NULL },
! { "UTF-8", NULL, NULL },
! { NULL, NULL, NULL }
  };
  
  /* }}} */
--- 172,181 ----
   * the encoding is currently done internally by expat/xmltok.
   */
  xml_encoding xml_encodings[] = {
! { "ISO-8859-1", xml_decode_iso_8859_1, xml_encode_iso_8859_1 },
! { "US-ASCII", xml_decode_us_ascii, xml_encode_us_ascii },
! { "UTF-8", NULL, NULL },
! { NULL, NULL, NULL }
  };
  
  /* }}} */
***************
*** 395,400 ****
--- 399,414 ----
  }
  
  /* }}} */
+ /* {{{ xml_encode_iso_8859_1() */
+
+ inline static unsigned short
+ xml_encode_iso_8859_1(unsigned char c)
+ {
+ /* ISO-8859-1 byte values equal Unicode code points U+0000..U+00FF, so no mapping (and no debug output) is needed. */
+ return (unsigned short)c;
+ }
+
+ /* }}} */
      /* {{{ xml_decode_iso_8859_1() */
  
  inline static char
***************
*** 404,409 ****
--- 418,432 ----
  }
  
  /* }}} */
+ /* {{{ xml_encode_us_ascii() */
+
+ inline static unsigned short
+ xml_encode_us_ascii(char c)
+ {
+ return (unsigned short)(unsigned char)c; /* go through unsigned char: avoids sign extension (0x80 -> 0xFF80) where plain char is signed */
+ }
+
+ /* }}} */
      /* {{{ xml_decode_us_ascii() */
  
  inline static char
***************
*** 413,418 ****
--- 436,443 ----
  }
  
  /* }}} */
+ /* {{{ xml_get_encoding() */
+
  static xml_encoding *
  xml_get_encoding(const XML_Char *name)
  {
***************
*** 425,434 ****
                  enc++;
          }
          return NULL;
  }
! /* {{{ xml_treat_utf8 */
  static char *
! xml_treat_utf8(const XML_Char *s, int len, int *newlen, const XML_Char *encoding)
  {
          int pos = len;
          char *newbuf = emalloc(len);
--- 450,515 ----
                  enc++;
          }
          return NULL;
+ }
+
+ /* }}} */
+ /* {{{ xml_utf8_encode */
+ static XML_Char *
+ xml_utf8_encode(const char *s, int len, int *newlen, const XML_Char *encoding)
+ {
+ int pos = len;
+ char *newbuf;
+ unsigned short c;
+ unsigned short (*encoder)(char) = NULL;
+ xml_encoding *enc = xml_get_encoding(encoding);
+
+ *newlen = 0;
+ if (enc) {
+ encoder = enc->encoding_function;
+ } else {
+ /* If the target encoding was unknown, fail */
+ return NULL;
+ }
+ if (encoder == NULL) {
+ /* If no encoder function was specified, return the data as-is. */
+ newbuf = emalloc(len + 1);
+ memcpy(newbuf, s, len);
+ newbuf[len] = '\0';
+ *newlen = len;
+ return newbuf;
+ }
+ /* At most 4 output bytes per input byte, plus one byte for the
+ * terminating NUL appended below. */
+ newbuf = emalloc(len * 4 + 1);
+ while (pos > 0) {
+ c = encoder((unsigned char)(*s)); /* encoder is known to be non-NULL here */
+ if (c < 0x80) {
+ newbuf[(*newlen)++] = c;
+ } else if (c < 0x800) {
+ newbuf[(*newlen)++] = (0xc0 | (c >> 6));
+ newbuf[(*newlen)++] = (0x80 | (c & 0x3f));
+ } else if (c < 0x10000) {
+ newbuf[(*newlen)++] = (0xe0 | (c >> 12));
+ newbuf[(*newlen)++] = (0x80 | ((c >> 6) & 0x3f)); /* continuation bytes are always 10xxxxxx */
+ newbuf[(*newlen)++] = (0x80 | (c & 0x3f));
+ } else if (c < 0x200000) {
+ newbuf[(*newlen)++] = (0xf0 | (c >> 18));
+ newbuf[(*newlen)++] = (0x80 | ((c >> 12) & 0x3f));
+ newbuf[(*newlen)++] = (0x80 | ((c >> 6) & 0x3f));
+ newbuf[(*newlen)++] = (0x80 | (c & 0x3f));
+ }
+ pos--;
+ s++;
+ }
+ /* Shrink to the bytes actually used and NUL-terminate the result. */
+ newbuf = erealloc(newbuf, *newlen + 1);
+ newbuf[*newlen] = '\0';
+ return newbuf;
  }
! /* }}} */
! /* {{{ xml_utf8_decode */
  static char *
! xml_utf8_decode(const XML_Char *s, int len, int *newlen, const XML_Char *encoding)
  {
          int pos = len;
          char *newbuf = emalloc(len);
***************
*** 449,459 ****
                  return newbuf;
          }
          while (pos > 0) {
- #ifdef XML_UNICODE /* defined if we are using 16-bit Unicode characters */
- c = (*s);
- len--;
- s++;
- #else
                  c = (unsigned char)(*s);
                  if (c >= 0xf0) { /* four bytes encoded, 21 bits */
                          c = ((s[0]&7)<<18) | ((s[1]&63)<<12) | ((s[2]&63)<<6) | (s[3]&63);
--- 530,535 ----
***************
*** 471,477 ****
                          s++;
                          pos--;
                  }
- #endif
                  newbuf[*newlen] = decoder ? decoder(c) : c;
                  ++*newlen;
      }
--- 547,552 ----
***************
*** 521,527 ****
                  len = php3i_xmlcharlen(s);
          }
          ret->type = IS_STRING;
! ret->value.str.val = xml_treat_utf8(s, len, &ret->value.str.len, encoding);
          return ret;
  }
  
--- 596,602 ----
                  len = php3i_xmlcharlen(s);
          }
          ret->type = IS_STRING;
! ret->value.str.val = xml_utf8_decode(s, len, &ret->value.str.len, encoding);
          return ret;
  }
  
***************
*** 1216,1221 ****
--- 1291,1338 ----
                          break;
          }
          RETVAL_FALSE;
+ }
+
+ /* }}} */
+ /* {{{ string utf8_encode(string data) */
+
+ PHP_FUNCTION(utf8_encode)
+ {
+ pval *arg; /* the single script-level argument */
+ XML_Char *encoded; /* buffer returned by xml_utf8_encode() */
+ int len; /* length written by xml_utf8_encode() through &len */
+ XML_TLS_VARS;
+ /* Exactly one argument is accepted; anything else is a parameter-count error. */
+ if (ARG_COUNT(ht) != 1 || getParameters(ht, 1, &arg) == FAILURE) {
+ WRONG_PARAM_COUNT;
+ }
+ convert_to_string(arg); /* the string fields of arg->value are read on the next line */
+ encoded = xml_utf8_encode(arg->value.str.val, arg->value.str.len, &len, "ISO-8859-1"); /* source charset is fixed to ISO-8859-1 */
+ if (encoded == NULL) { /* xml_utf8_encode() returns NULL when the encoding name is unknown */
+ RETURN_FALSE;
+ }
+ RETVAL_STRINGL(encoded, len, 0); /* NOTE(review): the 0 presumably means "do not duplicate", handing the buffer to the return value - confirm */
+ }
+
+ /* }}} */
+ /* {{{ string utf8_decode(string data) */
+
+ PHP_FUNCTION(utf8_decode)
+ {
+ pval *arg; /* the single script-level argument */
+ XML_Char *decoded; /* buffer returned by xml_utf8_decode() */
+ int len; /* length written by xml_utf8_decode() through &len */
+ XML_TLS_VARS;
+ /* Exactly one argument is accepted; anything else is a parameter-count error. */
+ if (ARG_COUNT(ht) != 1 || getParameters(ht, 1, &arg) == FAILURE) {
+ WRONG_PARAM_COUNT;
+ }
+ convert_to_string(arg); /* the string fields of arg->value are read on the next line */
+ decoded = xml_utf8_decode(arg->value.str.val, arg->value.str.len, &len, "ISO-8859-1"); /* target charset is fixed to ISO-8859-1 */
+ if (decoded == NULL) { /* NOTE(review): xml_utf8_decode()'s failure conditions are outside this hunk - confirm */
+ RETURN_FALSE;
+ }
+ RETVAL_STRINGL(decoded, len, 0); /* NOTE(review): the 0 presumably means "do not duplicate", handing the buffer to the return value - confirm */
+ }
  
  /* }}} */
Index: php3/functions/php3_xml.h
diff -c php3/functions/php3_xml.h:1.6 php3/functions/php3_xml.h:1.7
*** php3/functions/php3_xml.h:1.6 Tue Oct 27 22:35:50 1998
--- php3/functions/php3_xml.h Wed Oct 28 00:27:47 1998
***************
*** 37,42 ****
--- 37,46 ----
  #include <xml/xmltok.h>
  #include <xml/xmlparse.h>
  
+ #ifdef XML_UNICODE
+ # error "UTF-16 Unicode support not implemented!"
+ #endif
+
  typedef struct {
          int le_xml_parser;
          XML_Char *default_encoding;
***************
*** 96,101 ****
--- 100,107 ----
  PHP_FUNCTION(xml_parser_free);
  PHP_FUNCTION(xml_parser_set_option);
  PHP_FUNCTION(xml_parser_get_option);
+ PHP_FUNCTION(utf8_encode);
+ PHP_FUNCTION(utf8_decode);
  
  #else /* !HAVE_LIBEXPAT */
  

--
PHP Development Mailing List   http://www.php.net/
To unsubscribe send an empty message to php-dev-unsubscribe <email protected>
For help: php-dev-help <email protected>