Bug #15284 Iconv fails to convert to/from UCS-4
Submitted: 2002-01-29 16:58 UTC Modified: 2002-03-02 02:11 UTC
From: nahshon at actcom dot co dot il Assigned: yohgaki (profile)
Status: Closed Package: ICONV related
PHP Version: 4.1.1 OS: Linux
Private report: No CVE-ID: None
From: nahshon at actcom dot co dot il
 [2002-01-29 16:58 UTC] nahshon at actcom dot co dot il
Iconv fails to convert to/from UCS-4 (and some other
encodings that may include null bytes).

This is because it is using strlen to determine the
length of a string (both on the input and the output).
Instead it should use the internal length (Z_STRLEN_PP)
for the param to iconv.
The length (calculated from the value returned by iconv)
should be used for the result (use RETVAL_STRINGL).

The attached diff is for 4.0.6 (4.1.1 is similar except
for small style changes).
Other changes in the patch are:
a. php_iconv_string made static.
b. Fix compile warning with glibc-2.2.4 - Param to
   iconv is "char **" and not "const char **" (though
   the second makes more sense).

-int php_iconv_string(char *, char **, char *, char *);
+static int php_iconv_string(char *, unsigned int, char 
**, unsigned int *, char *, char *);

input_encoding,        zend_iconv_globals,  iconv_globals)
@@ -111,20 +111,23 @@

-int php_iconv_string(char *in_p, char **out, char 
*in_charset, char *out_charset)
+static int php_iconv_string(char *in_p, unsigned int 
+                        char **out, unsigned int *out_len,
+                    char *in_charset, char *out_charset)
-    unsigned int in_size, out_size;
+    unsigned int in_size, out_size, out_left;
     char *out_buffer, *out_p;
     iconv_t cd;
     size_t result;
     typedef unsigned int ucs4_t;

-    in_size  = strlen(in_p) * sizeof(char) + 1;
-    out_size = strlen(in_p) * sizeof(ucs4_t) + 1;
+    in_size  = in_len;
+    out_size = in_len * sizeof(ucs4_t) + 1;

     out_buffer = (char *) emalloc(out_size);
        *out = out_buffer;
     out_p = out_buffer;
+       out_left = out_size;

     cd = icv_open(out_charset, in_charset);

@@ -135,14 +138,16 @@
                return FAILURE;

-       result = icv(cd, (const char **) &in_p, &in_size, 
(char **)
-                                  &out_p, &out_size);
+       result = icv(cd, (char **) &in_p, &in_size, (char 
+                                  &out_p, &out_left);

     if (result == (size_t)(-1)) {
-                efree(out_buffer);
+               efree(out_buffer);
                return FAILURE;

+       *out_len = out_size - out_left;

     return SUCCESS;
@@ -155,6 +160,7 @@
     zval **in_charset, **out_charset, **in_buffer;
        char *out_buffer;
+       unsigned int out_len;

     if (ZEND_NUM_ARGS() != 3 || zend_get_parameters_ex(3, 
&in_charset, &out_charset, &in_buffer) == FAILURE) {
@@ -164,8 +170,10 @@

-       if (php_iconv_string(Z_STRVAL_PP(in_buffer), 
&out_buffer, Z_STRVAL_PP(in_charset), 
Z_STRVAL_PP(out_charset)) == SUCCESS) {
-               RETVAL_STRING(out_buffer, 0);
+       if (php_iconv_string(Z_STRVAL_PP(in_buffer), 
+                                        &out_buffer, 
Z_STRVAL_PP(in_charset), Z_STRVAL_PP(out_charset)) == 
+               RETVAL_STRINGL(out_buffer, out_len, 0);
        } else {
@@ -178,6 +186,7 @@
        char *out_buffer;
        zval **zv_string, **zv_status;
+       unsigned int out_len;

@@ -189,10 +198,11 @@

        if (SG(sapi_headers).send_default_content_type &&
-               php_iconv_string(Z_STRVAL_PP(zv_string), 
+               php_iconv_string(Z_STRVAL_PP(zv_string), 
&out_buffer, &out_len,
ICONVG(output_encoding))==SUCCESS) {
-               RETVAL_STRING(out_buffer, 0);
+               RETVAL_STRINGL(out_buffer, out_len, 0);
        } else {
                *return_value = **zv_string;


 [2002-01-29 20:32 UTC]
The patch seems OK to me, except that you've made php_iconv_string static.

Have you checked that no other module uses php_iconv_string? I'm fine with making it a static function. We can make it non-static when we need to :)

Could you send me patch against 4.2.0-dev for testing?

 [2002-01-30 15:26 UTC] nahshon at actcom dot co dot il
I grepped for php_iconv_string and it's referenced only in this
file. I felt that changing it to static was the best thing
to do because the args to the function have changed, so if
I missed a caller, that would result in a link-time error rather
than a runtime error.

I applied my changes to the latest CVS version but I
have done only minimal testing (in command-line php).

The patch is exactly the same as the one I already submitted here,
modulo style changes.

The new patch applies correctly also to php-4.1.1.
I will send the new patch directly to
 [2002-03-02 02:11 UTC]
Thanks for reporting.

I've improved iconv support in the latest CVS.
Since you are using Linux, you should not have problems anymore.

Please report any new problems you find :)

