php.net |  support |  documentation |  report a bug |  advanced search |  search howto |  statistics |  random bug |  login
Request #2685 HtmlEntities should handle UTF-8
Submitted: 1999-11-08 12:07 UTC Modified: 2002-10-01 21:48 UTC
From: drew at elysium dot ltd dot uk Assigned:
Status: Closed Package: Feature/Change Request
PHP Version: 4.2.3 OS: Linux RH 6.0
Private report: No CVE-ID: None
Welcome back! If you're the original bug submitter, here's where you can edit the bug or add additional notes.
If you forgot your password, you can retrieve your password here.
Password:
Status:
Package:
Bug Type:
Summary:
From: drew at elysium dot ltd dot uk
New email:
PHP Version: OS:

 

 [1999-11-08 12:07 UTC] drew at elysium dot ltd dot uk
I needed a version of HtmlEntities that handled UTF-8 better than the default PHP one: I needed to cater for characters such as σ in an XML document and output them as at least a numeric character reference (e.g. &#963;).  I have created another function called Utf8_HtmlEntities which does exactly this, and have included the diff.

diff -c php-3.0.12.orig/functions/basic_functions.c php-3.0.12/functions/basic_functions.c
*** php-3.0.12.orig/functions/basic_functions.c	Mon Nov  8 16:51:15 1999
--- php-3.0.12/functions/basic_functions.c	Mon Nov  8 16:59:43 1999
***************
*** 167,172 ****
--- 167,173 ----
  	{"getimagesize",		php3_getimagesize,	NULL},
  	{"htmlspecialchars",	php3_htmlspecialchars,	NULL},
  	{"htmlentities",		php3_htmlentities,	NULL},
+ 	{"utf8_htmlentities",		php3_utf8_htmlentities,	NULL},
  	{"md5",					php3_md5,			NULL},
  
  	{"iptcparse",	php3_iptcparse,				NULL},
diff -c php-3.0.12.orig/functions/html.c php-3.0.12/functions/html.c
*** php-3.0.12.orig/functions/html.c	Mon Nov  8 16:51:15 1999
--- php-3.0.12/functions/html.c	Mon Nov  8 16:58:53 1999
***************
*** 60,123 ****
  	"uuml","yacute","thorn","yuml"
  };
  
! PHPAPI char * _php3_htmlentities(char *s, int i, int all)
  {
! 	int len, maxlen;
!     unsigned char *old;
  	char *new;
  
! 	old = (unsigned char *)s;
  
! 	maxlen = 2 * i;
  	if (maxlen < 128)
  		maxlen = 128;
  	new = emalloc (maxlen);
! 	len = 0;
  
! 	while (i--) {
! 		if (len + 9 > maxlen)
! 			new = erealloc (new, maxlen += 128);
! 		if (38 == *old) {
! 			memcpy (new + len, "&amp;", 5);
! 			len += 5;
! 		} else if (34 == *old) {
! 			memcpy (new + len, "&quot;", 6);
! 			len += 6;
! 		} else if (60 == *old) {
! 			memcpy (new + len, "&lt;", 4);
! 			len += 4;
! 		} else if (62 == *old) {
! 			memcpy (new + len, "&gt;", 4);
! 			len += 4;
! 		} else if (all && 160 <= *old) {
! 			new [len++] = '&';
! 			strcpy (new + len, EntTable [*old - 160]);
! 			len += strlen (EntTable [*old - 160]);
! 			new [len++] = ';';
  		} else {
! 			new [len++] = *old;
  		}
- 		old++;
  	}
!     new [len] = '\0';
  	return new;
  }
  
  static void _htmlentities(INTERNAL_FUNCTION_PARAMETERS, int all)
  {
!     pval *arg;
  	char *new;
  	TLS_VARS;
  
!     if (ARG_COUNT(ht) != 1 || getParameters(ht, 1, &arg) == FAILURE) {
  		WRONG_PARAM_COUNT;
!     }
  
!     convert_to_string(arg);
   
  	new = _php3_htmlentities(arg->value.str.val, arg->value.str.len, all);
  
! 	RETVAL_STRINGL(new,strlen(new),0);
  }
  
  /* {{{ proto string htmlspecialchars(string string)
--- 60,210 ----
  	"uuml","yacute","thorn","yuml"
  };
  
! PHPAPI char * _php3_htmlentities(char *s, int len, int all)
  {
! 	int newlen, maxlen;
! 	int pos = len;
! 	unsigned char c;
  	char *new;
  
! 	maxlen = 2 * len;
! 	if (maxlen < 128)
! 		maxlen = 128;
! 	new = emalloc(maxlen);
! 	newlen = 0;
! 
! 	while (pos > 0) {
! 		c = (unsigned char)(*s);
! 		if (newlen + 9 > maxlen)
! 			new = erealloc(new, maxlen += 128);
! 		if (c == 38) {
! 			memcpy(new + newlen, "&amp;", 5);
! 			newlen += 5;
! 		} else if (c == 34) {
! 			memcpy(new + newlen, "&quot;", 6);
! 			newlen += 6;
! 		} else if (c == 60) {
! 			memcpy(new + newlen, "&lt;", 4);
! 			newlen += 4;
! 		} else if (c == 62) {
! 			memcpy(new + newlen, "&gt;", 4);
! 			newlen += 4;
! 		} else if (all && c >= 160) {
! 			new[newlen++] = '&';
! 			strcpy(new + newlen, EntTable[c - 160]);
! 			newlen += strlen(new + newlen);
! 			new[newlen++] = ';';
! 		} else {
! 			new[newlen++] = c;
! 		}
! 		s++;
! 	}
! 	new[newlen] = '\0';
! 	return new;
! }
  
! PHPAPI char * _php3_utf8_htmlentities(char *s, int len, int all)
! {
! 	int newlen, maxlen;
!         int pos = len;
!         unsigned long c;
! 	char *new;
! 
! 	maxlen = 2 * len;
  	if (maxlen < 128)
  		maxlen = 128;
  	new = emalloc (maxlen);
! 	newlen = 0;
! 
! 	while (pos > 0) {
! 		c = (unsigned char)(*s);
! 		/* four bytes encoded, 21 bits */
! 		if (c >= 0xf0 && pos >= 4) {
! 			c = (*(s++) & 7) << 18;
! 			c += (*(s++) & 63) << 12;
! 			c += (*(s++) & 63) << 6;
! 			c += (*(s++) & 63);
! 			pos -= 4;
! 		/* three bytes encoded, 16 bits */
! 		} else if (c >= 0xe0 && c < 0xf0 && pos >= 3) {
! 			c = (*(s++) & 15) << 12;
! 			c += (*(s++) & 63) << 6;
! 			c += (*(s++) & 63);
! 			pos -= 3;
! 		/* two bytes encoded, 11 bits */
! 		} else if (c >= 0xc0 && c < 0xe0 && pos >= 2) {
! 			c = ((unsigned long)*(s++) & 31) << 6;
! 			c += ((unsigned long)*(s++) & 63);
! 			pos -= 2;
! 		} else {
! 			c = (*(s++) & 127);
! 			pos--;
! 		}
  
! 		if (newlen + 11 > maxlen)
! 			new = erealloc(new, maxlen += 128);
! 		if (c == 38) {
! 			memcpy(new + newlen, "&amp;", 5);
! 			newlen += 5;
! 		} else if (c == 34) {
! 			memcpy(new + newlen, "&quot;", 6);
! 			newlen += 6;
! 		} else if (c == 60) {
! 			memcpy (new + newlen, "&lt;", 4);
! 			newlen += 4;
! 		} else if (c == 62) {
! 			memcpy (new + newlen, "&gt;", 4);
! 			newlen += 4;
! 		} else if (all && c >= 160) {
! 			new[newlen++] = '&';
! 			if (c <= 255) {
! 				strcpy(new + newlen, EntTable[c - 160]);
! 			} else {
! 				new[newlen++] = '#';
! 				sprintf(new + newlen, "%lu", c);
! 			}
! 			newlen += strlen(new + newlen);
! 			new[newlen++] = ';';
  		} else {
! 			new[newlen++] = (unsigned char)c;
  		}
  	}
! 	new[newlen] = '\0';
  	return new;
  }
  
  static void _htmlentities(INTERNAL_FUNCTION_PARAMETERS, int all)
  {
! 	pval *arg;
  	char *new;
  	TLS_VARS;
  
! 	if (ARG_COUNT(ht) != 1 || getParameters(ht, 1, &arg) == FAILURE) {
  		WRONG_PARAM_COUNT;
! 	}
  
! 	convert_to_string(arg);
   
  	new = _php3_htmlentities(arg->value.str.val, arg->value.str.len, all);
  
! 	RETVAL_STRINGL(new,strlen(new), 0);
! }
! 
! static void _utf8_htmlentities(INTERNAL_FUNCTION_PARAMETERS, int all)
! {
! 	pval *arg;
! 	char *new;
! 	TLS_VARS;
! 
! 	if (ARG_COUNT(ht) != 1 || getParameters(ht, 1, &arg) == FAILURE) {
! 		WRONG_PARAM_COUNT;
! 	}
! 
! 	convert_to_string(arg);
!  
! 	new = _php3_utf8_htmlentities(arg->value.str.val, arg->value.str.len, all);
! 
! 	RETVAL_STRINGL(new, strlen(new), 0);
  }
  
  /* {{{ proto string htmlspecialchars(string string)
***************
*** 133,140 ****
     Convert all applicable characters to HTML entities */
  void php3_htmlentities(INTERNAL_FUNCTION_PARAMETERS)
  {
! /*      _php3_htmlentities(INTERNAL_FUNCTION_PARAM_PASSTHRU,1);*/
! 	_htmlentities(INTERNAL_FUNCTION_PARAM_PASSTHRU,1);
  }
  /* }}} */
  
--- 220,236 ----
     Convert all applicable characters to HTML entities */
  void php3_htmlentities(INTERNAL_FUNCTION_PARAMETERS)
  {
! /*      _php3_htmlentities(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);*/
! 	_htmlentities(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
! }
! /* }}} */
! 
! /* {{{ proto string utf8_htmlentities(string string)
!    Convert all applicable UTF-8 characters to HTML entities */
! void php3_utf8_htmlentities(INTERNAL_FUNCTION_PARAMETERS)
! {
! /*      _php3_utf8_htmlentities(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);*/
! 	_utf8_htmlentities(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
  }
  /* }}} */
  
***************
*** 144,146 ****
--- 240,243 ----
   * c-basic-offset: 4
   * End:
   */
+ 
diff -c php-3.0.12.orig/functions/html.h php-3.0.12/functions/html.h
*** php-3.0.12.orig/functions/html.h	Mon Nov  8 16:51:15 1999
--- php-3.0.12/functions/html.h	Mon Nov  8 16:58:56 1999
***************
*** 34,39 ****
  
  extern void php3_htmlspecialchars(INTERNAL_FUNCTION_PARAMETERS);
  extern void php3_htmlentities(INTERNAL_FUNCTION_PARAMETERS);
! PHPAPI char * _php3_htmlentities(char *s, int i, int all);
  
  #endif /* _HTML_H */
--- 34,41 ----
  
  extern void php3_htmlspecialchars(INTERNAL_FUNCTION_PARAMETERS);
  extern void php3_htmlentities(INTERNAL_FUNCTION_PARAMETERS);
! extern void php3_utf8_htmlentities(INTERNAL_FUNCTION_PARAMETERS);
! PHPAPI char * _php3_htmlentities(char *s, int len, int all);
! PHPAPI char * _php3_utf8_htmlentities(char *s, int len, int all);
  
  #endif /* _HTML_H */

Patches

Pull Requests

History

All | Comments | Changes | Git/SVN commits | Related reports
 [2002-10-01 10:24 UTC] hholzgra@php.net
changed to feature request and bumped up the version as the request still looks valid ...
 [2002-10-01 21:48 UTC] wez@php.net
IIRC, this is in the 4.2 branch already.
htmlentities($str, ENT_COMPAT, "utf-8");
If not, it's in 4.3 due soon.
 
PHP Copyright © 2001-2024 The PHP Group
All rights reserved.
Last updated: Thu Dec 26 22:01:28 2024 UTC