php.net |  support |  documentation |  report a bug |  advanced search |  search howto |  statistics |  random bug |  login
Bug #59468 new parser code
Submitted: 2010-10-17 18:54 UTC Modified: 2010-11-21 20:19 UTC
From: martynas at venck dot us Assigned: martynas (profile)
Status: Closed Package: htscanner (PECL)
PHP Version: Irrelevant OS: Linux
Private report: No CVE-ID: None
Welcome back! If you're the original bug submitter, here's where you can edit the bug or add additional notes.
If you forgot your password, you can retrieve your password here.
Password:
Status:
Package:
Bug Type:
Summary:
From: martynas at venck dot us
New email:
PHP Version: OS:

 

 [2010-10-17 18:54 UTC] martynas at venck dot us
Description:
------------
I've completely rewritten the htscanner parser code--it's much 
cleaner now;  takes fewer lines of code;  and supports new 
features.

- <IfModule mod_php5.c></IfModule> support.  Htscanner will 
only scan php_{flag,value} directives in the global namespace 
or in the mod_php5.c namespace.  This fixes quite a few applications.

- Comments support--every line starting with '#' will be 
ignored.

- Much simpler and more robust code;  uses the standard POSIX 
strtok_r() instead of hand-rolled string parsing functions.

The previous parser has been a source of quite a few problems.  I 
hope this will at least fix #16130, #16891, #17067, #18886.

Index: htscanner-trunk/htscanner.c
===================================================================
--- htscanner-trunk/htscanner.c	(revision 304454)
+++ htscanner-trunk/htscanner.c	(working copy)
@@ -35,7 +35,8 @@
 
 int (*php_cgi_sapi_activate)(TSRMLS_D);
 
-#define FILE_BUFFER 1000
+#define FILE_BUFFER 1024
+#define FILE_SEPARATOR "'\" \t\r\n"
 #define HTSCANNER_DEBUG 0
 #define HTSCANNER_ENABLE_CACHE 1 
 
@@ -61,12 +62,6 @@
 /* }}} */
 #endif
 
-#define PHP_HTSCANNER_LTRIM(p) { \
-	while ((*p == ' ' || *p == '\t' || *p == '\r' || *p 
== '\v') && (*p != '\0')) { \
-		p++; \
-	} \
-}
-
 #define RETURN_FAILURE(msg) { \
 	if (HTG(stop_on_error) > 0) { \
 		if (msg) { \
@@ -179,57 +174,6 @@
 }
 /* }}} */
 
-/* {{{ value_hnd_strip
- * Parse an option and try to set the option
- */
-static int value_hnd_strip(char *string, int flag, int 
mode, HashTable *ini_entries TSRMLS_DC)
-{
-	char *name;
-	char *value;
-	int value_len;
-
-	name = string;
-	/* strip any leading whitespaces or tabs from the 
name */
-	PHP_HTSCANNER_LTRIM(name);
-	value = strchr(name, ' ');
-	if (!value) {
-		value = strchr(name, '\t');
-	}
-	if (value) {
-		*value = 0;
-		++value;
-		PHP_HTSCANNER_LTRIM(value);
-
-		/*
-		 * strip EOL characters (CRLF/LF/CR) if 
needed.
-		 * Boris HUISGEN <bhuisgen@hbis.fr>
-		 */
-		value_len = strlen(value);
-		if (value_len > 2 && value[value_len - 2] == 
'\r') {
-			value[value_len - 2] = 0;
-		} else if (value[value_len - 1] == '\n') {
-			value[value_len - 1] = 0;
-		} else if (value[value_len - 1] == '\r') {
-			value[value_len - 1] = 0;
-		} else {
-			value[value_len] = 0;
-		}
-
-		/* strip quoting characters */
-		value_len = strlen(value);
-		if ((value[0] == '\'' && value[value_len - 
1] == '\'') ||
-				(value[0] == '\"' && 
value[value_len - 1] == '\"')) {
-			value[value_len - 1] = 0;
-			value++;
-		}
-
-		return value_hnd(name, value, flag, mode, 
ini_entries TSRMLS_CC);
-	}
-
-	return FAILURE;
-}
-/* }}} */
-
 /* {{{ parse_config_file
  * Parse the configuration file
  */
@@ -253,18 +197,59 @@
 #endif
 
 	stream = php_stream_open_wrapper(file, "rb", 
ENFORCE_SAFE_MODE, NULL);
-
 	if (stream != NULL) {
-		char buf[FILE_BUFFER];
-		char *pos;
-		while ((pos = php_stream_gets(stream, buf, 
FILE_BUFFER)) != NULL) {
-			/* strip leading spaces or tabs */
-			PHP_HTSCANNER_LTRIM(pos);
+		char buf[FILE_BUFFER], *bufp, *name = NULL;
+		unsigned ifmodule = 0, flag = 0, value = 0, 
parse = 1;
+		while ((bufp = php_stream_gets(stream, buf, 
FILE_BUFFER)) != NULL) {
+			char *tok, *last;
+			/* Skip comments. */
+			if (*bufp == '#')
+				continue;
+			for (tok = strtok_r(bufp, 
FILE_SEPARATOR, &last); tok;
+			    tok = strtok_r(NULL, 
FILE_SEPARATOR, &last)) {
+				/*
+				 * Handle <IfModule 
mod_php5.c></IfModule>.
+				 */
+				if (!strcasecmp(tok, 
"<IfModule")) {
+					ifmodule = 1;
+					continue;
+				}
+				if (ifmodule) {
+					ifmodule = 0;
+					if (!strcasecmp(tok, 
"mod_php5.c>"))
+						parse = 1;
+					else
+						parse = 0;
+					continue;
+				}
+				if (!strcasecmp(tok, 
"</IfModule>")) {
+					parse = 1;
+					continue;
+				}
+				if (!parse)
+					continue;
 
-			if (strncmp(pos, "php_value", 
sizeof("php_value") - 1) == 0) {
-				value_hnd_strip(pos + 
sizeof("php_value"), 0, PHP_INI_PERDIR, ini_entries 
TSRMLS_CC);
-			} else if (strncmp(pos, "php_flag", 
sizeof("php_flag") - 1) == 0) {
-				value_hnd_strip(pos + 
sizeof("php_flag"), 1, PHP_INI_PERDIR, ini_entries 
TSRMLS_CC);
+				/*
+				 * Handle php_flag and 
php_value.
+				 */
+				if (flag || value) {
+					if (name == NULL)
+						name = tok;
+					else {
+						
value_hnd(name, tok, flag, PHP_INI_PERDIR, ini_entries 
TSRMLS_CC);
+						flag = value 
= 0;
+						name = NULL;
+					}
+					continue;
+				}
+				if (!strcmp(tok, 
"php_flag")) {
+					flag = 1;
+					continue;
+				}
+				if (!strcmp(tok, 
"php_value")) {
+					value = 1;
+					continue;
+				}
 			}
 		}
 		php_stream_close(stream);


Patches

Pull Requests

History

AllCommentsChangesGit/SVN commitsRelated reports
 [2010-10-19 13:12 UTC] martynas at venck dot us
Actually, Apache parses the file line by line, which makes it even easier.

Currently the parser is only 30 lines long.

We've been running with this in production;  it fixed the leading-space issue for our client (#18886).

Index: htscanner.c
===================================================================
--- htscanner.c	(revision 304454)
+++ htscanner.c	(working copy)
@@ -35,7 +35,8 @@
 
 int (*php_cgi_sapi_activate)(TSRMLS_D);
 
-#define FILE_BUFFER 1000
+#define FILE_BUFFER 1024
+#define FILE_SEPARATOR " \t\r\n"
 #define HTSCANNER_DEBUG 0
 #define HTSCANNER_ENABLE_CACHE 1 
 
@@ -61,12 +62,6 @@
 /* }}} */
 #endif
 
-#define PHP_HTSCANNER_LTRIM(p) { \
-	while ((*p == ' ' || *p == '\t' || *p == '\r' || *p == '\v') && (*p != '\0')) { \
-		p++; \
-	} \
-}
-
 #define RETURN_FAILURE(msg) { \
 	if (HTG(stop_on_error) > 0) { \
 		if (msg) { \
@@ -179,57 +174,6 @@
 }
 /* }}} */
 
-/* {{{ value_hnd_strip
- * Parse an option and try to set the option
- */
-static int value_hnd_strip(char *string, int flag, int mode, HashTable *ini_entries TSRMLS_DC)
-{
-	char *name;
-	char *value;
-	int value_len;
-
-	name = string;
-	/* strip any leading whitespaces or tabs from the name */
-	PHP_HTSCANNER_LTRIM(name);
-	value = strchr(name, ' ');
-	if (!value) {
-		value = strchr(name, '\t');
-	}
-	if (value) {
-		*value = 0;
-		++value;
-		PHP_HTSCANNER_LTRIM(value);
-
-		/*
-		 * strip EOL characters (CRLF/LF/CR) if needed.
-		 * Boris HUISGEN <bhuisgen@hbis.fr>
-		 */
-		value_len = strlen(value);
-		if (value_len > 2 && value[value_len - 2] == '\r') {
-			value[value_len - 2] = 0;
-		} else if (value[value_len - 1] == '\n') {
-			value[value_len - 1] = 0;
-		} else if (value[value_len - 1] == '\r') {
-			value[value_len - 1] = 0;
-		} else {
-			value[value_len] = 0;
-		}
-
-		/* strip quoting characters */
-		value_len = strlen(value);
-		if ((value[0] == '\'' && value[value_len - 1] == '\'') ||
-				(value[0] == '\"' && value[value_len - 1] == '\"')) {
-			value[value_len - 1] = 0;
-			value++;
-		}
-
-		return value_hnd(name, value, flag, mode, ini_entries TSRMLS_CC);
-	}
-
-	return FAILURE;
-}
-/* }}} */
-
 /* {{{ parse_config_file
  * Parse the configuration file
  */
@@ -253,18 +197,37 @@
 #endif
 
 	stream = php_stream_open_wrapper(file, "rb", ENFORCE_SAFE_MODE, NULL);
-
 	if (stream != NULL) {
-		char buf[FILE_BUFFER];
-		char *pos;
-		while ((pos = php_stream_gets(stream, buf, FILE_BUFFER)) != NULL) {
-			/* strip leading spaces or tabs */
-			PHP_HTSCANNER_LTRIM(pos);
-
-			if (strncmp(pos, "php_value", sizeof("php_value") - 1) == 0) {
-				value_hnd_strip(pos + sizeof("php_value"), 0, PHP_INI_PERDIR, ini_entries 
TSRMLS_CC);
-			} else if (strncmp(pos, "php_flag", sizeof("php_flag") - 1) == 0) {
-				value_hnd_strip(pos + sizeof("php_flag"), 1, PHP_INI_PERDIR, ini_entries 
TSRMLS_CC);
+		char buf[FILE_BUFFER], *bufp;
+		int parse = 1;
+		while ((bufp = php_stream_gets(stream, buf, FILE_BUFFER)) != NULL) {
+			char *tok, *last;
+			int flag = 0;
+			if ((tok = strtok_r(bufp, FILE_SEPARATOR, &last)) == NULL)
+				continue;
+			if (!strcasecmp(tok, "<IfModule")) {
+				tok = strtok_r(NULL, FILE_SEPARATOR, &last);
+				parse = tok && strcasecmp(tok, "mod_php5.c>") == 0;
+			} else if (!strcasecmp(tok, "</IfModule>")) {
+				parse = 1;
+			} else if (parse && ((flag = !strcasecmp(tok, "php_flag")) || !strcasecmp(tok, 
"php_value"))) {
+				char *name, *sep;
+				if ((name = strtok_r(NULL, FILE_SEPARATOR, &last)) == NULL || last == NULL)
+					continue;
+				last += strspn(last, FILE_SEPARATOR);
+				switch (*last) {
+				case '\'':
+					sep = "'";
+					break;
+				case '"':
+					sep = "\"";
+					break;
+				default:
+					sep = FILE_SEPARATOR;
+				}
+				if ((tok = strtok_r(NULL, sep, &last)) == NULL)
+					continue;
+				value_hnd(name, tok, flag, PHP_INI_PERDIR, ini_entries TSRMLS_CC);
 			}
 		}
 		php_stream_close(stream);
 [2010-11-21 20:19 UTC] martynas at venck dot us
The htscanner parser code has been rewritten in trunk.
 
PHP Copyright © 2001-2025 The PHP Group
All rights reserved.
Last updated: Sun May 11 15:01:27 2025 UTC