php.net |  support |  documentation |  report a bug |  advanced search |  search howto |  statistics |  random bug |  login
Bug #59468 new parser code
Submitted: 2010-10-17 18:54 UTC Modified: 2010-11-21 20:19 UTC
From: martynas at venck dot us Assigned: martynas (profile)
Status: Closed Package: htscanner (PECL)
PHP Version: Irrelevant OS: Linux
Private report: No CVE-ID: None
View Add Comment Developer Edit
Welcome! If you don't have a Git account, you can't do anything here.
You can add a comment by following this link or if you reported this bug, you can edit this bug over here.
Block user comment
Status: Assign to:
Package:
Bug Type:
Summary:
From: martynas at venck dot us
New email:
PHP Version: OS:

 

 [2010-10-17 18:54 UTC] martynas at venck dot us
Description:
------------
I've completely rewritten the htscanner parser code.  It's much
cleaner now, takes fewer lines of code, and supports new
features.

- <IfModule mod_php5.c></IfModule> support.  Htscanner will
only scan php_{flag,value} directives that appear at the top
level (outside any <IfModule> block) or inside an
<IfModule mod_php5.c> block.  This fixes quite a few applications.

- Comments support--every line starting with '#' will be 
ignored.

- Much simpler and more robust code;  uses the standard POSIX
strtok_r() instead of hand-rolled string parsing functions.

The previous parser has been a source of quite a few problems.  I
hope this will at least fix #16130, #16891, #17067, #18886.

Index: htscanner-trunk/htscanner.c
============================================================
=======
--- htscanner-trunk/htscanner.c	(revision 304454)
+++ htscanner-trunk/htscanner.c	(working copy)
@@ -35,7 +35,8 @@
 
 int (*php_cgi_sapi_activate)(TSRMLS_D);
 
-#define FILE_BUFFER 1000
+#define FILE_BUFFER 1024
+#define FILE_SEPARATOR "'\" \t\r\n"
 #define HTSCANNER_DEBUG 0
 #define HTSCANNER_ENABLE_CACHE 1 
 
@@ -61,12 +62,6 @@
 /* }}} */
 #endif
 
-#define PHP_HTSCANNER_LTRIM(p) { \
-	while ((*p == ' ' || *p == '\t' || *p == '\r' || *p 
== '\v') && (*p != '\0')) { \
-		p++; \
-	} \
-}
-
 #define RETURN_FAILURE(msg) { \
 	if (HTG(stop_on_error) > 0) { \
 		if (msg) { \
@@ -179,57 +174,6 @@
 }
 /* }}} */
 
-/* {{{ value_hnd_strip
- * Parse an option and try to set the option
- */
-static int value_hnd_strip(char *string, int flag, int 
mode, HashTable *ini_entries TSRMLS_DC)
-{
-	char *name;
-	char *value;
-	int value_len;
-
-	name = string;
-	/* strip any leading whitespaces or tabs from the 
name */
-	PHP_HTSCANNER_LTRIM(name);
-	value = strchr(name, ' ');
-	if (!value) {
-		value = strchr(name, '\t');
-	}
-	if (value) {
-		*value = 0;
-		++value;
-		PHP_HTSCANNER_LTRIM(value);
-
-		/*
-		 * strip EOL characters (CRLF/LF/CR) if 
needed.
-		 * Boris HUISGEN <bhuisgen@hbis.fr>
-		 */
-		value_len = strlen(value);
-		if (value_len > 2 && value[value_len - 2] == 
'\r') {
-			value[value_len - 2] = 0;
-		} else if (value[value_len - 1] == '\n') {
-			value[value_len - 1] = 0;
-		} else if (value[value_len - 1] == '\r') {
-			value[value_len - 1] = 0;
-		} else {
-			value[value_len] = 0;
-		}
-
-		/* strip quoting characters */
-		value_len = strlen(value);
-		if ((value[0] == '\'' && value[value_len - 
1] == '\'') ||
-				(value[0] == '\"' && 
value[value_len - 1] == '\"')) {
-			value[value_len - 1] = 0;
-			value++;
-		}
-
-		return value_hnd(name, value, flag, mode, 
ini_entries TSRMLS_CC);
-	}
-
-	return FAILURE;
-}
-/* }}} */
-
 /* {{{ parse_config_file
  * Parse the configuration file
  */
@@ -253,18 +197,59 @@
 #endif
 
 	stream = php_stream_open_wrapper(file, "rb", 
ENFORCE_SAFE_MODE, NULL);
-
 	if (stream != NULL) {
-		char buf[FILE_BUFFER];
-		char *pos;
-		while ((pos = php_stream_gets(stream, buf, 
FILE_BUFFER)) != NULL) {
-			/* strip leading spaces or tabs */
-			PHP_HTSCANNER_LTRIM(pos);
+		char buf[FILE_BUFFER], *bufp, *name = NULL;
+		unsigned ifmodule = 0, flag = 0, value = 0, 
parse = 1;
+		while ((bufp = php_stream_gets(stream, buf, 
FILE_BUFFER)) != NULL) {
+			char *tok, *last;
+			/* Skip comments. */
+			if (*bufp == '#')
+				continue;
+			for (tok = strtok_r(bufp, 
FILE_SEPARATOR, &last); tok;
+			    tok = strtok_r(NULL, 
FILE_SEPARATOR, &last)) {
+				/*
+				 * Handle <IfModule 
mod_php5.c></IfModule>.
+				 */
+				if (!strcasecmp(tok, 
"<IfModule")) {
+					ifmodule = 1;
+					continue;
+				}
+				if (ifmodule) {
+					ifmodule = 0;
+					if (!strcasecmp(tok, 
"mod_php5.c>"))
+						parse = 1;
+					else
+						parse = 0;
+					continue;
+				}
+				if (!strcasecmp(tok, 
"</IfModule>")) {
+					parse = 1;
+					continue;
+				}
+				if (!parse)
+					continue;
 
-			if (strncmp(pos, "php_value", 
sizeof("php_value") - 1) == 0) {
-				value_hnd_strip(pos + 
sizeof("php_value"), 0, PHP_INI_PERDIR, ini_entries 
TSRMLS_CC);
-			} else if (strncmp(pos, "php_flag", 
sizeof("php_flag") - 1) == 0) {
-				value_hnd_strip(pos + 
sizeof("php_flag"), 1, PHP_INI_PERDIR, ini_entries 
TSRMLS_CC);
+				/*
+				 * Handle php_flag and 
php_value.
+				 */
+				if (flag || value) {
+					if (name == NULL)
+						name = tok;
+					else {
+						
value_hnd(name, tok, flag, PHP_INI_PERDIR, ini_entries 
TSRMLS_CC);
+						flag = value 
= 0;
+						name = NULL;
+					}
+					continue;
+				}
+				if (!strcmp(tok, 
"php_flag")) {
+					flag = 1;
+					continue;
+				}
+				if (!strcmp(tok, 
"php_value")) {
+					value = 1;
+					continue;
+				}
 			}
 		}
 		php_stream_close(stream);


Patches

Add a Patch

Pull Requests

Add a Pull Request

History

AllCommentsChangesGit/SVN commitsRelated reports
 [2010-10-19 13:12 UTC] martynas at venck dot us
Actually, Apache parses the file line by line, which makes it even easier.

Currently the parser is only 30 lines long.

We've been running this in production; it fixed the leading-space issue for our client (#18886).

Index: htscanner.c
===================================================================
--- htscanner.c	(revision 304454)
+++ htscanner.c	(working copy)
@@ -35,7 +35,8 @@
 
 int (*php_cgi_sapi_activate)(TSRMLS_D);
 
-#define FILE_BUFFER 1000
+#define FILE_BUFFER 1024
+#define FILE_SEPARATOR " \t\r\n"
 #define HTSCANNER_DEBUG 0
 #define HTSCANNER_ENABLE_CACHE 1 
 
@@ -61,12 +62,6 @@
 /* }}} */
 #endif
 
-#define PHP_HTSCANNER_LTRIM(p) { \
-	while ((*p == ' ' || *p == '\t' || *p == '\r' || *p == '\v') && (*p != '\0')) { \
-		p++; \
-	} \
-}
-
 #define RETURN_FAILURE(msg) { \
 	if (HTG(stop_on_error) > 0) { \
 		if (msg) { \
@@ -179,57 +174,6 @@
 }
 /* }}} */
 
-/* {{{ value_hnd_strip
- * Parse an option and try to set the option
- */
-static int value_hnd_strip(char *string, int flag, int mode, HashTable *ini_entries TSRMLS_DC)
-{
-	char *name;
-	char *value;
-	int value_len;
-
-	name = string;
-	/* strip any leading whitespaces or tabs from the name */
-	PHP_HTSCANNER_LTRIM(name);
-	value = strchr(name, ' ');
-	if (!value) {
-		value = strchr(name, '\t');
-	}
-	if (value) {
-		*value = 0;
-		++value;
-		PHP_HTSCANNER_LTRIM(value);
-
-		/*
-		 * strip EOL characters (CRLF/LF/CR) if needed.
-		 * Boris HUISGEN <bhuisgen@hbis.fr>
-		 */
-		value_len = strlen(value);
-		if (value_len > 2 && value[value_len - 2] == '\r') {
-			value[value_len - 2] = 0;
-		} else if (value[value_len - 1] == '\n') {
-			value[value_len - 1] = 0;
-		} else if (value[value_len - 1] == '\r') {
-			value[value_len - 1] = 0;
-		} else {
-			value[value_len] = 0;
-		}
-
-		/* strip quoting characters */
-		value_len = strlen(value);
-		if ((value[0] == '\'' && value[value_len - 1] == '\'') ||
-				(value[0] == '\"' && value[value_len - 1] == '\"')) {
-			value[value_len - 1] = 0;
-			value++;
-		}
-
-		return value_hnd(name, value, flag, mode, ini_entries TSRMLS_CC);
-	}
-
-	return FAILURE;
-}
-/* }}} */
-
 /* {{{ parse_config_file
  * Parse the configuration file
  */
@@ -253,18 +197,37 @@
 #endif
 
 	stream = php_stream_open_wrapper(file, "rb", ENFORCE_SAFE_MODE, NULL);
-
 	if (stream != NULL) {
-		char buf[FILE_BUFFER];
-		char *pos;
-		while ((pos = php_stream_gets(stream, buf, FILE_BUFFER)) != NULL) {
-			/* strip leading spaces or tabs */
-			PHP_HTSCANNER_LTRIM(pos);
-
-			if (strncmp(pos, "php_value", sizeof("php_value") - 1) == 0) {
-				value_hnd_strip(pos + sizeof("php_value"), 0, PHP_INI_PERDIR, ini_entries 
TSRMLS_CC);
-			} else if (strncmp(pos, "php_flag", sizeof("php_flag") - 1) == 0) {
-				value_hnd_strip(pos + sizeof("php_flag"), 1, PHP_INI_PERDIR, ini_entries 
TSRMLS_CC);
+		char buf[FILE_BUFFER], *bufp;
+		int parse = 1;
+		while ((bufp = php_stream_gets(stream, buf, FILE_BUFFER)) != NULL) {
+			char *tok, *last;
+			int flag = 0;
+			if ((tok = strtok_r(bufp, FILE_SEPARATOR, &last)) == NULL)
+				continue;
+			if (!strcasecmp(tok, "<IfModule")) {
+				tok = strtok_r(NULL, FILE_SEPARATOR, &last);
+				parse = tok && strcasecmp(tok, "mod_php5.c>") == 0;
+			} else if (!strcasecmp(tok, "</IfModule>")) {
+				parse = 1;
+			} else if (parse && ((flag = !strcasecmp(tok, "php_flag")) || !strcasecmp(tok, 
"php_value"))) {
+				char *name, *sep;
+				if ((name = strtok_r(NULL, FILE_SEPARATOR, &last)) == NULL || last == NULL)
+					continue;
+				last += strspn(last, FILE_SEPARATOR);
+				switch (*last) {
+				case '\'':
+					sep = "'";
+					break;
+				case '"':
+					sep = "\"";
+					break;
+				default:
+					sep = FILE_SEPARATOR;
+				}
+				if ((tok = strtok_r(NULL, sep, &last)) == NULL)
+					continue;
+				value_hnd(name, tok, flag, PHP_INI_PERDIR, ini_entries TSRMLS_CC);
 			}
 		}
 		php_stream_close(stream);
 [2010-11-21 20:19 UTC] martynas at venck dot us
The htscanner parser code has been rewritten in trunk.
 
PHP Copyright © 2001-2024 The PHP Group
All rights reserved.
Last updated: Thu Mar 28 20:01:28 2024 UTC