php.net |  support |  documentation |  report a bug |  advanced search |  search howto |  statistics |  random bug |  login
Return to Bug #51983
Patch init_request_info.patch revision 2010-06-03 02:43 UTC by konstantin at symbi dot org

Patch init_request_info.patch for FPM related Bug #51983

Patch version 2010-06-03 02:43 UTC

Return to Bug #51983 | Download this patch
Patch Revisions:

Developer: konstantin@symbi.org

--- fpm_main.c.orig	2010-05-26 20:36:02.000000000 +0400
+++ fpm_main.c	2010-06-03 06:15:47.000000000 +0400
@@ -1011,83 +1011,59 @@
 
 /* {{{ init_request_info
 
-  initializes request_info structure
+  Initializes request_info structure.
 
-  specificly in this section we handle proper translations
-  for:
+  Assume we have the http://host/folder/script.php/extra?a=b URL, and DocumentRoot=/docroot.
+  The following environment variables are expected:
 
-  PATH_INFO
-	derived from the portion of the URI path following
-	the script name but preceding any query data
-	may be empty
-
-  PATH_TRANSLATED
-    derived by taking any path-info component of the
-	request URI and performing any virtual-to-physical
-	translation appropriate to map it onto the server's
-	document repository structure
-
-	empty if PATH_INFO is empty
-
-	The env var PATH_TRANSLATED **IS DIFFERENT** than the
-	request_info.path_translated variable, the latter should
-	match SCRIPT_FILENAME instead.
-
-  SCRIPT_NAME
-    set to a URL path that could identify the CGI script
-	rather than the interpreter.  PHP_SELF is set to this
-
-  REQUEST_URI
-    uri section following the domain:port part of a URI
-
-  SCRIPT_FILENAME
-    The virtual-to-physical translation of SCRIPT_NAME (as per
-	PATH_TRANSLATED)
-
-  These settings are documented at
-  http://cgi-spec.golux.com/
-
-
-  Based on the following URL request:
-
-  http://localhost/info.php/test?a=b
-
-  should produce, which btw is the same as if
-  we were running under mod_cgi on apache (ie. not
-  using ScriptAlias directives):
-
-  PATH_INFO=/test
-  PATH_TRANSLATED=/docroot/test
-  SCRIPT_NAME=/info.php
-  REQUEST_URI=/info.php/test?a=b
-  SCRIPT_FILENAME=/docroot/info.php
-  QUERY_STRING=a=b
-
-  but what we get is (cgi/mod_fastcgi under apache):
-
-  PATH_INFO=/info.php/test
-  PATH_TRANSLATED=/docroot/info.php/test
-  SCRIPT_NAME=/php/php-cgi  (from the Action setting I suppose)
-  REQUEST_URI=/info.php/test?a=b
-  SCRIPT_FILENAME=/path/to/php/bin/php-cgi  (Action setting translated)
-  QUERY_STRING=a=b
-
-  Comments in the code below refer to using the above URL in a request
-
- */
+  RFC3875  Variable             Description                              URL part
+  -------  -------------------  ---------------------------------------  ---------------------------
+  Yes      PATH_INFO            The extra path information, as given     /extra
+                                by the client. May be empty.
+  Yes      PATH_TRANSLATED      Virtual-to-physical mapping for          /docroot/extra
+                                PATH_INFO. Empty if PATH_INFO is empty
+  Yes      SCRIPT_NAME          Identifies script name, PHP_SELF         /script.php
+
+  Yes      QUERY_STRING         Self-explanatory                         a=b
+
+  No       SCRIPT_FILENAME      Virtual-to-physical mapping for          /docroot/script.php
+                                SCRIPT_NAME.
+  No       REQUEST_URI          Everything after the host[:port] part    /script.php/extra?a=b
+
+  No       DOCUMENT_ROOT        DocumentRoot defined in web server       /docroot
+                                configuration
+
+  According to RFC 3875 (CGI 1.1), only the first 4 variables are required.
+
+  The tricky part here is that the SCRIPT_FILENAME variable is not defined by the RFC; it probably made
+  no sense in the age of cgi-bin executables, but we need to resolve the full path when SCRIPT_FILENAME
+  is missing. The original cgi/fcgi sapi uses a complex "let's guess where it can be" algorithm.
+  Well, it is flexible and works around a lot of web servers' oddities; but checking each variant is
+  slow and requires extra system calls. And why would you use the fpm sapi if you do not care about speed?
+  So we use a simple algorithm which does not require extra syscalls and matches most configurations:
+  1) If SCRIPT_FILENAME is defined, rely on its value;
+  2) If SCRIPT_NAME and DOCUMENT_ROOT are defined, SCRIPT_FILENAME = DOCUMENT_ROOT . SCRIPT_NAME;
+  3) If PATH_TRANSLATED is defined, SCRIPT_FILENAME = PATH_TRANSLATED;
+  4) Give up.
+
+  In the request_info structure, the name 'path_translated' is kept for historical reasons;
+  it should really be read as 'script_filename'.
+*/
 static void init_request_info(TSRMLS_D)
 {
+	const char *auth;
 	char *env_script_filename = sapi_cgibin_getenv("SCRIPT_FILENAME", sizeof("SCRIPT_FILENAME")-1 TSRMLS_CC);
+	char *env_doc_root        = sapi_cgibin_getenv("DOCUMENT_ROOT", sizeof("DOCUMENT_ROOT")-1 TSRMLS_CC);
+	char *env_script_name     = sapi_cgibin_getenv("SCRIPT_NAME", sizeof("SCRIPT_NAME")-1 TSRMLS_CC);
+	char *content_length      = sapi_cgibin_getenv("CONTENT_LENGTH", sizeof("CONTENT_LENGTH")-1 TSRMLS_CC);
+	char *content_type        = sapi_cgibin_getenv("CONTENT_TYPE", sizeof("CONTENT_TYPE")-1 TSRMLS_CC);
+	char *env_path_info       = sapi_cgibin_getenv("PATH_INFO", sizeof("PATH_INFO")-1 TSRMLS_CC);
 	char *env_path_translated = sapi_cgibin_getenv("PATH_TRANSLATED", sizeof("PATH_TRANSLATED")-1 TSRMLS_CC);
-	char *script_path_translated = env_script_filename;
-	char *ini;
-
-	/* some broken servers do not have script_filename or argv0
-	 * an example, IIS configured in some ways.  then they do more
-	 * broken stuff and set path_translated to the cgi script location */
-	if (!script_path_translated && env_path_translated) {
-		script_path_translated = env_path_translated;
-	}
+	char *env_request_uri     = sapi_cgibin_getenv("REQUEST_URI", sizeof("REQUEST_URI")-1 TSRMLS_CC);
+	char *script_filename     = NULL;
+	char *request_uri         = NULL;
+	char *ini                 = NULL;
+	int script_filename_len   = -1;
 
 	/* initialize the defaults */
 	SG(request_info).path_translated = NULL;
@@ -1099,273 +1075,60 @@
 	SG(request_info).content_length = 0;
 	SG(sapi_headers).http_response_code = 200;
 
-	/* script_path_translated being set is a good indication that
-	 * we are running in a cgi environment, since it is always
-	 * null otherwise.  otherwise, the filename
-	 * of the script will be retreived later via argc/argv */
-	if (script_path_translated) {
-		const char *auth;
-		char *content_length = sapi_cgibin_getenv("CONTENT_LENGTH", sizeof("CONTENT_LENGTH")-1 TSRMLS_CC);
-		char *content_type = sapi_cgibin_getenv("CONTENT_TYPE", sizeof("CONTENT_TYPE")-1 TSRMLS_CC);
-		char *env_path_info = sapi_cgibin_getenv("PATH_INFO", sizeof("PATH_INFO")-1 TSRMLS_CC);
-		char *env_script_name = sapi_cgibin_getenv("SCRIPT_NAME", sizeof("SCRIPT_NAME")-1 TSRMLS_CC);
-
-		/* Hack for buggy IIS that sets incorrect PATH_INFO */
-		char *env_server_software = sapi_cgibin_getenv("SERVER_SOFTWARE", sizeof("SERVER_SOFTWARE")-1 TSRMLS_CC);
-		if (env_server_software &&
-			env_script_name &&
-			env_path_info &&
-			strncmp(env_server_software, "Microsoft-IIS", sizeof("Microsoft-IIS")-1) == 0 &&
-			strncmp(env_path_info, env_script_name, strlen(env_script_name)) == 0
-		) {
-			env_path_info = _sapi_cgibin_putenv("ORIG_PATH_INFO", env_path_info TSRMLS_CC);
-			env_path_info += strlen(env_script_name);
-			if (*env_path_info == 0) {
-				env_path_info = NULL;
-			}
-			env_path_info = _sapi_cgibin_putenv("PATH_INFO", env_path_info TSRMLS_CC);
-		}
+	if (env_script_filename) {
+		script_filename = env_script_filename;
+	} else {
+		do { /* while(0) */
+			if (env_doc_root) {
+				int doc_root_len = strlen(env_doc_root);
 
-		if (CGIG(fix_pathinfo)) {
-			struct stat st;
-			char *real_path = NULL;
-			char *env_redirect_url = sapi_cgibin_getenv("REDIRECT_URL", sizeof("REDIRECT_URL")-1 TSRMLS_CC);
-			char *env_document_root = sapi_cgibin_getenv("DOCUMENT_ROOT", sizeof("DOCUMENT_ROOT")-1 TSRMLS_CC);
-			char *orig_path_translated = env_path_translated;
-			char *orig_path_info = env_path_info;
-			char *orig_script_name = env_script_name;
-			char *orig_script_filename = env_script_filename;
-			int script_path_translated_len;
-
-			if (!env_document_root && PG(doc_root)) {
-				env_document_root = _sapi_cgibin_putenv("DOCUMENT_ROOT", PG(doc_root) TSRMLS_CC);
-				/* fix docroot */
-				TRANSLATE_SLASHES(env_document_root);
-			}
-
-			if (env_path_translated != NULL && env_redirect_url != NULL &&
-			    env_path_translated != script_path_translated &&
-			    strcmp(env_path_translated, script_path_translated) != 0) {
-				/*
-				 * pretty much apache specific.  If we have a redirect_url
-				 * then our script_filename and script_name point to the
-				 * php executable
-				 */
-				script_path_translated = env_path_translated;
-				/* we correct SCRIPT_NAME now in case we don't have PATH_INFO */
-				env_script_name = env_redirect_url;
-			}
-
-#ifdef __riscos__
-			/* Convert path to unix format*/
-			__riscosify_control |= __RISCOSIFY_DONT_CHECK_DIR;
-			script_path_translated = __unixify(script_path_translated, 0, NULL, 1, 0);
-#endif
-
-			/*
-			 * if the file doesn't exist, try to extract PATH_INFO out
-			 * of it by stat'ing back through the '/'
-			 * this fixes url's like /info.php/test
-			 */
-			if (script_path_translated &&
-				(script_path_translated_len = strlen(script_path_translated)) > 0 &&
-				(script_path_translated[script_path_translated_len-1] == '/' ||
-#ifdef PHP_WIN32
-				script_path_translated[script_path_translated_len-1] == '\\' ||
-#endif
-				(real_path = tsrm_realpath(script_path_translated, NULL TSRMLS_CC)) == NULL)
-			) {
-				char *pt = estrndup(script_path_translated, script_path_translated_len);
-				int len = script_path_translated_len;
-				char *ptr;
-
-				while ((ptr = strrchr(pt, '/')) || (ptr = strrchr(pt, '\\'))) {
-					*ptr = 0;
-					if (stat(pt, &st) == 0 && S_ISREG(st.st_mode)) {
-						/*
-						 * okay, we found the base script!
-						 * work out how many chars we had to strip off;
-						 * then we can modify PATH_INFO
-						 * accordingly
-						 *
-						 * we now have the makings of
-						 * PATH_INFO=/test
-						 * SCRIPT_FILENAME=/docroot/info.php
-						 *
-						 * we now need to figure out what docroot is.
-						 * if DOCUMENT_ROOT is set, this is easy, otherwise,
-						 * we have to play the game of hide and seek to figure
-						 * out what SCRIPT_NAME should be
-						 */
-						int slen = len - strlen(pt);
-						int pilen = env_path_info ? strlen(env_path_info) : 0;
-						char *path_info = env_path_info ? env_path_info + pilen - slen : NULL;
-
-						if (orig_path_info != path_info) {
-							if (orig_path_info) {
-								char old;
-
-								_sapi_cgibin_putenv("ORIG_PATH_INFO", orig_path_info TSRMLS_CC);
-								old = path_info[0];
-								path_info[0] = 0;
-								if (!orig_script_name ||
-									strcmp(orig_script_name, env_path_info) != 0) {
-									if (orig_script_name) {
-										_sapi_cgibin_putenv("ORIG_SCRIPT_NAME", orig_script_name TSRMLS_CC);
-									}
-									SG(request_info).request_uri = _sapi_cgibin_putenv("SCRIPT_NAME", env_path_info TSRMLS_CC);
-								} else {
-									SG(request_info).request_uri = orig_script_name;
-								}
-								path_info[0] = old;
-							}
-							env_path_info = _sapi_cgibin_putenv("PATH_INFO", path_info TSRMLS_CC);
-						}
-						if (!orig_script_filename ||
-							strcmp(orig_script_filename, pt) != 0) {
-							if (orig_script_filename) {
-								_sapi_cgibin_putenv("ORIG_SCRIPT_FILENAME", orig_script_filename TSRMLS_CC);
-							}
-							script_path_translated = _sapi_cgibin_putenv("SCRIPT_FILENAME", pt TSRMLS_CC);
-						}
-						TRANSLATE_SLASHES(pt);
-
-						/* figure out docroot
-						 * SCRIPT_FILENAME minus SCRIPT_NAME
-						 */
-						if (env_document_root) {
-							int l = strlen(env_document_root);
-							int path_translated_len = 0;
-							char *path_translated = NULL;
-
-							if (l && env_document_root[l - 1] == '/') {
-								--l;
-							}
-
-							/* we have docroot, so we should have:
-							 * DOCUMENT_ROOT=/docroot
-							 * SCRIPT_FILENAME=/docroot/info.php
-							 */
-
-							/* PATH_TRANSLATED = DOCUMENT_ROOT + PATH_INFO */
-							path_translated_len = l + (env_path_info ? strlen(env_path_info) : 0);
-							path_translated = (char *) emalloc(path_translated_len + 1);
-							memcpy(path_translated, env_document_root, l);
-							if (env_path_info) {
-								memcpy(path_translated + l, env_path_info, (path_translated_len - l));
-							}
-							path_translated[path_translated_len] = '\0';
-							if (orig_path_translated) {
-								_sapi_cgibin_putenv("ORIG_PATH_TRANSLATED", orig_path_translated TSRMLS_CC);
-							}
-							env_path_translated = _sapi_cgibin_putenv("PATH_TRANSLATED", path_translated TSRMLS_CC);
-							efree(path_translated);
-						} else if (	env_script_name &&
-									strstr(pt, env_script_name)
-						) {
-							/* PATH_TRANSLATED = PATH_TRANSLATED - SCRIPT_NAME + PATH_INFO */
-							int ptlen = strlen(pt) - strlen(env_script_name);
-							int path_translated_len = ptlen + (env_path_info ? strlen(env_path_info) : 0);
-							char *path_translated = NULL;
-
-							path_translated = (char *) emalloc(path_translated_len + 1);
-							memcpy(path_translated, pt, ptlen);
-							if (env_path_info) {
-								memcpy(path_translated + ptlen, env_path_info, path_translated_len - ptlen);
-							}
-							path_translated[path_translated_len] = '\0';
-							if (orig_path_translated) {
-								_sapi_cgibin_putenv("ORIG_PATH_TRANSLATED", orig_path_translated TSRMLS_CC);
-							}
-							env_path_translated = _sapi_cgibin_putenv("PATH_TRANSLATED", path_translated TSRMLS_CC);
-							efree(path_translated);
-						}
-						break;
-					}
-				}
-				if (!ptr) {
-					/*
-					 * if we stripped out all the '/' and still didn't find
-					 * a valid path... we will fail, badly. of course we would
-					 * have failed anyway... we output 'no input file' now.
-					 */
-					if (orig_script_filename) {
-						_sapi_cgibin_putenv("ORIG_SCRIPT_FILENAME", orig_script_filename TSRMLS_CC);
-					}
-					script_path_translated = _sapi_cgibin_putenv("SCRIPT_FILENAME", NULL TSRMLS_CC);
-					SG(sapi_headers).http_response_code = 404;
-				}
-				if (!SG(request_info).request_uri) {
-					if (!orig_script_name ||
-						strcmp(orig_script_name, env_script_name) != 0) {
-						if (orig_script_name) {
-							_sapi_cgibin_putenv("ORIG_SCRIPT_NAME", orig_script_name TSRMLS_CC);
-						}
-						SG(request_info).request_uri = _sapi_cgibin_putenv("SCRIPT_NAME", env_script_name TSRMLS_CC);
-					} else {
-						SG(request_info).request_uri = orig_script_name;
-					}
-				}
-				if (pt) {
-					efree(pt);
-				}
-			} else {
-				/* make sure path_info/translated are empty */
-				if (!orig_script_filename ||
-					(script_path_translated != orig_script_filename &&
-					strcmp(script_path_translated, orig_script_filename) != 0)) {
-					if (orig_script_filename) {
-						_sapi_cgibin_putenv("ORIG_SCRIPT_FILENAME", orig_script_filename TSRMLS_CC);
-					}
-					script_path_translated = _sapi_cgibin_putenv("SCRIPT_FILENAME", script_path_translated TSRMLS_CC);
-				}
-				if (env_redirect_url) {
-					if (orig_path_info) {
-						_sapi_cgibin_putenv("ORIG_PATH_INFO", orig_path_info TSRMLS_CC);
-						_sapi_cgibin_putenv("PATH_INFO", NULL TSRMLS_CC);
-					}
-					if (orig_path_translated) {
-						_sapi_cgibin_putenv("ORIG_PATH_TRANSLATED", orig_path_translated TSRMLS_CC);
-						_sapi_cgibin_putenv("PATH_TRANSLATED", NULL TSRMLS_CC);
-					}
-				}
-				if (env_script_name != orig_script_name) {
-					if (orig_script_name) {
-						_sapi_cgibin_putenv("ORIG_SCRIPT_NAME", orig_script_name TSRMLS_CC);
-					}
-					SG(request_info).request_uri = _sapi_cgibin_putenv("SCRIPT_NAME", env_script_name TSRMLS_CC);
-				} else {
-					SG(request_info).request_uri = env_script_name;
+				if (env_script_name) {
+					int script_name_len = strlen(env_script_name);
+					script_filename_len = doc_root_len + script_name_len;
+
+					script_filename = (char *) emalloc(script_filename_len+1);
+					memcpy(script_filename               , env_doc_root   , doc_root_len);
+					memcpy(script_filename + doc_root_len, env_script_name, script_name_len);
+					script_filename[script_filename_len] = '\0';
+
+					break;
 				}
-				free(real_path);
 			}
-		} else {
-			/* pre 4.3 behaviour, shouldn't be used but provides BC */
-			if (env_path_info) {
-				SG(request_info).request_uri = env_path_info;
-			} else {
-				SG(request_info).request_uri = env_script_name;
-			}
-			if (!CGIG(discard_path) && env_path_translated) {
-				script_path_translated = env_path_translated;
+
+			if (env_path_translated) {
+				script_filename = env_path_translated;
 			}
-		}
+		} while (0);
+	}
 
-		if (is_valid_path(script_path_translated)) {
-			SG(request_info).path_translated = estrdup(script_path_translated);
-		}
+	if (env_request_uri) {
+		request_uri = env_request_uri;
+	} else if (env_path_info) {
+		request_uri = env_path_info;
+	} else if (env_script_name) {
+		request_uri = env_script_name;
+	}
+
+	SG(request_info).request_uri = request_uri ? estrndup(request_uri, strcspn(request_uri, "?")) : NULL;
+
+	if (script_filename && is_valid_path(script_filename)) {
+		SG(request_info).path_translated = estrdup(script_filename);
+	}
 
-		SG(request_info).request_method = sapi_cgibin_getenv("REQUEST_METHOD", sizeof("REQUEST_METHOD")-1 TSRMLS_CC);
-		/* FIXME - Work out proto_num here */
-		SG(request_info).query_string = sapi_cgibin_getenv("QUERY_STRING", sizeof("QUERY_STRING")-1 TSRMLS_CC);
-		SG(request_info).content_type = (content_type ? content_type : "" );
-		SG(request_info).content_length = (content_length ? atoi(content_length) : 0);
-
-		/* The CGI RFC allows servers to pass on unvalidated Authorization data */
-		auth = sapi_cgibin_getenv("HTTP_AUTHORIZATION", sizeof("HTTP_AUTHORIZATION")-1 TSRMLS_CC);
-		php_handle_auth_data(auth TSRMLS_CC);
+	if (script_filename_len != -1) {
+		efree(script_filename);
 	}
 
+	SG(request_info).request_method = sapi_cgibin_getenv("REQUEST_METHOD", sizeof("REQUEST_METHOD")-1 TSRMLS_CC);
+	/* FIXME - Work out proto_num here */
+	SG(request_info).query_string = sapi_cgibin_getenv("QUERY_STRING", sizeof("QUERY_STRING")-1 TSRMLS_CC);
+	SG(request_info).content_type = (content_type ? content_type : "");
+	SG(request_info).content_length = (content_length ? atoi(content_length) : 0);
+
+	/* The CGI RFC allows servers to pass on unvalidated Authorization data */
+	auth = sapi_cgibin_getenv("HTTP_AUTHORIZATION", sizeof("HTTP_AUTHORIZATION")-1 TSRMLS_CC);
+	php_handle_auth_data(auth TSRMLS_CC);
+
 	/* INI stuff */
 	ini = sapi_cgibin_getenv("PHP_VALUE", sizeof("PHP_VALUE")-1 TSRMLS_CC);
 	if (ini) {
 
PHP Copyright © 2001-2024 The PHP Group
All rights reserved.
Last updated: Fri Apr 19 18:01:28 2024 UTC