|
php.net | support | documentation | report a bug | advanced search | search howto | statistics | random bug | login |
Patches | Pull Requests | History | All | Comments | Changes | Git/SVN commits
[2002-01-14 09:59 UTC] lobbin@php.net
[2002-02-27 00:00 UTC] php-bugs at lists dot php dot net
[2006-10-12 14:33 UTC] jnicoll at seemysites dot net
[2008-08-29 15:08 UTC] jpyeron at pdinc dot us
[2008-08-29 16:06 UTC] jpyeron at pdinc dot us
|
|||||||||||||||||||||||||||||||||||||
Copyright © 2001-2025 The PHP Group. All rights reserved. |
Last updated: Sun Oct 26 20:00:01 2025 UTC |
When parsing large XML files, the PHP module starts corrupting memory. In large applications this may result in a segfault; in this example application it results in corruption of the XML parser itself (at least on my test machines). The code below was tested on three different servers: two of them running Debian/Stable (Potato) with a custom-compiled PHP 4.0.5, and one server running Debian/Unstable (Sid) with the Debian default PHP module (also 4.0.5). All run PHP as an Apache module. The custom-built modules are built like this: ./configure --with-apxs=/usr/bin/apxs --with-mysql=/usr --with-config-file-path=/etc/php4/apache --with-interbase=shared --with-gnu-ld --with-xml --with-gd=shared When using a small XML file, the code below runs fine, but when the XML file gets bigger PHP starts complaining "Unable to call handler end_tag_handler()", "Unable to call handler data_handler()" or "Unable to call handler start_tag_handler()". The code: <?php class properties { var $parser; } class my_class { var $mydata; var $mystarttag; var $myendtag; function my_class(&$props) { xml_set_object($props->parser,&$this); xml_set_element_handler($props->parser, 'start_tag_handler', 'end_tag_handler'); xml_set_character_data_handler($props->parser, 'data_handler'); } function data_handler($parser, $data) { $this->mydata .= $data; } function start_tag_handler($parser, $name, $attrs) { $this->mystarttag .= $name; } function end_tag_handler($parser, $name) { $this->myendtag .= $name; echo 'end tag is '.$name.'<br>'; } } function first_element_handler($parser, $name, $attrs) { global $props; if ($name == 'QUESTION') { /* normally I decide here, depending on $attrs what kind of object I should create. For debugging purposes I use just one object. 
*/ $q_obj = new my_class($props); } else { die('This XML file has an unknown content'); } } function start_parsing($file) { global $props; $props = new properties(); $props->parser = xml_parser_create(); xml_parser_set_option($props->parser, XML_OPTION_CASE_FOLDING, TRUE); xml_set_element_handler($props->parser, 'first_element_handler', ''); if (!($fp = fopen($file, "r"))) { die("could not open XML input"); } while ($data = fread($fp, 4096)) { if (!xml_parse($props->parser, $data, feof($fp))) { die(sprintf("XML error: %s at line %d", xml_error_string(xml_get_error_code($props->parser)), xml_get_current_line_number($props->parser))); } } xml_parser_free($props->parser); } echo 'start parsing'; start_parsing('val1.xml'); echo 'finished parsing'; ?> The XML file looks like this, but to see the problems the part between <question> and </question> should be four times bigger: <question type="VALUE"> <title>Fermentor size</title> <hint>4032 hours a year, makes 96 runs a year. How many kg per cubic meter per year do you produce?</hint> <hint>That is 10 X 96 = 960 kg per cubic meter per year</hint> <hint>You need 10.000 kg, so you need 10.4 cubic meter production</hint> <hint>You can't fill the reactor 100%, so let's assume 12 cubic meter will do</hint> <range correct="1"> <minval>11</minval> <maxval>15</maxval> <feedback>Very good! <b>Assuming</b> you can fill a reactor....</feedback> </range> <range correct="0"> <minval>1</minval> <maxval>11</maxval> <feedback>Sorry, try again</feedback> </range> </question>