|
php.net | support | documentation | report a bug | advanced search | search howto | statistics | random bug | login |
[2012-12-24 06:48 UTC] dean at ovts dot com dot au
Description: ------------ --- From manual page: http://www.php.net/function.curl-multi-exec --- I think there's a bug in the sample code. If curl_multi_select() returns -1, then the while{} loop just keeps calling it until it returns something else. So if repeated calls to curl_multi_select() can return -1 indefinitely (and the comments on the manual page for that function suggest it can), then the example code will just go into an infinite loop. Patches | Pull Requests | History | All | Comments | Changes | Git/SVN commits
|
|||||||||||||||||||||||||||
Copyright © 2001-2025 The PHP Group. All rights reserved. |
Last updated: Wed Oct 22 07:00:01 2025 UTC |
That would certainly fix the infinite loop problem. I'm not clear on why you'd need to wait 100ms (or some other arbitrary amount of time) before calling curl_multi_exec again... this just seems like bad design to me (even if the libcurl docs recommend it). How many times might you end up doing this 100ms sleep? The whole point of curl_multi_select is to sleep exactly the minimum amount, and wake up as soon as some curl operation is ready to continue. What would happen if you simply ignored the return value of curl_multi_select, and did this: while (curl_multi_exec($mh, $active) === CURLM_CALL_MULTI_PERFORM); do { curl_multi_select($curlMH); while (curl_multi_exec($curlMH, $active) === CURLM_CALL_MULTI_PERFORM); } while ($active); (Also, do we need to break if curl_multi_exec doesn't return CURLM_OK, or can we just keep going until $active is false, as I've done above? What would it mean if $active were true, but the return value wasn't CURLM_OK?) Well, I've looked over the relevant libcurl docs, and taken a look at how others on the internet are using curl_multi, and based on that, here's what I think represents best-practice, as example code for including in the PHP documentation pages: $MAX_SIMULTANEOUS = 50; // Adjust to whatever number of maximum simultaneous requests you think is appropriate. // Fill array $reqList[] with whatever data you're using to generate your CURL requests, // one request per array element, with consecutive indices starting at 0. // note that requests at the END of the array will be started first. if ($i = count($reqList)) // check that there is something to do... 
{ $handleMap = new SplObjectStorage; // use this to associate curl handles with the data that created them $curlMH = curl_multi_init(); $x = $i - $MAX_SIMULTANEOUS; if ($x<0) $x = 0; while ($i>$x) initRequest($reqList[--$i]); do { while (($mrc = curl_multi_exec($curlMH, $active)) === CURLM_CALL_MULTI_PERFORM); if ($mrc !== CURLM_OK) break; // Shouldn't normally ever happen; look at the list of CURLM_ errors to see when it might. while ($info = curl_multi_info_read($curlMH)) // a request has completed { $ch = $info['handle']; if (($errNo = $info['result']) === CURLE_OK) processResult($handleMap[$ch], curl_multi_getcontent($ch)); else ... // handle failed CURL request, (e.g. write to the error log or database, output a message, etc.) if ($i) initRequest($reqList[--$i]); // start new request if one is waiting curl_multi_remove_handle($curlMH, $ch); curl_close($ch); $handleMap->detach($ch); // clean up completed request } // wait for next CURL operation to complete, or sleep a short time if CURL is busy but unable to "block" using curl_multi_select if ($active && (curl_multi_select($curlMH) === -1)) usleep(50); } while ($active); if ($mrc !== CURLM_OK) ... // optinally handle CURLM_ errors (e.g. write to error log, output message, etc.) // clean up foreach ($handleMap as $ch) { curl_multi_remove_handle($curlMH, $ch); curl_close($ch); } curl_multi_close($curlMH); $reqList = $curlMH = $handleMap = null; } function initRequest($reqData) { global $curlMH, $handleMap; // process $reqData to get $url (the request URL), and any POST data, or other CURL options. // also generate $custReqData, which will be passed to processResult to identify the request // when it has completed (may be the same as $reqData). 
$ch = curl_init($url); curl_setopt($ch, ...); // set other CURL options curl_setopt($ch, ...); curl_multi_add_handle($curlMH, $ch); $handleMap[$ch] = $custReqData; } function processResult($custReqData, $response) { // process $response (the data returned from the CURL request) // using $custReqData to identify which request it belongs to. } Oops... looks like you can't use SplObjectStorage to store resources... So, I fixed it by using an array instead. I make use of the fact that ((string) $resource) is a string like "Resource id #123", where the "123" is a unique integer identifying a (current) resource. I trim the first 13 chars and convert the number part to an integer. NOTE that this string format isn't guaranteed to remain unchanged in later versions of PHP, so you'd either have to check for a change after upgrading, or else you could just use the entire string as an array key (instead of just the integer part), at the cost of some efficiency. Here's the fixed sample code (comments/discussion welcome!): $MAX_SIMULTANEOUS = 50; // Adjust to whatever number of maximum simultaneous requests you think is appropriate. // Fill array $reqList[] with whatever data you're using to generate your CURL requests, // one request per array element, with consecutive indices starting at 0. // note that requests at the END of the array will be started first. if ($i = count($reqList)) // check that there is something to do... { $handleMap = array(); // use this to associate curl handles with the data that created them $curlMH = curl_multi_init(); $x = $i - $MAX_SIMULTANEOUS; if ($x<0) $x = 0; while ($i>$x) initRequest($reqList[--$i]); do { while (($mrc = curl_multi_exec($curlMH, $active)) === CURLM_CALL_MULTI_PERFORM); if ($mrc !== CURLM_OK) break; // Shouldn't normally ever happen; look at the list of CURLM_ errors to see when it might. 
while ($info = curl_multi_info_read($curlMH)) // a request has completed { $ch = $info['handle']; $k = intval(substr((string) $ch, 13)); if (($errno = $info['result']) === CURLE_OK) processResult($handleMap[$k][1], curl_multi_getcontent($ch)); else ... // handle failed CURL request, (e.g. write to the error log or database, output a message, etc.) if ($i) initRequest($reqList[--$i]); // start new request if one is waiting curl_multi_remove_handle($curlMH, $ch); curl_close($ch); unset($handleMap[$k]); // clean up completed request } // wait for next CURL operation to complete, or sleep a short time if CURL is busy but unable to "block" using curl_multi_select if ($active && (curl_multi_select($curlMH) === -1)) usleep(50); } while ($active); if ($mrc !== CURLM_OK) ... // optinally handle CURLM_ errors (e.g. write to error log, output message, etc.) // clean up foreach ($handleMap as $ch) { curl_multi_remove_handle($curlMH, $ch[0]); curl_close($ch); } curl_multi_close($curlMH); $reqList = $curlMH = $handleMap = null; } function initRequest($reqData) { global $curlMH, $handleMap; // process $reqData to get $url (the request URL), and any POST data, or other CURL options. // also generate $custReqData, which will be passed to processResult to identify the request // when it has completed (may be the same as $reqData). $ch = curl_init($url); curl_setopt($ch, ...); // set other CURL options curl_setopt($ch, ...); curl_multi_add_handle($curlMH, $ch); $handleMap[intval(substr((string) $ch, 13))] = array($ch, $custReqData); } function processResult($custReqData, $response) { // process $response (the data returned from the CURL request) // using $custReqData to identify which request it belongs to. }