|
php.net | support | documentation | report a bug | advanced search | search howto | statistics | random bug | login |
PatchesPull RequestsHistoryAllCommentsChangesGit/SVN commits
[2000-07-27 22:05 UTC] waldschrott@php.net
[2000-08-20 02:50 UTC] sniper@php.net
|
|||||||||||||||||||||||||||
Copyright © 2001-2025 The PHP GroupAll rights reserved. |
Last updated: Fri Nov 21 05:00:01 2025 UTC |
<!-- i debugged each an every line , however there's no error syntactically and semantically , i suspect that there is an error in either php interpreter or in apache-php system , we use apache 1.3.12 --> <!-- complete information about my system is http://bob.ulakbim.gov.tr/phpinfo.php --> <!-- thank you very much for your help effort --> <HTML> <HEAD> <TITLE> Web Spider with PHP </TITLE> </HEAD> <BODY> <?php function showstring($string,$ind_pos) // shows a string with ascii codes included // highlights the character in the $ind_pos of string { echo "Printing string:<br>"; echo "Length=".strlen($string)."<br>"; echo "<blockquote>"; $len=strlen($string); for ($i=0;$i<$len;$i++) { $a=$string[$i]; if ($i==$ind_pos) echo "<FONT COLOR=\"#FF0000\">"; if (ord($a)>32) echo " ".$a; else echo" (".ord($a).")"; if ($i==$ind_pos) echo "</FONT>"; } echo "<br></blockquote>"; echo "end of string.<br>"; } function kirp(&$string) // removes all leading whitespaces from the string { $len=strlen($string); for($i=0;ord($string[$i])<=32;$i++) if ($i>$len) return; $string=substr($string,$i); } $TEMP_FILE_NAME="pure"; $TARGET_URL="http://www.altavista.com"; $in_file=@fopen($TARGET_URL,"r"); if (!$in_file) {echo "Adress Not found.<Br>";exit;} $out_file=fopen($TEMP_FILE_NAME,"w"); //filesize("http://kasket.ulakbim.gov.tr"); while (!feof($in_file)) fputs($out_file,fgets($in_file,50)); //we will clean the HTML code and output to clean_file fclose ($out_file); fclose ($in_file); echo "<H5>HELLO</H5><BR>"; echo "<B>Parsing <i>$TARGET_URL</i></B><BR>"; $tags=""; $tags=get_meta_tags($TEMP_FILE_NAME); if ($tags["keywords"]=="") echo "<b>NO META KEYWORDS.</b><br>"; else echo "<b>META KEYWORDS DETECTED:</b>".$tags["keywords"]."<BR>"; if ($tags["description"]=="") echo "<b>NO DESCRIPTION.</b><br>"; else echo "<b>META DESCRIPTION DETECTED:</b>".$tags["description"]."<BR>"; //clearing all the tags except for the <A> tag $html_form=file($TEMP_FILE_NAME); $html_form=implode("",$html_form); //converting array to string // for shortening the parsing , we throw away the unnecessary html stuff $parse_string=strip_tags($html_form,"<A> <META> <BASE>"); // echo "$parse_string"; //checking for a <BASE> as a first step to use in the // <BASE> is assumed to be at the very beginning in the head part // after this step all the links will be reached through $base $parse_string=strtoupper($parse_string); //converting the string to uppercase chars // str functions are sometimes case-sensitive $k=strpos($parse_string,"<BASE"); if (!$k) { $base=$TARGET_URL; echo "<b>No relative base identifier. </b><br>"; } else { //getting the base for ($i=0;$i<$k;$i++) $parse_string[$i]=''; //echo "------------<BR>"; for ($i=0;$parse_string[$i]!="=";$i++) $parse_string[$i]=''; //exit; kirp($parse_string); // clearing the leading whitespaces //showstring($parse_string); $parse_string[0]='';//removing the = sign $new_base=""; for ($i=0;($read=$parse_string[$i])!='>';$i++) { if ($read!="\"" && ord($read)>32/*whitespaces*/) {echo ">$read".ord($read)."<BR>";$new_base=$new_base.$read;} $parse_string[$i]=''; } kirp($parse_string); $parse_string[0]=''; //final '>' kirp($parse_string); $base=$new_base; echo "<b>New Relative base identifier: <i>$base</i></b><br>"; } //finding the anchors echo "--Checking for the anchors in the document<br>"; while(1) { $k=strpos($parse_string,"<A HREF"); //showstring($parse_string,$k); //exit; if (!$k) {echo "<b> No links left (or found) to parse.</b><br>";break;} for ($i=0;$i<$k;$i++) $parse_string[$i]=''; kirp($parse_string); $len=strlen($parse_string); for ($i=0;$parse_string[$i]!='=';$i++) { if ($i>$len) {echo "parse error in html file";break;} $parse_string[$i]=''; } kirp($parse_string); $parse_string[0]=''; //removing the = sign //showstring($parse_string); //showstring($parse_string); $foundlink=""; echo "*"; $len=strlen($parse_string); for ($i=0;($read=$parse_string[$i])!='>';$i++) { if ($i>$len) {echo "parse error in html file";break;} if ($read!="\"" && ord($read)>32) $foundlink=$foundlink.$read; $parse_string[$i]=''; } kirp($parse_string); $parse_string[0]=''; // final '>' kirp($parse_string); // if not an interior link echo if ($foundlink[0]!="#") echo "<b>Found link: </b>".$foundlink."<br>"; } //echo $k; ?> </BODY> </HTML>