read())) { $a = explode("-", $entry); if ( is_numeric($a[0])) { $last_index = $a[0]; } } $d->close(); $last_index++; $local_files_path = $local_files_dir . "/" . str_pad($last_index, 8, "0", STR_PAD_LEFT) . "-" . $today; if ( !is_dir($local_files_path) ) { mkdir($local_files_path, 0777, true); } // the local filename to save the html $filename = "index.html"; // get the html from remote server $html = do_post_request( $url ); file_put_contents( $local_files_path . "/" . $filename, $html ); $html = str_replace("\"", "'", $html); // now parse echo "<pre>"; //echo htmlentities($html); // get all
's with regex preg_match_all("|]+>(.*)]+>|U", $html, $matches); // loop through the matches with foreach $i = 0; foreach($matches[0] as $value) { //echo htmlentities($value) . "\n"; // filter the one that we need // if (strstr($value, "class='gb1'")) { $i++; // echo htmlentities($value) . "\n"; preg_match("/href='.*'/", $value, $m); $links[$i] = $m[0]; // } } // print the found links of interest foreach($links as $value) { echo htmlentities($value) . "\n";
// NOTE(review): the next line is preserved verbatim from the source, whose line
// breaks appear to have been lost; the `}` and `echo` presumably were separate
// statements closing the link-printing loop - confirm against the original file.
// here you can parse the found links } echo "";

/**
 * Open $url with an HTTP POST stream context and return everything read from it.
 *
 * @param string      $url              Target to open; passed straight to fopen().
 * @param string|null $data             Request body placed in the context's 'content' option.
 * @param mixed       $optional_headers Value for the context's 'header' option; omitted when null.
 *
 * @return string The full contents read from the stream.
 *
 * @throws Exception When the stream cannot be opened or its contents cannot be read.
 */
function do_post_request($url, $data = null, $optional_headers = null)
{
    $params = array('http' => array(
        'method'  => 'POST',
        'content' => $data,
    ));
    if ($optional_headers !== null) {
        $params['http']['header'] = $optional_headers;
    }
    $ctx = stream_context_create($params);

    // Drop any earlier, unrelated error so the messages below only ever report
    // a failure raised by this call (error_clear_last() exists from PHP 7.0).
    if (function_exists('error_clear_last')) {
        error_clear_last();
    }

    $fp = @fopen($url, 'rb', false, $ctx);
    if (!$fp) {
        // $php_errormsg no longer exists in PHP 8; error_get_last() still
        // reports warnings that were silenced with @.
        $error  = error_get_last();
        $reason = $error !== null ? $error['message'] : 'unknown error';
        throw new Exception("Problem with $url, $reason");
    }

    $response = @stream_get_contents($fp);

    // Capture the failure reason before fclose() can overwrite it, then always
    // release the handle, whether or not the read succeeded.
    $error = $response === false ? error_get_last() : null;
    fclose($fp);

    if ($response === false) {
        $reason = $error !== null ? $error['message'] : 'unknown error';
        throw new Exception("Problem reading data from $url, $reason");
    }

    return $response;
}