- 论坛徽章:
- 0
|
<?php
/**
 * Scrape contact details from a list of member pages.
 *
 * Flow:
 *   1. POST the login form to user.php and keep the session cookies in a
 *      temporary cookie jar.
 *   2. Read page URLs from ./url-list.txt, one per line. A line containing
 *      only "END" stops processing; blank lines are skipped.
 *   3. Fetch each URL with the stored cookies, extract the first "name" and
 *      "contact phone" table cells, append "<index>,<name>,<phone>\r\n" to
 *      ./user-list.txt and echo the same line to the browser as progress.
 *
 * A page that cannot be fetched or does not contain the expected cells still
 * produces a row, with the missing fields left empty.
 */
header("Content-type: text/html; charset=utf-8");
@set_time_limit(0); // may be disabled by the host; failure here is harmless
error_reporting(E_ALL);

// Cookie jar shared by the login request and every later page fetch.
$cookie_file = tempnam("./temp", "cookie") or die('cookie create failed');
// Remove the jar when the script ends (also runs after die()).
register_shutdown_function('unlink', $cookie_file);

// --- Step 1: log in -------------------------------------------------------
// NOTE(review): credentials are hard-coded in the request body.
$post_url = "http://www.jqw888.com/user.php";
$post = "username=request&password=123456&act=act_login&back_act=user.php&submit=";
$login = curl_init($post_url);
curl_setopt($login, CURLOPT_HEADER, 0);
curl_setopt($login, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($login, CURLOPT_POST, 1);
curl_setopt($login, CURLOPT_COOKIEJAR, $cookie_file);
curl_setopt($login, CURLOPT_POSTFIELDS, $post);
// The script time limit is disabled above, so bound each request explicitly.
curl_setopt($login, CURLOPT_CONNECTTIMEOUT, 10);
curl_setopt($login, CURLOPT_TIMEOUT, 30);
$data = curl_exec($login);
$login_error = ($data === false) ? curl_error($login) : '';
curl_close($login); // closing the handle is what flushes the cookie jar to disk
if ($data === false) {
    die('login request failed: ' . htmlspecialchars($login_error));
}

// --- Step 2: load inputs and open the output file --------------------------
// File holding the URLs to read.
$urls = file('./url-list.txt');
if ($urls === false) {
    die('cannot read ./url-list.txt');
}

// Label cell followed by the value cell. The labels were restored from
// mis-encoded text ("name" / "contact phone", each ending in a full-width
// colon) -- NOTE(review): confirm the exact label text against the live page.
$regex_name  = '/<td.*?>姓名：<\/td>\s*?<td.*?>(.*?)<\/td>/i';
$regex_phone = '/<td.*?>联系电话：<\/td>\s*?<td.*?>(.*?)<\/td>/i';

// File the extracted records are written to.
$fp = fopen('./user-list.txt', 'w');
if ($fp === false) {
    die('cannot open ./user-list.txt for writing');
}

// ENT_SUBSTITUTE keeps output visible when a page is not valid UTF-8; it only
// exists on PHP >= 5.4, so fall back to plain ENT_QUOTES on older runtimes.
$escape_flags = defined('ENT_SUBSTITUTE') ? (ENT_QUOTES | ENT_SUBSTITUTE) : ENT_QUOTES;

// --- Step 3: fetch every page and extract the fields -----------------------
$i = 1;
foreach ($urls as $url) {
    $url = trim($url);
    if ($url === 'END') {
        break;
    }
    if ($url === '') {
        continue; // blank line: nothing to fetch
    }

    $cont = curl_init($url);
    curl_setopt($cont, CURLOPT_HEADER, 0);
    curl_setopt($cont, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($cont, CURLOPT_COOKIEFILE, $cookie_file);
    curl_setopt($cont, CURLOPT_CONNECTTIMEOUT, 10);
    curl_setopt($cont, CURLOPT_TIMEOUT, 30);
    $contes = curl_exec($cont);
    $fetch_error = ($contes === false) ? curl_error($cont) : '';
    curl_close($cont);

    $name = '';
    $phone = '';
    if ($contes === false) {
        trigger_error('fetch failed for ' . $url . ': ' . $fetch_error, E_USER_WARNING);
    } else {
        // Only the first occurrence of each field is used.
        $match = array();
        if (preg_match($regex_name, $contes, $match)) {
            $name = $match[1];
        }
        if (preg_match($regex_phone, $contes, $match)) {
            $phone = $match[1];
        }
    }

    $info = $i . ',' . $name . ',' . $phone . "\r\n";
    // The file gets the raw text; the browser copy is escaped because the
    // values come from a remote page.
    echo nl2br(htmlspecialchars($info, $escape_flags, 'UTF-8'));
    fwrite($fp, $info);

    // Push progress to the client: drain PHP's output buffer (if one is
    // active) first, then the SAPI/web-server buffer.
    if (ob_get_level() > 0) {
        ob_flush();
    }
    flush();

    sleep(1); // throttle requests to the remote site
    $i++;
}
fclose($fp);
?>
复制代码 请问这是用ruby做的网页抓取吗?好像不用装什么东西就直接能运行。高手帮忙解释下吧,最好能标出哪句是ruby。我很菜,没学过ruby和python,想学一门的,苦于不知如何选择!!!
|
|