php版爱站关键词采集器
<?php
/**
 * Aizhan keyword collector.
 *
 * Flow of the script:
 *   1. GET the login page so the server hands out a PHPSESSID cookie.
 *   2. POST the account credentials together with that session cookie.
 *   3. Read the userId / userName / userGroup / userSecure cookies that the
 *      login stored in the cookie jar.
 *   4. GET http://ci.aizhan.com/<word>/ with those cookies, extract the rows
 *      from the response and dump them with print_r().
 *
 * Input:  $_GET['word'] - the keyword to look up.
 * Output: "error" when the keyword is missing or nothing could be extracted,
 *         followed (in the latter case) by the print_r() dump of the rows.
 */

/**
 * Perform one HTTP request with the cURL options shared by every step.
 *
 * @param string      $url        Address to request.
 * @param string      $user_agent Value for CURLOPT_USERAGENT.
 * @param array       $headers    Raw request header lines.
 * @param string      $cookie_jar File cURL stores received cookies in.
 * @param string|null $post_body  URL-encoded form body; when given the request
 *                                is sent as POST and the jar is also used as
 *                                the cookie source (CURLOPT_COOKIEFILE).
 * @return string|false Response headers + body, or false on failure.
 */
function aizhan_request($url, $user_agent, array $headers, $cookie_jar, $post_body = null)
{
    $curl = curl_init();
    if ($curl === false) {
        return false;
    }

    curl_setopt($curl, CURLOPT_URL, $url);
    curl_setopt($curl, CURLOPT_USERAGENT, $user_agent);
    curl_setopt($curl, CURLOPT_HTTPHEADER, $headers);
    if ($post_body !== null) {
        curl_setopt($curl, CURLOPT_POST, 1);
        curl_setopt($curl, CURLOPT_POSTFIELDS, $post_body);
        curl_setopt($curl, CURLOPT_COOKIEFILE, $cookie_jar);
    }
    curl_setopt($curl, CURLOPT_ENCODING, 'gzip, deflate');
    curl_setopt($curl, CURLOPT_HEADER, 1); // keep response headers in the returned text
    curl_setopt($curl, CURLOPT_AUTOREFERER, true);
    curl_setopt($curl, CURLOPT_COOKIEJAR, $cookie_jar);
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($curl, CURLOPT_TIMEOUT, 15);

    $response = curl_exec($curl);

    // The cookie jar is only written once the handle is released. Before
    // PHP 8 curl_close() does that; from PHP 8 on it is a no-op and the handle
    // has to be destroyed, hence the explicit unset().
    curl_close($curl);
    unset($curl);

    return $response;
}

/**
 * Look up cookie values by name in a cookie-jar file written by cURL.
 *
 * Every line is split on whitespace; the field that follows an exact match of
 * the cookie name is taken as its value.
 *
 * @param string $cookie_jar Path of the cookie-jar file.
 * @param array  $names      Cookie names to look for.
 * @return array Map of cookie name => value; names that were not found are absent.
 */
function aizhan_read_cookies($cookie_jar, array $names)
{
    $found = array();

    $lines = is_readable($cookie_jar) ? file($cookie_jar) : false;
    if ($lines === false) {
        return $found;
    }

    foreach ($lines as $line) {
        $fields = preg_split("/[\s]+/", $line);
        if ($fields === false) {
            continue;
        }
        foreach ($names as $name) {
            $pos = array_search($name, $fields, true);
            if ($pos !== false && isset($fields[$pos + 1])) {
                $found[$name] = $fields[$pos + 1];
            }
        }
    }

    return $found;
}

/**
 * Delete the temporary cookie jar; registered as a shutdown handler so the
 * file (which holds session cookies) does not pile up on disk.
 *
 * @param string $path File to remove.
 * @return void
 */
function aizhan_remove_file($path)
{
    if (is_file($path)) {
        unlink($path);
    }
}

header("Content-type: text/html; charset=utf-8");

// Validate the keyword before doing any network work.
$word = (isset($_GET['word']) && is_string($_GET['word'])) ? $_GET['word'] : '';
if ($word === '') {
    echo "error";
    exit();
}

$username = "****@163.com"; // your Aizhan account
$passwd = "***"; // your Aizhan password
$login_url = "http://www.aizhan.com/login.php";
$user_agent = "Mozilla/5.0 (Windows NT 6.2; rv:17.0) Gecko/20100101 Firefox/17.0";

// Header lines that are identical for all three requests.
$common_header = array(
    "User-Agent: " . $user_agent,
    "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Language: zh-cn,zh;q=0.8,en-us;q=0.5,en;q=0.3",
    "Accept-Encoding: gzip, deflate",
    "Connection: keep-alive",
);

$ckfile = tempnam("./temp", "tem");
if ($ckfile === false) {
    echo "error";
    exit();
}
register_shutdown_function('aizhan_remove_file', $ckfile);

// Step 1: obtain the session id.
$session_header = array_merge(array("Host: www.aizhan.com"), $common_header);
aizhan_request($login_url, $user_agent, $session_header, $ckfile);

$cookie = aizhan_read_cookies($ckfile, array('PHPSESSID'));
$cookie_str = "PHPSESSID=" . (isset($cookie['PHPSESSID']) ? $cookie['PHPSESSID'] : '');

// Step 2: log in to Aizhan.
$login_header = array_merge(
    array("Host: www.aizhan.com"),
    $common_header,
    array(
        "Referer: http://www.aizhan.com/login.php",
        "Cookie: " . $cookie_str,
        "Content-Type: application/x-www-form-urlencoded",
    )
);
$login_post = array('refer' => '', 'email' => $username, 'password' => $passwd);
// http_build_query() URL-encodes every value, so characters such as '&', '='
// or '+' in the credentials cannot corrupt the form body.
$post_str = http_build_query($login_post, '', '&');
aizhan_request($login_url, $user_agent, $login_header, $ckfile, $post_str);

// Step 3: collect the cookies set by the login.
$login_cookie_names = array('userId', 'userName', 'userGroup', 'userSecure');
$cookie = aizhan_read_cookies($ckfile, $login_cookie_names);
$cookie_pairs = array();
foreach ($login_cookie_names as $name) {
    // A cookie that is missing (e.g. the login failed) is sent with an empty value.
    $cookie_pairs[] = $name . '=' . (isset($cookie[$name]) ? $cookie[$name] : '');
}
$cookie_str = implode('; ', $cookie_pairs);

// Step 4: fetch the index page for the keyword.
$enword = urlencode($word);
$target_url = "http://ci.aizhan.com/{$enword}/";
$search_header = array_merge(
    array("Host: ci.aizhan.com"),
    $common_header,
    array("Cookie: " . $cookie_str)
);
$html = aizhan_request($target_url, $user_agent, $search_header, $ckfile);
if ($html === false) {
    $html = ''; // a failed request is reported as "error" by the match below
}

// NOTE(review): both patterns below are the literal '/ /Uis' in this copy of
// the script, i.e. they match single space characters. Presumably the HTML
// markup inside the patterns was lost when the post was extracted - TODO
// restore the real row / cell patterns from the original script.
if (!preg_match_all('/ /Uis', $html, $words)) {
    echo "error";
}

$result = array();
foreach ($words[0] as $row) {
    preg_match_all('/ /Uis', $row, $key);
    $cells = isset($key[0]) ? $key[0] : array();
    $result[] = array(
        'word'   => strip_tags(isset($cells[0]) ? $cells[0] : ''),
        'index'  => strip_tags(isset($cells[1]) ? $cells[1] : ''),
        'record' => strip_tags(isset($cells[2]) ? $cells[2] : ''),
    );
}
print_r($result);
?>
本文出自 蓝鹰博客,转载时请注明出处及相应链接。
本文永久链接: http://www.lanyingblog.com/blog/1864.html