php获取网站html代码
PHP如何获取需要登陆后才能看到的网页HTML代码 实际上是个模拟登陆的问题,需要写个登陆模块,解决两个问题:
1,请求登陆并刷新的函数部分:
<?php
/*****************函数部分**************************/
/*获取指定网页的内容
$url为网页地址
*/
function getcontent($url){
if($open=file($url)){
$count=count($open);
for($i=0;$i<$count;$i++)
{
$theget.=$open[$i];
}
}else{
die('请求过多,超时,请刷新');
}
return $theget;
}
?>
2,偷取程序部分,也分两部分,
1),PHP与XML不同之处是需要特殊的调用才能支持COOKIE.或者记录SessionID(后面有说明程序)
php代码如下
<?PHP
//登陆并保存COOKIE
$f = fsockopen("www.url.net",80);
$cmd = <<<EOT
GET /test/login.php?name=test&password=test HTTP/1.0
EOT;
fputs($f,$cmd);
$result = '';
$cookie = '';
$location = '';
while($line = fgets($f))
{
$result .= $line;
//取得location跟setCookie头HTTP头信息
$tmp = explode(":",$line);
if($tmp[0]=="Set-Cookie")
$cookie .= $tmp;
if($tmp[0]=="Location")
$location = $tmp;
}
fclose($f);
2),获取页面
//下面访问你要访问的页面(这部分也可以参考下面的核心例程)
$f = fsockopen("www.url.net",80);l
//下面的cookie就是发送前页保存下的的cookie
$cmd = <<<EOT
GET /test/test.php HTTP/1.0
cookie:$cookie
EOT;
fputs($f,$cmd);
while($line = fgets($f))
{
echo $line;
}
fclose($f);
?>
核心例程就是fsockopen();
不妨再给段代码你瞧瞧:
--------------------------------------------------------------------------------
function posttohost($url, $data)
{
$url = parse_url($url);
if (!$url) return "couldn't parse url";
if (!isset($url['port'])) { $url['port'] = ""; }
if (!isset($url['query'])) { $url['query'] = ""; }
$encoded = "";
while (list($k,$v) = each($data))
{
$encoded .= ($encoded ? "&" : "");
$encoded .= rawurlencode($k)."=".rawurlencode($v);
}
$fp = fsockopen($url['host'], $url['port'] ? $url['port'] : 80);
if (!$fp) return "Failed to open socket to $url[host]";
fputs($fp, sprintf("POST %s%s%s HTTP/1.0", $url['path'], $url['query'] ? "?" : "", $url['query']));
fputs($fp, "Host: $url[host]");
fputs($fp, "Content-type: application/x-www-form-urlencoded");
fputs($fp, "Content-length: " . strlen($encoded) . "");
fputs($fp, "Connection: close");
fputs($fp, "$encoded");
$line = fgets($fp,1024);
if (!eregi("^HTTP/1\.. 200", $line)) return $line ;
$results = ""; $inheader = 1;
while(!feof($fp))
{
$line = fgets($fp,1024);
if ($inheader && ($line == "" || $line == "r")) {
$inheader = 0;
}
elseif (!$inheader) {
$results .= $line;
}
}
fclose($fp);
return $results;
}
$data=array();
$data["msg"]="HELLO THIS IS TEST MSG";
$data["Type"]="TEXT";
echo posttohost("http://url/xxx", $data);
应该说明白了吧?
另外登陆部分还有一种简单方法是把SessionID保存下来
源代码:
<?php
/*
* 得到网页内容
* 参数:$host [in] string
* 主机名称(例如: www.url.com.cn)
* 参数:$method [in] string
* 提交方法:POST, GET, HEAD ... 并加上相应的参数( 具体语法参见 RFC1945,RFC2068 )
* 参数:$str [in] string
* 提交的内容
* 参数:$sessid [in] string
* PHP的SESSIONID
*
* @返回 网页内容 string
*/
function GetWebContent($host, $method, $str, $sessid = '')
{
$ip = gethostbyname($host);
$fp = fsockopen($ip, 80);
if (!$fp) return;
fputs($fp, "$methodrn");
fputs($fp, "Host: $hostrn");
if (!empty($sessid))
{
fputs($fp, "Cookie: PHPSESSID=$sessid; path=/;rn");
}
if ( substr(trim($method),0, 4) == "POST")
{
fputs($fp, "Content-Length: ". strlen($str) . "rn"); // 别忘了指定长度
}
fputs($fp, "Content-Type: application/x-www-form-urlencodedrnrn");
if ( substr(trim($method),0, 4) == "POST")
{
fputs($fp, $str."rn");
}
while(!feof($fp))
{
$response .= fgets($fp, 1024);
}
$hlen = strpos($response,"rnrn"); // LINUX下是 "nn"
$header = substr($response, 0, $hlen);
$entity = substr($response, $hlen + 4);
if ( preg_match('/PHPSESSID=([0-9a-z]+);/i', $header, $matches))
{
$a['sessid'] = $matches;
}
if ( preg_match('/Location: ([0-9a-z_?=&#.]+)/i', $header, $matches))
{
$a['location'] = $matches;
}
$a['content'] = $entity;
fclose($fp);
return $a;
}
/* 构造用户名,密码字符串 */
$str = ("username=test&password=test");
$response = GetWebContent("localhost","POST /login.php HTTP/1.0", $str);
echo $response['location'].$response['content']."<br>";
echo $response['sessid']."<br>";
if ( preg_match('/error.php/i',$response['location']))
{
echo "登陆失败<br>";
} else {
echo "登陆成功<br>";
// 不可以访问user.php,因为不带sessid参数
$response = GetWebContent("localhost","GET /user.php HTTP/1.0", '', '');
echo $response['location']."<br>"; // 结果:error.php?errcode=2
// 可以访问user.php
$response = GetWebContent("localhost","GET /user.php HTTP/1.0", '', $response['sessid']);
echo $response['location']."<br>"; // 结果:user.php
}
?> PHP如何获取需要登陆后才能看到的网页HTML代码实际上是个模拟登陆的问题,需要写个登陆模块,解决两个问题:
1,请求登陆并刷新的函数部分:
<?php
/*****************函数部分**************************/
/*获取指定网页的内容
$url为网页地址
*/
function getcontent($url){
if($open=file($url)){
$count=count($open);
for($i=0;$i<$count;$i++)
{
$theget.=$open[$i];
}
}else{
die('请求过多,超时,请刷新');
}
return $theget;
}
?>
2,偷取程序部分,也分两部分,
1),PHP与XML不同之处是需要特殊的调用才能支持COOKIE.或者记录SessionID(后面有说明程序)
php代码如下
<?PHP
//登陆并保存COOKIE
$f = fsockopen("www.url.net",80);
$cmd = <<<EOT
GET /test/login.php?name=test&password=test HTTP/1.0
EOT;
fputs($f,$cmd);
$result = '';
$cookie = '';
$location = '';
while($line = fgets($f))
{
$result .= $line;
//取得location跟setCookie头HTTP头信息
$tmp = explode(":",$line);
if($tmp[0]=="Set-Cookie")
$cookie .= $tmp;
if($tmp[0]=="Location")
$location = $tmp;
}
fclose($f);
2),获取页面
//下面访问你要访问的页面(这部分也可以参考下面的核心例程)
$f = fsockopen("www.url.net",80);l
//下面的cookie就是发送前页保存下的的cookie
$cmd = <<<EOT
GET /test/test.php HTTP/1.0
cookie:$cookie
EOT;
fputs($f,$cmd);
while($line = fgets($f))
{
echo $line;
}
fclose($f);
?>
核心例程就是fsockopen();
不妨再给段代码你瞧瞧:
--------------------------------------------------------------------------------
function posttohost($url, $data)
{
$url = parse_url($url);
if (!$url) return "couldn't parse url";
if (!isset($url['port'])) { $url['port'] = ""; }
if (!isset($url['query'])) { $url['query'] = ""; }
$encoded = "";
while (list($k,$v) = each($data))
{
$encoded .= ($encoded ? "&" : "");
$encoded .= rawurlencode($k)."=".rawurlencode($v);
}
$fp = fsockopen($url['host'], $url['port'] ? $url['port'] : 80);
if (!$fp) return "Failed to open socket to $url[host]";
fputs($fp, sprintf("POST %s%s%s HTTP/1.0", $url['path'], $url['query'] ? "?" : "", $url['query']));
fputs($fp, "Host: $url[host]");
fputs($fp, "Content-type: application/x-www-form-urlencoded");
fputs($fp, "Content-length: " . strlen($encoded) . "");
fputs($fp, "Connection: close");
fputs($fp, "$encoded");
$line = fgets($fp,1024);
if (!eregi("^HTTP/1\.. 200", $line)) return $line ;
$results = ""; $inheader = 1;
while(!feof($fp))
{
$line = fgets($fp,1024);
if ($inheader && ($line == "" || $line == "r")) {
$inheader = 0;
}
elseif (!$inheader) {
$results .= $line;
}
}
fclose($fp);
return $results;
}
$data=array();
$data["msg"]="HELLO THIS IS TEST MSG";
$data["Type"]="TEXT";
echo posttohost("http://url/xxx", $data);
应该说明白了吧?
另外登陆部分还有一种简单方法是把SessionID保存下来
源代码:
<?php
/*
* 得到网页内容
* 参数:$host [in] string
* 主机名称(例如: www.url.com.cn)
* 参数:$method [in] string
* 提交方法:POST, GET, HEAD ... 并加上相应的参数( 具体语法参见 RFC1945,RFC2068 )
* 参数:$str [in] string
* 提交的内容
* 参数:$sessid [in] string
* PHP的SESSIONID
*
* @返回 网页内容 string
*/
function GetWebContent($host, $method, $str, $sessid = '')
{
$ip = gethostbyname($host);
$fp = fsockopen($ip, 80);
if (!$fp) return;
fputs($fp, "$methodrn");
fputs($fp, "Host: $hostrn");
if (!empty($sessid))
{
fputs($fp, "Cookie: PHPSESSID=$sessid; path=/;rn");
}
if ( substr(trim($method),0, 4) == "POST")
{
fputs($fp, "Content-Length: ". strlen($str) . "rn"); // 别忘了指定长度
}
fputs($fp, "Content-Type: application/x-www-form-urlencodedrnrn");
if ( substr(trim($method),0, 4) == "POST")
{
fputs($fp, $str."rn");
}
while(!feof($fp))
{
$response .= fgets($fp, 1024);
}
$hlen = strpos($response,"rnrn"); // LINUX下是 "nn"
$header = substr($response, 0, $hlen);
$entity = substr($response, $hlen + 4);
if ( preg_match('/PHPSESSID=([0-9a-z]+);/i', $header, $matches))
{
$a['sessid'] = $matches;
}
if ( preg_match('/Location: ([0-9a-z_?=&#.]+)/i', $header, $matches))
{
$a['location'] = $matches;
}
$a['content'] = $entity;
fclose($fp);
return $a;
}
/* 构造用户名,密码字符串 */
$str = ("username=test&password=test");
$response = GetWebContent("localhost","POST /login.php HTTP/1.0", $str);
echo $response['location'].$response['content']."<br>";
echo $response['sessid']."<br>";
if ( preg_match('/error.php/i',$response['location']))
{
echo "登陆失败<br>";
} else {
echo "登陆成功<br>";
// 不可以访问user.php,因为不带sessid参数
$response = GetWebContent("localhost","GET /user.php HTTP/1.0", '', '');
echo $response['location']."<br>"; // 结果:error.php?errcode=2
// 可以访问user.php
$response = GetWebContent("localhost","GET /user.php HTTP/1.0", '', $response['sessid']);
echo $response['location']."<br>"; // 结果:user.php
}
?> (随机推荐阅读本站500篇优秀文章点击前往:500篇优秀随机文章)
1,请求登陆并刷新的函数部分:
<?php
/*****************函数部分**************************/
/*获取指定网页的内容
$url为网页地址
*/
function getcontent($url){
if($open=file($url)){
$count=count($open);
for($i=0;$i<$count;$i++)
{
$theget.=$open[$i];
}
}else{
die('请求过多,超时,请刷新');
}
return $theget;
}
?>
2,偷取程序部分,也分两部分,
1),PHP与XML不同之处是需要特殊的调用才能支持COOKIE.或者记录SessionID(后面有说明程序)
php代码如下
<?PHP
//登陆并保存COOKIE
$f = fsockopen("www.url.net",80);
$cmd = <<<EOT
GET /test/login.php?name=test&password=test HTTP/1.0
EOT;
fputs($f,$cmd);
$result = '';
$cookie = '';
$location = '';
while($line = fgets($f))
{
$result .= $line;
//取得location跟setCookie头HTTP头信息
$tmp = explode(":",$line);
if($tmp[0]=="Set-Cookie")
$cookie .= $tmp;
if($tmp[0]=="Location")
$location = $tmp;
}
fclose($f);
2),获取页面
//下面访问你要访问的页面(这部分也可以参考下面的核心例程)
$f = fsockopen("www.url.net",80);l
//下面的cookie就是发送前页保存下的的cookie
$cmd = <<<EOT
GET /test/test.php HTTP/1.0
cookie:$cookie
EOT;
fputs($f,$cmd);
while($line = fgets($f))
{
echo $line;
}
fclose($f);
?>
核心例程就是fsockopen();
不妨再给段代码你瞧瞧:
--------------------------------------------------------------------------------
function posttohost($url, $data)
{
$url = parse_url($url);
if (!$url) return "couldn't parse url";
if (!isset($url['port'])) { $url['port'] = ""; }
if (!isset($url['query'])) { $url['query'] = ""; }
$encoded = "";
while (list($k,$v) = each($data))
{
$encoded .= ($encoded ? "&" : "");
$encoded .= rawurlencode($k)."=".rawurlencode($v);
}
$fp = fsockopen($url['host'], $url['port'] ? $url['port'] : 80);
if (!$fp) return "Failed to open socket to $url[host]";
fputs($fp, sprintf("POST %s%s%s HTTP/1.0", $url['path'], $url['query'] ? "?" : "", $url['query']));
fputs($fp, "Host: $url[host]");
fputs($fp, "Content-type: application/x-www-form-urlencoded");
fputs($fp, "Content-length: " . strlen($encoded) . "");
fputs($fp, "Connection: close");
fputs($fp, "$encoded");
$line = fgets($fp,1024);
if (!eregi("^HTTP/1\.. 200", $line)) return $line ;
$results = ""; $inheader = 1;
while(!feof($fp))
{
$line = fgets($fp,1024);
if ($inheader && ($line == "" || $line == "r")) {
$inheader = 0;
}
elseif (!$inheader) {
$results .= $line;
}
}
fclose($fp);
return $results;
}
$data=array();
$data["msg"]="HELLO THIS IS TEST MSG";
$data["Type"]="TEXT";
echo posttohost("http://url/xxx", $data);
应该说明白了吧?
另外登陆部分还有一种简单方法是把SessionID保存下来
源代码:
<?php
/*
* 得到网页内容
* 参数:$host [in] string
* 主机名称(例如: www.url.com.cn)
* 参数:$method [in] string
* 提交方法:POST, GET, HEAD ... 并加上相应的参数( 具体语法参见 RFC1945,RFC2068 )
* 参数:$str [in] string
* 提交的内容
* 参数:$sessid [in] string
* PHP的SESSIONID
*
* @返回 网页内容 string
*/
function GetWebContent($host, $method, $str, $sessid = '')
{
$ip = gethostbyname($host);
$fp = fsockopen($ip, 80);
if (!$fp) return;
fputs($fp, "$methodrn");
fputs($fp, "Host: $hostrn");
if (!empty($sessid))
{
fputs($fp, "Cookie: PHPSESSID=$sessid; path=/;rn");
}
if ( substr(trim($method),0, 4) == "POST")
{
fputs($fp, "Content-Length: ". strlen($str) . "rn"); // 别忘了指定长度
}
fputs($fp, "Content-Type: application/x-www-form-urlencodedrnrn");
if ( substr(trim($method),0, 4) == "POST")
{
fputs($fp, $str."rn");
}
while(!feof($fp))
{
$response .= fgets($fp, 1024);
}
$hlen = strpos($response,"rnrn"); // LINUX下是 "nn"
$header = substr($response, 0, $hlen);
$entity = substr($response, $hlen + 4);
if ( preg_match('/PHPSESSID=([0-9a-z]+);/i', $header, $matches))
{
$a['sessid'] = $matches;
}
if ( preg_match('/Location: ([0-9a-z_?=&#.]+)/i', $header, $matches))
{
$a['location'] = $matches;
}
$a['content'] = $entity;
fclose($fp);
return $a;
}
/* 构造用户名,密码字符串 */
$str = ("username=test&password=test");
$response = GetWebContent("localhost","POST /login.php HTTP/1.0", $str);
echo $response['location'].$response['content']."<br>";
echo $response['sessid']."<br>";
if ( preg_match('/error.php/i',$response['location']))
{
echo "登陆失败<br>";
} else {
echo "登陆成功<br>";
// 不可以访问user.php,因为不带sessid参数
$response = GetWebContent("localhost","GET /user.php HTTP/1.0", '', '');
echo $response['location']."<br>"; // 结果:error.php?errcode=2
// 可以访问user.php
$response = GetWebContent("localhost","GET /user.php HTTP/1.0", '', $response['sessid']);
echo $response['location']."<br>"; // 结果:user.php
}
?> PHP如何获取需要登陆后才能看到的网页HTML代码实际上是个模拟登陆的问题,需要写个登陆模块,解决两个问题:
1,请求登陆并刷新的函数部分:
<?php
/*****************函数部分**************************/
/*获取指定网页的内容
$url为网页地址
*/
function getcontent($url){
if($open=file($url)){
$count=count($open);
for($i=0;$i<$count;$i++)
{
$theget.=$open[$i];
}
}else{
die('请求过多,超时,请刷新');
}
return $theget;
}
?>
2,偷取程序部分,也分两部分,
1),PHP与XML不同之处是需要特殊的调用才能支持COOKIE.或者记录SessionID(后面有说明程序)
php代码如下
<?PHP
//登陆并保存COOKIE
$f = fsockopen("www.url.net",80);
$cmd = <<<EOT
GET /test/login.php?name=test&password=test HTTP/1.0
EOT;
fputs($f,$cmd);
$result = '';
$cookie = '';
$location = '';
while($line = fgets($f))
{
$result .= $line;
//取得location跟setCookie头HTTP头信息
$tmp = explode(":",$line);
if($tmp[0]=="Set-Cookie")
$cookie .= $tmp;
if($tmp[0]=="Location")
$location = $tmp;
}
fclose($f);
2),获取页面
//下面访问你要访问的页面(这部分也可以参考下面的核心例程)
$f = fsockopen("www.url.net",80);l
//下面的cookie就是发送前页保存下的的cookie
$cmd = <<<EOT
GET /test/test.php HTTP/1.0
cookie:$cookie
EOT;
fputs($f,$cmd);
while($line = fgets($f))
{
echo $line;
}
fclose($f);
?>
核心例程就是fsockopen();
不妨再给段代码你瞧瞧:
--------------------------------------------------------------------------------
function posttohost($url, $data)
{
$url = parse_url($url);
if (!$url) return "couldn't parse url";
if (!isset($url['port'])) { $url['port'] = ""; }
if (!isset($url['query'])) { $url['query'] = ""; }
$encoded = "";
while (list($k,$v) = each($data))
{
$encoded .= ($encoded ? "&" : "");
$encoded .= rawurlencode($k)."=".rawurlencode($v);
}
$fp = fsockopen($url['host'], $url['port'] ? $url['port'] : 80);
if (!$fp) return "Failed to open socket to $url[host]";
fputs($fp, sprintf("POST %s%s%s HTTP/1.0", $url['path'], $url['query'] ? "?" : "", $url['query']));
fputs($fp, "Host: $url[host]");
fputs($fp, "Content-type: application/x-www-form-urlencoded");
fputs($fp, "Content-length: " . strlen($encoded) . "");
fputs($fp, "Connection: close");
fputs($fp, "$encoded");
$line = fgets($fp,1024);
if (!eregi("^HTTP/1\.. 200", $line)) return $line ;
$results = ""; $inheader = 1;
while(!feof($fp))
{
$line = fgets($fp,1024);
if ($inheader && ($line == "" || $line == "r")) {
$inheader = 0;
}
elseif (!$inheader) {
$results .= $line;
}
}
fclose($fp);
return $results;
}
$data=array();
$data["msg"]="HELLO THIS IS TEST MSG";
$data["Type"]="TEXT";
echo posttohost("http://url/xxx", $data);
应该说明白了吧?
另外登陆部分还有一种简单方法是把SessionID保存下来
源代码:
<?php
/*
* 得到网页内容
* 参数:$host [in] string
* 主机名称(例如: www.url.com.cn)
* 参数:$method [in] string
* 提交方法:POST, GET, HEAD ... 并加上相应的参数( 具体语法参见 RFC1945,RFC2068 )
* 参数:$str [in] string
* 提交的内容
* 参数:$sessid [in] string
* PHP的SESSIONID
*
* @返回 网页内容 string
*/
function GetWebContent($host, $method, $str, $sessid = '')
{
$ip = gethostbyname($host);
$fp = fsockopen($ip, 80);
if (!$fp) return;
fputs($fp, "$methodrn");
fputs($fp, "Host: $hostrn");
if (!empty($sessid))
{
fputs($fp, "Cookie: PHPSESSID=$sessid; path=/;rn");
}
if ( substr(trim($method),0, 4) == "POST")
{
fputs($fp, "Content-Length: ". strlen($str) . "rn"); // 别忘了指定长度
}
fputs($fp, "Content-Type: application/x-www-form-urlencodedrnrn");
if ( substr(trim($method),0, 4) == "POST")
{
fputs($fp, $str."rn");
}
while(!feof($fp))
{
$response .= fgets($fp, 1024);
}
$hlen = strpos($response,"rnrn"); // LINUX下是 "nn"
$header = substr($response, 0, $hlen);
$entity = substr($response, $hlen + 4);
if ( preg_match('/PHPSESSID=([0-9a-z]+);/i', $header, $matches))
{
$a['sessid'] = $matches;
}
if ( preg_match('/Location: ([0-9a-z_?=&#.]+)/i', $header, $matches))
{
$a['location'] = $matches;
}
$a['content'] = $entity;
fclose($fp);
return $a;
}
/* 构造用户名,密码字符串 */
$str = ("username=test&password=test");
$response = GetWebContent("localhost","POST /login.php HTTP/1.0", $str);
echo $response['location'].$response['content']."<br>";
echo $response['sessid']."<br>";
if ( preg_match('/error.php/i',$response['location']))
{
echo "登陆失败<br>";
} else {
echo "登陆成功<br>";
// 不可以访问user.php,因为不带sessid参数
$response = GetWebContent("localhost","GET /user.php HTTP/1.0", '', '');
echo $response['location']."<br>"; // 结果:error.php?errcode=2
// 可以访问user.php
$response = GetWebContent("localhost","GET /user.php HTTP/1.0", '', $response['sessid']);
echo $response['location']."<br>"; // 结果:user.php
}
?> (随机推荐阅读本站500篇优秀文章点击前往:500篇优秀随机文章)
来源:本文由易搜IT博客原创撰写,欢迎分享本文,转载请保留出处和链接!