如果支持打开远程内容的选项的话,实际上php用fopen或file_get_contents都能获得一个网页的内容,但是默认的函数有个不足的地方就是无法获取HTTP头,这在一些特殊的应用中很不方便,如,有一个链接:
?id=18
假如它返回的是一个图片,用默认函数就很难识别,但如果通过HTTP应答头来判断就简单多了,此外如果对方通过 Refer 来防盗链的话,也是无法获取的,用HTTP类就能完美解决这些问题,而且速度也相差无几。
使用方法:
$hd = new DedeHttpDown();
$hd->OpenUrl("http://www.dedecms.com");
echo $hd->GetHtml();
//如果保存为文件则用 $hd->SaveBin("dede.html");
$hd->Close();
获得http请求头用
$hd->GetHead("key")
设置请求头
$hd->SetHead(key,value); (必须在调用 OpenUrl 之前设定)
代码如下:
<?
/* ---------------------------------------------------------------------
//织梦Http下载类V1.0
//出自:织梦之旅
//作者: IT柏拉图
//时间: 2005-11-13 12:39
//声明: 首发在落伍者网站,转载请保留版权信息
--------------------------------------------------------------------- */
class DedeHttpDown
{
var $m_url = "";
var $m_urlpath = "";
var $m_scheme = "http";
var $m_host = "";
var $m_port = "80";
var $m_user = "";
var $m_pass = "";
var $m_path = "https://www.jb51.net/";
var $m_query = "";
var $m_fp = "";
var $m_error = "";
var $m_httphead = "" ;
var $m_html = "";
var $m_puthead = "";
var $BaseUrlPath = "";
var $HomeUrl = "";
var $JumpCount = 0;//防止多重重定向陷入死循环
//
//初始化系统
//
function PrivateInit($url)
{
if($url=="") return ;
$urls = "";
$urls = @parse_url($url);
$this->m_url = $url;
if(is_array($urls))
{
$this->m_host = $urls["host"];
if(!empty($urls["scheme"])) $this->m_scheme = $urls["scheme"];
if(!empty($urls["user"])){
$this->m_user = $urls["user"];
}
if(!empty($urls["pass"])){
$this->m_pass = $urls["pass"];
}
if(!empty($urls["port"])){
$this->m_port = $urls["port"];
}
if(!empty($urls["path"])) $this->m_path = $urls["path"];
$this->m_urlpath = $this->m_path;
if(!empty($urls["query"])){
$this->m_query = $urls["query"];
$this->m_urlpath .= "?".$this->m_query;
}
$this->HomeUrl = $urls["host"];
$this->BaseUrlPath = $this->HomeUrl.$urls["path"];
$this->BaseUrlPath = ereg_replace("/([^/]*)\.(.*)$","https://www.jb51.net/",$this->BaseUrlPath);
$this->BaseUrlPath = ereg_replace("/$","",$this->BaseUrlPath);
}
}
//
//打开指定网址
//
function OpenUrl($url)
{
//重设各参数
$this->m_url = "";
$this->m_urlpath = "";
$this->m_scheme = "http";
$this->m_host = "";
$this->m_port = "80";
$this->m_user = "";
$this->m_pass = "";
$this->m_path = "https://www.jb51.net/";
$this->m_query = "";
$this->m_error = "";
$this->JumpCount = 0;
$this->m_httphead = Array() ;
//$this->m_puthead = "";
$this->m_html = "";
$this->Close();
//初始化系统
$this->PrivateInit($url);
$this->PrivateStartSession();
}
//
//打开303重定向网址
//
function JumpOpenUrl($url)
{
//重设各参数
$this->m_url = "";
$this->m_urlpath = "";
$this->m_scheme = "http";
$this->m_host = "";
$this->m_port = "80";
$this->m_user = "";
$this->m_pass = "";
$this->m_path = "https://www.jb51.net/";
$this->m_query = "";
$this->m_error = "";
$this->JumpCount++;
$this->m_httphead = Array() ;
$this->m_html = "";
$this->Close();
//初始化系统
$this->PrivateInit($url);
$this->PrivateStartSession();
}
//
//获得某操作错误的原因
//
function printError()
{
echo "错误信息:".$this->m_error;
echo "具体返回头:<br>";
foreach($this->m_httphead as $k=>$v)
{ echo "$k => $v <br>\r\n"; }
}
//
//判别用Get方法发送的头的应答结果是否正确
//
function IsGetOK()
{
if( ereg("^2",$this->GetHead("http-state")) )
{ return true; }
else
{
$this->m_error .= $this->GetHead("http-state")." - ".$this->GetHead("http-describe")."<br>";
return false;
}
}
//
//看看返回的网页是否是text类型
//
function IsText()
{
if(ereg("^2",$this->GetHead("http-state"))
&& eregi("^text",$this->GetHead("content-type")))
{ return true; }
else
{
$this->m_error .= "内容为非文本类型或网址重定向<br>";
return false;
}
}
//
//判断返回的网页是否是特定的类型
//
function IsContentType($ctype)
{
if(ereg("^2",$this->GetHead("http-state"))
&& $this->GetHead("content-type")==strtolower($ctype))
{ return true; }
else
{
$this->m_error .= "类型不对 ".$this->GetHead("content-type")."<br>";
return false;
}
}
//
//用Http协议下载文件
//
function SaveToBin($savefilename)
{
if(!$this->IsGetOK()) return false;
if(@feof($this->m_fp)) { $this->m_error = "连接已经关闭!"; return false; }
$fp = fopen($savefilename,"w");
while(!feof($this->m_fp)){
fwrite($fp,fread($this->m_fp,1024));
}
fclose($this->m_fp);
fclose($fp);
return true;
}
//
//保存网页内容为Text文件
//
function SaveToText($savefilename)
{
if($this->IsText()) $this->SaveBinFile($savefilename);
else return "";
}
//
//用Http协议获得一个网页的内容
//
function GetHtml()
{
if(!$this->IsText()) return "";
if($this->m_html!="") return $this->m_html;
if(!$this->m_fp||@feof($this->m_fp)) return "";
while(!feof($this->m_fp)){
$this->m_html .= fgets($this->m_fp,256);
}
@fclose($this->m_fp);
return $this->m_html;
}
//
//开始HTTP会话
//
function PrivateStartSession()
{
if(!$this->PrivateOpenHost()){
$this->m_error .= "打开远程主机出错!";
return false;
}
if($this->GetHead("http-edition")=="HTTP/1.1") $httpv = "HTTP/1.1";
else $httpv = "HTTP/1.0";
//发送固定的起始请求头GET、Host信息
fputs($this->m_fp,"GET ".$this->m_urlpath." $httpv\r\n");
$this->m_puthead["Host"] = $this->m_host;
//发送用户自定义的请求头
if(!isset($this->m_puthead["Accept"])) { $this->m_puthead["Accept"] = "*/*"; }
if(!isset($this->m_puthead["User-Agent"])) { $this->m_puthead["User-Agent"] = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2)"; }
if(!isset($this->m_puthead["Refer"])) { $this->m_puthead["Refer"] = "http://".$this->m_puthead["Host"]; }
foreach($this->m_puthead as $k=>$v){
$k = trim($k);
$v = trim($v);
if($k!=""&&$v!=""){
fputs($this->m_fp,"$k: $v\r\n");
}
}
//发送固定的结束请求头
//HTTP1.1协议必须指定文档结束后关闭链接,否则读取文档时无法使用feof判断结束
if($httpv=="HTTP/1.1") fputs($this->m_fp,"Connection: Close\r\n\r\n");
else fputs($this->m_fp,"\r\n");
//获取应答头状态信息
$httpstas = explode(" ",fgets($this->m_fp,256));
$this->m_httphead["http-edition"] = trim($httpstas[0]);
$this->m_httphead["http-state"] = trim($httpstas[1]);
$this->m_httphead["http-describe"] = "";
for($i=2;$i<count($httpstas);$i++){
$this->m_httphead["http-describe"] .= " ".trim($httpstas[$i]);
}
//获取详细应答头
while(!feof($this->m_fp)){
$line = trim(fgets($this->m_fp,256));
if($line == "") break;
$hkey = "";
$hvalue = "";
$v = 0;
for($i=0;$i<strlen($line);$i++){
if($v==1) $hvalue .= $line[$i];
if($line[$i]==":") $v = 1;
if($v==0) $hkey .= $line[$i];
}
$hkey = trim($hkey);
if($hkey!="") $this->m_httphead[strtolower($hkey)] = trim($hvalue);
}
//判断是否是3xx开头的应答
if(ereg("^3",$this->m_httphead["http-state"]))
{
if($this->JumpCount > 3) return;
if(isset($this->m_httphead["location"])){
$newurl = $this->m_httphead["location"];
if(eregi("^http",$newurl)){
$this->JumpOpenUrl($newurl);
}
else{
$newurl = $this->FillUrl($newurl);
$this->JumpOpenUrl($newurl);
}
}
else
{ $this->m_error = "无法识别的转移应答!"; }
}//
}
//
//获得一个Http头的值
//
function GetHead($headname)
{
$headname = strtolower($headname);
if(isset($this->m_httphead[$headname]))
return $this->m_httphead[$headname];
else
return "";
}
//
//设置Http头的值
//
function SetHead($skey,$svalue)
{
$this->m_puthead[$skey] = $svalue;
}
//
//打开连接
//
function PrivateOpenHost()
{
if($this->m_host=="") return false;
$this->m_fp = @fsockopen($this->m_host, $this->m_port, &$errno, &$errstr,10);
if(!$this->m_fp){
$this->m_error = $errstr;
return false;
}
else{
return true;
}
}
//
//关闭连接
//
function Close(){
@fclose($this->m_fp);
}
//
//补全相对网址
//
function FillUrl($surl)
{
$i = 0;
$dstr = "";
$pstr = "";
$okurl = "";
$pathStep = 0;
$surl = trim($surl);
if($surl=="") return "";
$pos = strpos($surl,"#");
if($pos>0) $surl = substr($surl,0,$pos);
if($surl[0]=="https://www.jb51.net/"){
$okurl = "http://".$this->HomeUrl."https://www.jb51.net/".$surl;
}
else if($surl[0]==".")
{
if(strlen($surl)<=2) return "";
else if($surl[0]=="https://www.jb51.net/")
{
$okurl = "http://".$this->BaseUrlPath."https://www.jb51.net/".substr($surl,2,strlen($surl)-2);
}
else{
$urls = explode("https://www.jb51.net/",$surl);
foreach($urls as $u){
if($u=="..") $pathStep++;
else if($i<count($urls)-1) $dstr .= $urls[$i]."https://www.jb51.net/";
else $dstr .= $urls[$i];
$i++;
}
$urls = explode("https://www.jb51.net/",$this->BaseUrlPath);
if(count($urls) <= $pathStep)
return "";
else{
$pstr = "http://";
for($i=0;$i<count($urls)-$pathStep;$i++)
{ $pstr .= $urls[$i]."https://www.jb51.net/"; }
$okurl = $pstr.$dstr;
}
}
}
else
{
if(strlen($surl)<7)
$okurl = "http://".$this->BaseUrlPath."https://www.jb51.net/".$surl;
else if(strtolower(substr($surl,0,7))=="http://")
$okurl = $surl;
else
$okurl = "http://".$this->BaseUrlPath."https://www.jb51.net/".$surl;
}
$okurl = eregi_replace("^()","",$okurl);
$okurl = eregi_replace("/{1,}","https://www.jb51.net/",$okurl);
return "http://".$okurl;
}
}
?>
您可能感兴趣的文章: