php知道与问问的采集插件代码(2)


<?php
/**
 * Soso Wenwen Q&A collector.
 *
 * Every request processes at most ONE entry of a wenwen.soso.com search
 * result page and then redirects to itself (via JavaScript) with updated
 * counters, so a whole search is walked one entry per page load.
 *
 * Request parameters:
 *   POST ask - search phrase typed into the form; it is turned into the
 *              remote "sp" parameter by prefixing "S".
 *   GET  sp  - remote search parameter carried between self-redirects.
 *   GET  jl  - running record counter shown in the completion message;
 *              defaults to 1 and is incremented after each insert.
 *   GET  pg  - search result page number.
 *   GET  rs  - index of the entry on the current page (0-9).
 *
 * Settings such as $table_prefix, $min_t1, $min_t2, $cj_word, $same_title,
 * $art_author, $t_table and the $t_* column names are not defined here;
 * presumably they come from stole_config.php, and $keyword from keyword.php
 * (confirm against those files).
 */
session_start();
header("content-type:text/html;charset=utf-8");
require("stole_config.php");
require("conn.php");
require("keyword.php");

if (!empty($_POST['ask'])) {
    $ask = urlencode(trim($_POST['ask'])); // question submitted through the form
    $sp = "S" . $ask;
} else {
    // $_GET values arrive URL-decoded, so re-encode before reusing them in URLs.
    $sp = isset($_GET['sp']) ? urlencode($_GET['sp']) : '';
}

// Force the counters to integers: they are echoed back into HTML/JavaScript below.
$jl = empty($_GET['jl']) ? 1 : intval($_GET['jl']);
$pg = isset($_GET['pg']) ? intval($_GET['pg']) : 0; // result page number
$rs = isset($_GET['rs']) ? intval($_GET['rs']) : 0; // entry index on the page

// Once index 9 has been handled, roll over to the next page.
if ($rs > 9) {
    $rs = 0;
    $pg++;
}

if ($pg > 51) {
    // The search term originates from the request, so escape it for HTML.
    echo "采集完毕! 总共采集 " . htmlspecialchars(urldecode($sp), ENT_QUOTES, 'UTF-8') . " " . $jl . "条记录";
    exit();
}

if ($sp) { // only run when a search term is present
    // Fetch the result page and pick the question URL at position $rs.
    // Fetch failures stay silent (best effort); the entry is simply skipped.
    $str = @file_get_contents("http://wenwen.soso.com/z/Search.e?sp={$sp}&pg={$pg}");
    $t = '';
    $asklist = array();
    $urllist = array();
    if ($str !== false
        && preg_match('/<ol class="result_list">(.*)<\/ol>/iUs', $str, $asklist) // result list
        && preg_match_all('/<a target="_blank" href="\/z\/(q.*\.htm)/iUs', $asklist[1], $urllist) // question links
        && isset($urllist[1][$rs])
    ) {
        $t = $urllist[1][$rs];
    }

    if ($t !== '') {
        // Source id stored with each article so an entry is never collected twice.
        $suid = "ww{$t}";
        $suid_sql = mysql_real_escape_string($suid);

        $sct = mysql_query("select count(*) from {$table_prefix}c_article where suid='$suid_sql' ");
        $sct = $sct ? mysql_fetch_array($sct) : false;
        $sct = $sct ? $sct[0] : 0; // a failed lookup is treated as "not collected yet"

        if ($sct == 0) {
            $html = @file_get_contents("http://wenwen.soso.com/z/{$t}");
            $html = str_replace("<pre>", "", str_replace("</pre>", "", $html));
            $html = str_replace("<br/><br/><br/>", "<br/><br/>", $html);

            $ask_title = array();
            preg_match('/<div class="question_main">.*<h3>(.*)<\/h3>/iUs', $html, $ask_title);
            $art_title = isset($ask_title[1]) ? $ask_title[1] : '';

            $answer = array();
            preg_match('/<div class="answer_con">(.*)<\/div>/iUs', $html, $answer);

            // preg_match() yields a single capture group here, so this loop
            // runs at most once; answers not longer than $min_t1 bytes are dropped.
            $art_content = "";
            for ($i = count($answer) - 1; $i >= 1; $i--) {
                if (strlen($answer[$i]) > $min_t1) {
                    $art_content .= $answer[$i];
                }
            }
            $art_content = trim($art_content);

            // Strip <a ...>...</a> wrappers but keep the link text (capture group 2).
            $art_content = preg_replace('/(<a .*>)(.*)<\/a>/iUs', '${2}', $art_content);

            // The title must contain at least one of the comma-separated words
            // in $cj_word (converted from GBK to UTF-8 before comparing).
            $word_allow = false;
            $word_arr = explode(",", iconv("gbk", "utf-8", $cj_word));
            foreach ($word_arr as $word) {
                // Empty entries (e.g. from a trailing comma) can never match.
                if ($word !== '' && strpos($art_title, $word) !== false) {
                    $word_allow = true;
                    break;
                }
            }

            if ($word_allow) {
                if (strlen($art_content) > $min_t2) {
                    echo "<font color=red>添加中............................</font><br>";
                    echo $art_title . "<br>";

                    $art_title = iconv('utf-8', 'gbk', $art_title);

                    // When the title already exists, append $same_title and the match count.
                    $title_ct = mysql_query("select count(*) from {$table_prefix}c_article where art_title ='" . mysql_real_escape_string($art_title) . "' ");
                    $title_ct = $title_ct ? mysql_fetch_array($title_ct) : false;
                    $title_ct = $title_ct ? $title_ct[0] : 0;
                    if ($title_ct > 0) {
                        $art_title .= "{$same_title}{$title_ct}";
                    }

                    $art_content = iconv('utf-8', 'gbk', str_replace("\r\n", "<br>", $art_content));
                    // Apply the replacement map, when one is defined.
                    if (!empty($keyword) && is_array($keyword)) {
                        $art_content = strtr($art_content, $keyword);
                    }
                    $art_time = date("Y-m-d");

                    // Scraped text is untrusted: escape it before building SQL.
                    $title_sql = mysql_real_escape_string($art_title);
                    $content_sql = mysql_real_escape_string($art_content);

                    // Insert into the collector's own table.
                    $sql = "insert into {$table_prefix}c_article(art_title,art_content,art_time,art_author,suid) values('$title_sql','$content_sql','$art_time','$art_author','$suid_sql')";
                    mysql_query($sql);

                    // Insert into the target article table, with or without a category column.
                    if (empty($t_catx_id)) {
                        $sql2 = "insert into {$t_table}({$t_art_title},{$t_art_content},{$t_art_time},{$t_artx_author}) values('$title_sql','$content_sql','$art_time','$art_author')";
                    } else {
                        // NOTE(review): $cat_id is not defined in this file - confirm it is set by an include.
                        $sql2 = "insert into {$t_table}({$t_art_title},{$t_art_content},{$t_art_time},{$t_artx_author},{$t_catx_id}) values('$title_sql','$content_sql','$art_time','$art_author','$cat_id')";
                    }
                    mysql_query($sql2);

                    $jl++; // one more record collected
                } else {
                    echo "长度不够";
                }
            } else {
                echo "主题不符合要求";
            }
        } else {
            echo "已经存在";
        }
    }
    // When no question URL exists at this position the entry is skipped silently.

    $rs++;

    // Record the current progress of this run.
    $f_tt = urldecode($sp) . "--页数" . $pg . " 记录数 " . $jl;
    file_put_contents("ss.txt", $f_tt);

    // Continue with the next entry; $sp is URL-encoded and the counters are integers.
    echo "<script>location.href='wenwen.php?jl=" . $jl . "&sp=" . $sp . "&pg=" . $pg . "&rs=" . $rs . "';</script>";
    exit();
}
?>
<!-- NOTE(review): the absolute jb51.net URLs in the stylesheet and navigation links below look like they may have been rewritten from relative paths; confirm before deploying. -->
<link href="https://www.jb51.net/style.css" type="text/css" />
<table cellspacing="1" bgcolor="#CCCCCC">
<tr>
<td bgcolor="#00CC00"><h1><a href="https://www.jianlila.com">荐礼啦</a>知道问问采集插件</h1></td>
</tr>
</table>
<table cellspacing="1" bgcolor="#CCCCCC">
<tr>
<td bgcolor="#FFFFFF"><a href="https://www.jb51.net/cj_config.php">采集设置</a> <a href="https://www.jb51.net/uninstall.php">卸载采集</a>&nbsp;<a href="https://www.jb51.net/cj_view.php">查看采集记录</a>&nbsp;<a href="https://www.jb51.net/cj_help.php">采集帮助</a> <a href="https://www.jb51.net/baidu.php" target="_blank">知道采集</a> &nbsp;<a href="https://www.jb51.net/wenwen.php" target="_blank">问问采集</a></td>
</tr>
</table>
<!-- The form posts back to this script, which reads the search phrase from $_POST['ask']. -->
<form action="wenwen.php" method="post">
<table>
<tr>
<td><input type="text" name="ask" size="50"></td>
<td><input type="submit" value="问问采集"></td>
</tr>
</table>
</form>

内容版权声明:除非注明,否则皆为本站原创文章。

转载注明出处:http://www.heiqu.com/a3ac0fc0106b8fcba947ff4658030cf3.html