被折腾得受不了,于是大致实现了一个功能:访问频率稍微高一点就自动封掉该 IP 的访问。封一个小时左右似乎比较合适,但不想为恶意访问的人考虑太多,所以自动解封就不实现了
大致上要耗费1ms的执行时间
实现:php mysql memcached
思路:正常蜘蛛白名单,读到memcached里去,其它的计划任务再检测,频率太高就封掉。
文件结构:
bots.txt 蜘蛛IP数据
white.txt 需要补充到白名单的数据
inc.php需要包含到动态页面的函数
block.php计划任务执行脚本
---------bots.txt-----------------
//googlebot
64.233.160.0-64.233.191.255
66.102.0.0-66.102.15.255
66.249.64.0-66.249.95.255
72.14.192.0-72.14.255.255
74.125.0.0-74.125.255.255
209.85.128.0-209.85.255.255
216.239.32.0-216.239.63.255
//bingbot
64.4.0.0-64.4.63.255
65.52.0.0-65.55.255.255
131.253.21.0-131.253.47.255
157.54.0.0-157.60.255.255
207.46.0.0-207.46.255.255
207.68.128.0-207.68.207.255
//baiduspider
180.76.5.0-180.76.5.255
180.76.6.0-180.76.6.255
220.181.108.0-220.181.108.255
123.125.67.0-123.125.67.255
123.125.71.0-123.125.71.255
//sogouspider
218.30.103.0-218.30.103.255
//yandexbot
199.21.99.0-199.21.99.255
针对用户访问,inc.php 里这么写:
/**
 * Load the crawler whitelist as numeric IPv4 ranges.
 *
 * The parsed list is read from the cache under the key 'bots.txt'. When the
 * cached value is empty, the list is rebuilt from the bots.txt file located
 * next to this script and stored back with a lifetime argument of 86400.
 * Blank lines and lines starting with '//' are ignored; every other line
 * must have the form "start-end" with two dotted-quad IPv4 addresses.
 * Lines that do not match are skipped.
 *
 * @return array List of array('s' => start, 'e' => end), both values as returned by ip2long().
 */
function initipdata(){
    global $memd;
    $key = 'bots.txt';
    $ips = $memd->get($key);
    if(!empty($ips)){
        return $ips;
    }
    // Whatever get() returned was empty (presumably false on a miss); start
    // from a real array so that appending below is well defined.
    $ips = array();
    $text = file_get_contents(dirname(__FILE__).'/bots.txt');
    if($text === false){
        // Unreadable file: report "no whitelist" and do not cache anything.
        return $ips;
    }
    foreach(explode("\n",$text) as $raw){
        $raw = trim($raw);
        if($raw === '' || substr($raw,0,2) == '//'){
            continue;
        }
        $ra = explode('-',$raw);
        if(count($ra) != 2){
            continue;
        }
        $from = trim($ra[0]);
        $to = trim($ra[1]);
        $s = ip2long($from);
        $e = ip2long($to);
        // Both endpoints must be valid and must round-trip, which rejects
        // shorthand or otherwise non-canonical spellings.
        if($s === false || $e === false || long2ip($s) != $from || long2ip($e) != $to){
            continue;
        }
        $ips[] = array('s'=>$s,'e'=>$e);
    }
    if(!empty($ips)) $memd->set($key,$ips,86400);
    return $ips;
}
/**
 * Count the current request against the client address unless that address
 * falls inside one of the ranges returned by initipdata().
 *
 * The per-address counter is kept in the cache under the address itself and
 * is rewritten with a lifetime argument of 120 on every counted request.
 * Once the counter exceeds 30, a row holding the address and the current
 * time is inserted into `_zwarn`.
 */
function ipwarn(){
    global $conn,$memd;
    $ranges = initipdata();
    $client = $_SERVER['REMOTE_ADDR'];
    $numeric = ip2long($client);
    $whitelisted = false;
    foreach($ranges as $range){
        if($numeric>=$range['s'] && $numeric<=$range['e']){
            $whitelisted = true;
            // The first matching range settles it; later ranges cannot change the result.
            break;
        }
    }
    if($whitelisted){
        return;
    }
    $hits = $memd->get($client);
    $hits = $hits+1;
    $memd->set($client,$hits,120);
    if($hits>30){
        mysql_query('insert into `_zwarn` (`ip`,`time`) values (\''.addslashes($client).'\',\''.addslashes(time()).'\');',$conn);
    }
}
然后做个计划任务(block.php):
/**
 * Print a value followed by a CRLF line ending.
 *
 * @param mixed $s Value to print; it is concatenated to the line ending as a string.
 */
function show($s){
    $line = $s."\r\n";
    print $line;
}
/**
 * Fetch the value stored in the cache under the given key.
 *
 * @param string $key Cache key; the cron script passes a client address.
 * @return mixed Whatever the cache client's get() returns for that key.
 */
function getipnum($key){
    global $memd;
    return $memd->get($key);
}
/**
 * Reverse-resolve an address to a host name.
 *
 * @param string $ip Address to look up.
 * @return mixed The result of gethostbyaddr() for that address, passed through unchanged.
 */
function gethost($ip){
    return gethostbyaddr($ip);
}
/**
 * Decide whether an address is still a blocking candidate.
 *
 * An address is cleared (false is returned) only when its reverse-DNS name
 * ends with one of the whitelisted crawler domains AND that name resolves
 * forward to the same address again. In every other case true is returned.
 *
 * The domain must be a real suffix of the host name. A plain substring match
 * would also accept names such as "x.googlebot.com.attacker.net", which the
 * owner of an address can set up with matching forward and reverse records.
 *
 * @param string $ip   Address being examined.
 * @param mixed  $host Reverse-DNS name for $ip; empty, or equal to $ip, when no name is known.
 * @return bool True when the address should be treated as an ordinary client, false when it is a verified crawler.
 */
function checkip($ip,$host){
    $white = array('.googlebot.com','.baidu.com','.yandex.com','.sogou.com','.bing.com','.msn.com');
    if(empty($host) || $ip===$host){
        return true;
    }
    // Compare case-insensitively and ignore a trailing root dot.
    $name = rtrim(strtolower($host),'.');
    $trusted = false;
    foreach($white as $suffix){
        $len = strlen($suffix);
        if(strlen($name)>$len && substr($name,-$len)===$suffix){
            $trusted = true;
            break;
        }
    }
    if(!$trusted){
        // Not a whitelisted domain: the forward lookup cannot change the answer.
        return true;
    }
    // Forward-confirm: the claimed name has to resolve back to the same address.
    return gethostbyname($host)!==(string)$ip;
}
// Scheduled job: walk every address recorded in `_zwarn`, print its figures,
// and either drop it at the firewall, append it to white.txt, or leave it alone.
$meta = array();
$sql = 'select * from `_zwarn`;';
$rs = mysql_query($sql,$conn);
// A failed query yields false; skip the fetch loop instead of fetching from a non-resource.
while($rs && ($row=mysql_fetch_object($rs))){
    // Several rows for one address collapse to a single entry; the row read last wins.
    $meta[($row->ip)] = $row->time;
}
foreach($meta as $addr => $since){
    $addr = (string)$addr;
    $delete = 'delete from `_zwarn` where `ip`=\''.mysql_real_escape_string($addr,$conn).'\';';

    // The address ends up on a shell command line, so accept nothing but a
    // canonical dotted-quad IPv4 address (same round-trip check as the
    // whitelist loader uses).
    $long = ip2long($addr);
    if($long===false || long2ip($long)!==$addr){
        show('skip invalid address: '.$addr);
        mysql_query($delete,$conn);
        show('');
        continue;
    }

    $data = array();
    $data['addr'] = $addr;
    $data['host'] = gethost($data['addr']);
    $data['bool'] = checkip($data['addr'],$data['host']);
    $data['last'] = 30;
    $data['curr'] = getipnum($data['addr']);
    // Clamp to at least one second: a row written within the current second
    // would otherwise cause a division by zero below.
    $data['time'] = max(1,time()-$since);
    $data['rate'] = ($data['curr']-$data['last'])/$data['time'];
    foreach(array_keys($data) as $key){
        show($key.': '.$data[$key]);
    }

    if($data['bool']){
        $handled = false;
        if($data['rate']>0.5 || $data['curr']>60){
            $cmd = 'iptables -I INPUT -s '.escapeshellarg($data['addr'].'/32').' -j DROP';
            show($cmd);
            passthru($cmd);
            // The host name comes from reverse DNS and is therefore remote-controlled;
            // escape it with the connection-aware function rather than addslashes().
            mysql_query('insert into `_zblock` (`ip`,`host`) values (\''.mysql_real_escape_string($data['addr'],$conn).'\',\''.mysql_real_escape_string((string)$data['host'],$conn).'\');',$conn);
            mysql_query($delete,$conn);
            $handled = true;
            if($data['curr']>0){
                $memd->set($data['addr'],0,120);
            }
        }
        // No counter value left in the cache: the warning row is stale, remove it.
        if(!$handled && $data['curr']<1){
            mysql_query($delete,$conn);
        }
    }
    else{
        // checkip() cleared this address: record it for white.txt and forget the warning.
        file_put_contents(dirname(__FILE__).'/white.txt',$data['addr']."\r\n",FILE_APPEND);
        mysql_query($delete,$conn);
    }
    show('');
}