def getport(resultstring, namespace=None):
    """Evaluate an obfuscated port expression and return its value.

    ``resultstring`` is a Python-compatible expression such as
    ``'(1646^hen)+19'``. Any names it uses (``hen`` in the example) must be
    resolvable when the expression is evaluated.

    Args:
        resultstring: Expression text to evaluate.
        namespace: Optional mapping of variable names to values used to
            resolve names in the expression. When omitted, names are looked
            up exactly as a bare ``eval`` call does (the scope of this
            function, then this module's globals).

    Returns:
        The value the expression evaluates to.

    Raises:
        NameError: If the expression uses a name that cannot be resolved.
        SyntaxError: If ``resultstring`` is not a valid expression.
    """
    # NOTE(security): eval() executes arbitrary code. Callers in this file pass
    # text scraped from a remote web page, so only use this with a source you
    # trust, or replace it with a restricted arithmetic evaluator.
    if namespace is None:
        return eval(resultstring)
    # Evaluate against a copy: eval() inserts ``__builtins__`` into the dict it
    # is given, and the caller's mapping should not be modified.
    return eval(resultstring, dict(namespace))
proxyurl= 'http://www.pachong.org/' | |
try: | |
r = requests.get(proxyurl,timeout=60*4) | |
except: | |
print 'I can not get the date of pachong.org' | |
if r.status_code != 200: | |
print 'the status is not good. status_code is %s' % r.status_code | |
return | |
ht = BeautifulSoup(r.content) | |
animals = str(ht.head.find_all('script')[-1].text) | |
[eval(item.replace('var','').strip()) for item in animals.split(';')]###it is wrong here | |
table = ht.find_all('table', attrs={'class':'tb'}) | |
if not table: | |
return | |
table = table[0] | |
trs = table.find_all('tr',attrs={'data-type':'high'}) | |
for tr in trs: | |
idlestring = tr.find_all('td')[5].text | |
idlestring = idlestring.replace('\n','').replace(' ','') | |
if idlestring == u'空闲': | |
# proxy_id += 1 | |
ip = tr.find_all('td')[1].text | |
portstring = tr.find_all('td')[2].text | |
patt = re.compile(u'document.write\((.*?)\);') | |
if re.findall(patt,portstring): | |
resultstring = re.findall(patt,portstring)[0] | |
else: | |
continue | |
port = getport(resultstring) | |
ip_port = '%s:%s' % (ip, port) | |
print 'ip_port is %s' % ip_port | |
![]() |
1
no13bus (OP): 好像解决了。直接用 exec 即可:

```python
for item in animals.split(';'):
    exec(item.replace('var','').strip())
```

不知道大家还有没有什么别的优雅的解决办法。