代码比较粗糙,只是为了快速完成批量域名注册查询的需求,后续会继续修改并完善程序。
核心流程:访问目标网站 ==> 抓取内容页URL列表 ==> 从内容页提取关键词(keyword,即主域名)==> 调用阿里API查询该域名 ==> 返回注册信息
程序运行效果
#_*_coding:utf-8_*_
import requests
from bs4 import BeautifulSoup
import re
import tldextract
import time
import random
# Batch domain-availability check.
#
# For every listing page on egouz.com, follow each "more" link to the site's
# detail page, read the outbound site URL, take its registrable domain name,
# append ".cn", and ask the Aliyun domain-check endpoint whether that name is
# already registered. Names that do not come back as registered are appended
# to a text file.

web_u = 'http://www.egouz.com/north-america/america/'  # base URL of the paginated listing

# Aliyun check endpoint; "%s" is replaced by the domain to look up.
# The original literal contained "¤cy=" here, which is "&currency=" after an
# accidental HTML-entity decode of "&curren"; the parameter name is restored.
# NOTE(review): token and callback look like values captured from a browser
# session and presumably expire - confirm before relying on the results.
check_url_tpl = (
    "https://checkapi.aliyun.com/check/checkdomain?domain=%s&command="
    "&token=Y847a5e60c85b0f08e40bba55aa021566"
    "&ua=&currency=&site=&bid=&_csrf_token="
    "&callback=jsonp_1538051895584_24373"
)

for x in range(310, 532):  # listing pages 310.html .. 531.html
    u = web_u + str(x) + '.html'
    r = requests.get(url=u, timeout=5)
    print(u)
    soup = BeautifulSoup(r.text, 'lxml')

    # find() returns None when the element is missing (layout change, error
    # page); skip this listing page instead of crashing on .find_all.
    active_tab = soup.find('li', class_='tab active')
    if active_tab is None:
        continue
    url_new = active_tab.find_all('a', class_='btn-more')  # links to the detail pages

    for k in url_new:
        url_1 = 'http://www.egouz.com/' + k['href']
        r1 = requests.get(url=url_1, timeout=100)
        soup1 = BeautifulSoup(r1.text, 'lxml')
        gotourl = soup1.find('a', class_='btn-goto pull-left')
        if gotourl is None:
            # No outbound link on this detail page: skip only this entry.
            # (The original used `break`, which silently dropped every
            # remaining site on the same listing page.)
            continue

        cha_url = gotourl['href']
        print(cha_url)
        val = tldextract.extract(cha_url)  # split the URL into subdomain / domain / suffix
        if not val.domain:
            # Nothing usable was extracted; querying a bare ".cn" is pointless.
            continue
        val1 = val.domain + '.cn'  # candidate .cn name to check

        # Random 1-5 s pause between lookups to avoid hammering the API.
        time.sleep(random.randint(1, 5))
        domain_new = check_url_tpl % val1
        r2 = requests.get(url=domain_new, timeout=100)

        # The script treats '"avail":0' in the response body as "already
        # registered". The body is JSONP, so a plain substring test is enough;
        # no HTML parsing is needed.
        # NOTE(review): any other response (including error payloads) falls
        # through to the "available" branch - confirm that is acceptable.
        if '"avail":0' in r2.text:
            print(val1 + '已注册')
        else:
            print(val1 + '可以注册')
            # Explicit UTF-8 because the record contains Chinese text; the
            # `with` block closes the file, so no manual close() is required.
            with open('/Users/yangxin/Desktop/国外域名7.txt', 'a', encoding='utf-8') as f:
                f.write('原域名:' + cha_url + '\n' + '注册域名:' + val1 + '\n')