import feapder import re import json from nodejs.bindings import node_run class DoubanBookSpider(feapder.AirSpider): def start_requests(self): with open(r'/Users/yangxin/Desktop/douban_book/keyword.txt', 'r') as f: for key in f.readlines(): urls = 'https://search.douban.com/book/subject_search?search_text={}&cat=1001'.format(key) print("本次抓取" + key) yield feapder.Request(urls) def parse(self, request, res
# -*- coding: utf-8 -*-
"""
Created on 2023-07-13 15:54:59
---------
@summary: Request one Douban book page and print the text of its heading.
---------
@author: yangxin
"""
import feapder


class DoubanBookSpider(feapder.AirSpider):
    """Spider that requests a single hard-coded Douban book detail page."""

    def start_requests(self):
        """Yield the one seed request for the book detail page."""
        book_url = "https://book.douban.com/subject/36415409/?icn=index-latestbook-subject"
        seed_request = feapder.Request(book_url)
        yield seed_request

    def parse(self, request, response):
        """Print the first text node matched by the heading XPath.

        Args:
            request: The request that produced ``response``; unused here.
            response: The downloaded page, queried through ``response.xpath``.
        """
        # Extract the page title: the text of the <span> inside the <h1>
        # located under the element whose id is "wrapper".
        heading_texts = response.xpath('//*[@id="wrapper"]/h1/span/text()')
        # NOTE(review): presumably None when nothing matches -- confirm
        # against the selector's extract_first() contract.
        title = heading_texts.extract_first()
        print(title)
思路 自定义关键词=>阿里API=>查询结果保存本地txt # -*- coding:UTF-8 -*- import requests from bs4 import BeautifulSoup import re def xfun(): S1 = 'abcdefghijklmnopqrstuvwxyz' S2 = 'abcdefghijklmnopqrstuvwxyz' S3 = 'abcdefghijklmnopqrstuvwxyz' l = [a+b+c for a in S1 for b in S2 for c in S3] return l def domain(k1,k2,h): val1=str(k1)+str(k2)+'.'+str(h) print(val1) domain_new="https://checkapi.aliyun.com/check/checkdomain?domain=%s&command=&token=Y847a5e60c85b0f08e
Wuyangxin