I have a Scrapy middleware:
我有一个简单的中间件:
class ProxyMiddleware(object):
    """Middleware that assigns a randomly chosen proxy to each request.

    The proxy list is read once, at construction time, from the file named
    by the ``PROXY_FILE`` setting. Each non-blank line of that file must
    contain at least three whitespace-separated fields in the order
    ``host port scheme``; any further fields on the line are ignored.
    """

    def __init__(self, settings):
        """Load the proxy list from the file named by ``PROXY_FILE``.

        Args:
            settings: Object exposing ``get(name)``; ``get('PROXY_FILE')``
                must return the path of the proxy list file.

        Raises:
            ValueError: If a non-blank line has fewer than three fields.
        """
        self.proxy_file = settings.get('PROXY_FILE')
        self.proxy_list = []
        # The context manager guarantees the handle is closed even when a
        # malformed line aborts loading.
        with open(self.proxy_file) as fin:
            for lineno, line in enumerate(fin, 1):
                parts = line.split()
                if not parts:
                    # Blank lines (e.g. a trailing newline) carry no proxy.
                    continue
                if len(parts) < 3:
                    raise ValueError(
                        '{}:{}: expected "host port scheme", got {!r}'.format(
                            self.proxy_file, lineno, line.strip()))
                host, port, scheme = parts[:3]
                self.proxy_list.append(
                    '{}://{}:{}'.format(scheme, host, port))

    def process_request(self, request, spider):
        """Store a randomly chosen proxy URL in ``request.meta['proxy']``.

        Raises:
            IndexError: If no proxies were loaded (``random.choice`` on an
                empty list).
        """
        request.meta['proxy'] = random.choice(self.proxy_list)
cla