# Crawl images level by level using regular expressions
from urllib import request
import re
# URL template for the paginated listing pages; the `{}` placeholder is
# filled with the page number via `base_url.format(i)` in getPage().
base_url = 'https://www.mmonly.cc/wmtp/fjtp/list_21_{}.html'
def download(pic_url):
    """Download a single image into the local ``images/`` directory.

    The file is stored under the last ``/``-separated segment of
    ``pic_url``. The ``images/`` directory is created on demand.

    Args:
        pic_url: URL of the image to fetch.

    Raises:
        Any error raised by ``urllib.request.urlretrieve`` (network or
        file-system failures) propagates to the caller unchanged.
    """
    import os  # local import: only this function needs it

    print('downloading...%s' % pic_url)
    fname = pic_url.split('/')[-1]
    # urlretrieve does not create missing directories; without this the
    # very first download fails with FileNotFoundError on a fresh checkout.
    os.makedirs('images', exist_ok=True)
    request.urlretrieve(pic_url, 'images/' + fname)
def getPage():
    """Crawl the listing pages and download the images of each linked album.

    For every page number from 1 to 72, the listing page built from
    ``base_url`` is fetched and decoded as gb2312 (undecodable bytes are
    ignored). Each album link found on the page is fetched in turn, and
    every ``<img alt=... src=...>`` URL on the album page is handed to
    ``download``.

    Raises:
        Errors from ``urllib.request.urlopen`` or ``download`` are not
        caught here and abort the crawl.
    """
    # Compile both patterns once instead of on every loop iteration.
    # The '?' and '.' in the href are escaped so they match literally: an
    # unescaped "html?url=" makes the 'l' optional and can never match the
    # "?" that actually appears in "/go.html?url=...".
    # NOTE(review): the exact whitespace in '<div class="btns" > <a' is kept
    # from the original pattern; confirm it still matches the site's markup.
    album_pat = re.compile(
        r'<div class="btns" > <a class="img_album_btn" '
        r'href="/go\.html\?url=https://www\.mmonly\.cc/wmtp/fjtp/(.*?)"',
        re.S,
    )
    img_pat = re.compile(r'<img alt=".*?" src="(.*?)"')

    for i in range(1, 73):
        fullurl = base_url.format(i)
        # Context manager ensures the HTTP response is closed after reading.
        with request.urlopen(fullurl) as response:
            list_html = response.read().decode('gb2312', 'ignore')

        for album_path in album_pat.findall(list_html):
            new_url = 'https://www.mmonly.cc/wmtp/fjtp/' + album_path
            with request.urlopen(new_url) as response:
                album_html = response.read().decode('gb2312', 'ignore')

            for pic_url in img_pat.findall(album_html):
                download(pic_url)
# Script entry point: start the crawl only when this file is run directly,
# not when it is imported as a module.
if __name__ == "__main__":
    getPage()
from urllib import request
import