阅读背景:

(58)-- 用正则层层爬取图片

来源:互联网 

# 用正则层层爬取图片

from urllib import request
import re

# URL template for the paginated gallery list pages; the "{}" placeholder
# is filled in with the page number via str.format.
base_url = (
    "https://www.mmonly.cc/wmtp/fjtp/"
    "list_21_{}.html"
)

def download(pic_url):
    """Download one image into the local ``images/`` directory.

    The file is stored under the last ``/``-separated segment of
    ``pic_url``. The ``images/`` directory is created if it is missing,
    so the first download on a fresh working directory does not fail.

    Args:
        pic_url: URL of the image to fetch.

    Raises:
        Errors raised by ``urllib.request.urlretrieve`` (for example
        ``urllib.error.URLError``) propagate to the caller.
    """
    import os  # local import so this block does not depend on file-level edits

    print('downloading...%s' % pic_url)
    fname = pic_url.split('/')[-1]
    # urlretrieve opens the target path directly and does not create
    # missing parent directories, so make sure the folder exists first.
    os.makedirs('images', exist_ok=True)
    request.urlretrieve(pic_url, 'images/' + fname)

def _read_html(url):
    """Fetch ``url`` and return its body decoded as gb2312, ignoring bad bytes."""
    # The context manager closes the HTTP response once the body is read.
    with request.urlopen(url) as response:
        return response.read().decode('gb2312', 'ignore')


def getPage():
    """Crawl list pages 1-72, follow every album link, and download its images.

    Each list page URL is built from ``base_url``. Album links are pulled
    out of the list page with a regex, each album page is fetched in turn,
    and every ``<img alt="..." src="...">`` URL found on it is handed to
    ``download``.
    """
    # "." and "?" in "/go.html?url=" are escaped so they match literally.
    # Unescaped, "l?" means "optional l" and the pattern can never match
    # the real "?" that appears in the href.
    album_pat = re.compile(
        r'<div class="btns" > <a class="img_album_btn" '
        r'href="/go\.html\?url=https://www\.mmonly\.cc/wmtp/fjtp/(.*?)"',
        re.S,
    )
    img_pat = re.compile(r'<img alt=".*?" src="(.*?)"')

    for page_no in range(1, 73):
        list_html = _read_html(base_url.format(page_no))

        for album_path in album_pat.findall(list_html):
            # The capture group holds only the part after the shared prefix.
            album_url = 'https://www.mmonly.cc/wmtp/fjtp/' + album_path
            album_html = _read_html(album_url)

            for pic_url in img_pat.findall(album_html):
                download(pic_url)


# Script entry point: start the crawl only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    getPage()




from urllib import request
import



你的当前访问异常,请进行认证后继续阅读剩余内容。

分享到: