# Today's class notes (今天课堂笔记)
from urllib import request
import re
# Class exercise: download the course listing pages from kgc.cn, keep a local
# copy of each page, then pull three fields per course out of the saved HTML
# with regular expressions and write them to one text file per page.

url = "https://www.kgc.cn/coding/"

# The original note says the site shows 25 listing pages in total, so the page
# numbers run from 1 to 25 inclusive (range(1, 25) would stop at page 24).
TOTAL_PAGES = 25

# Anchor holding the course title.  The "." and "?" in "/go.html?url=" are
# escaped so they match literally: unescaped, "l?" means "an optional l" and
# the pattern can never match a real "/go.html?url=..." link.  Attribute
# values use [^"]+ and the captured text is non-greedy so that several tags
# on the same line are matched one by one instead of being swallowed whole.
TITLE_RE = re.compile(
    r'<a href="/go\.html\?url=[^"]+" class="yui3-u course-title-a" '
    r'target="_blank" alt="[^"]+">(.+?)</a>',
    re.I,
)
# Digits inside the "course-pepo" span (presumably the number of learners --
# confirm against the page).
COUNT_RE = re.compile(r'<span class="course-pepo">(\d+)</span>', re.I)
# Text inside the "view0-old" span (presumably a price label -- confirm
# against the page).
PRICE_RE = re.compile(r'<span class="view0-old">(.+?)</span>', re.I)


def page_filename(page):
    """Return the file name of listing page *page*.

    The same name is used as the last URL segment and as the local file name.
    """
    return 'list-{0}-6-9-9-0.shtml'.format(page)


def extract_columns(html):
    """Return three lists of matches found in *html*.

    The lists hold, in document order, the captured text of the course-title
    anchors, the "course-pepo" spans and the "view0-old" spans.
    """
    return (
        TITLE_RE.findall(html),
        COUNT_RE.findall(html),
        PRICE_RE.findall(html),
    )


def download_pages(pages):
    """Fetch every listing page in *pages* and save it in the current directory."""
    for page in pages:
        fname = page_filename(page)
        # "with" closes the HTTP response even if reading or decoding fails.
        with request.urlopen(url + fname) as response:
            html = response.read().decode("utf-8")
        print('开始保存:{}'.format(fname))
        with open(fname, 'w', encoding='utf-8') as out:
            out.write(html)


def parse_pages(pages):
    """Parse each saved listing page and write its rows to data-<page>.txt.

    Each output line is "<title> <count> <price>".
    """
    for page in pages:
        fname = page_filename(page)
        with open(fname, 'r', encoding='UTF-8') as src:
            html = src.read()
        titles, counts, prices = extract_columns(html)
        if not len(titles) == len(counts) == len(prices):
            # The three lists are matched independently, so a course that
            # lacks one of the spans shifts the columns.  Report it instead
            # of failing with IndexError part-way through the file.
            print('警告:{} 中三个字段的匹配数量不一致 ({}/{}/{}),多出的部分将被忽略'.format(
                fname, len(titles), len(counts), len(prices)))
        with open('data-{0}.txt'.format(page), 'w', encoding='utf-8') as out:
            # zip() stops at the shortest list, so only complete rows are written.
            for row in zip(titles, counts, prices):
                out.write("{0} {1} {2}\n".format(*row))


def main():
    """Download all listing pages, then parse the saved copies."""
    pages = range(1, TOTAL_PAGES + 1)
    download_pages(pages)
    parse_pages(pages)


if __name__ == "__main__":
    main()

# NOTE(review): the original last line fused `f.close()` with the fragment
# below, which looks like the start of a second pasted copy of this script.
# It is kept verbatim; confirm and remove if it is indeed a duplicate.
from urllib import request
impor