阅读背景:

jsoup 爬取电影

来源:互联网 
  package com.spider; import com.mysql.jdbc.Connection; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpGet; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClients; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.select.Elements; import java.io.IOException; import java.net.URL; import java.sql.*; import java.util.ArrayList; /** * @Author zhaoxin * @Email [email protected] * @Description //TODO * @Date 2018/11/29 **/ public class DB { public static void main(String[] args) throws Exception { // connect(); String ptah="https://www.douban.com/doulist/3907668/?tdsourcetag=s_pctim_aiomsg&qq-pf-to=pcqq.c2c"; CloseableHttpResponse indexRes = sendGet(ptah); ArrayList<String> daoyan=new ArrayList<String>(); ArrayList<String> zhuyan=new ArrayList<String>(); ArrayList<String> leixing=new ArrayList<String>(); ArrayList<String> guojia=new ArrayList<String>(); ArrayList<String> year=new ArrayList<String>(); ArrayList<String> name=new ArrayList<String>(); ArrayList<String> url=new ArrayList<String>(); ArrayList<String> p=new ArrayList<String>(); // 获取json内容,将其转换为字符串 Document document=Jsoup.parse(new URL(ptah),2000); //拿到电影名称 Elements moveName=document.select("div[class=title] a"); Elements moveUrl=document.select("div[class=title] a"); Elements moveKey=document.select("div[class=abstract]"); Elements moveP=document.select("span[class=rating_nums]"); for (int i=0;i<moveUrl.size();i++){ url.add(moveUrl.get(i).attr("href")); } for (int i=0;i<moveName.size();i++){ name.add(moveName.get(i).text()); } for (int i=0;i<moveP.size();i++){ p.add(moveP.get(i).text()); } System.out.println(moveKey.get(0).text()); for (int i=0;i<moveKey.size();i++){ String s=moveKey.get(i).text(); if (s.contains("导演")&&s.contains("主演")&&s.contains("类型")&&s.contains("制片国家")&&s.contains("年份")){ daoyan.add(moveKey.get(i).text().substring(s.indexOf("导演")+4,s.indexOf("主演"))); zhuyan.add(moveKey.get(i).text().substring(s.indexOf("主演")+4,s.indexOf("类型"))); leixing.add(moveKey.get(i).text().substring(s.indexOf("类型")+4,s.indexOf("地区"))); guojia.add(moveKey.get(i).text().substring(s.indexOf("地区")+4,s.indexOf("年份"))); year.add(moveKey.get(i).text().substring(s.indexOf("年份"))+4); } } for (int i=0;i<20;i++){ connect(name.get(i),url.get(i),p.get(i),daoyan.get(i),zhuyan.get(i),leixing.get(i),guojia.get(i),year.get(i)); } } //发送get请求,获取响应结果 public static CloseableHttpResponse sendGet(String url) throws IOException { //创建httpClient客户端 CloseableHttpClient httpClient = HttpClients.createDefault(); //创建请求对象,发送请求 HttpGet httpGet = new HttpGet(url); httpGet.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.92 Safari/537.36"); CloseableHttpResponse response = httpClient.execute(httpGet); return response; } public static void connect(String s1,String s2,String s3,String s4,String s5,String s6,String s7,String s8)throws Exception{ String driver="com.mysql.jdbc.Driver"; String url="jdbc:mysql://localhost:3306/test.db?useSSL=true"; String user="root"; String pwd ="root"; Connection conn = null; PreparedStatement stmt = null; // 注册 JDBC 驱动 Class.forName("com.mysql.jdbc.Driver"); // 打开链接 System.out.println("连接数据库..."); conn = (Connection) DriverManager.getConnection(url,user,pwd); stmt=conn.prepareStatement("insert into move values(?,?,?,?,?,?,?,?)"); stmt.setString(1,s1); stmt.setString(2,s2); stmt.setString(3,s3); stmt.setString(4,s4); stmt.setString(5,s5); stmt.setString(6,s6); stmt.setString(7,s7); stmt.setString(8,s8); System.out.println("连接成"); stmt.executeUpdate(); conn.close(); } }   package com.spider; import com.mysql.jdbc.Conne



你的当前访问异常,请进行认证后继续阅读剩余内容。

分享到: