阅读背景:

为什么网站爬行会永远消失?

来源:互联网 
public class Parser {

    public static void main(String[] args) {
        Parser p = new Parser();
        p.matchString();
    }

    parserObject courseObject = new parserObject();
    ArrayList<parserObject> courseObjects = new ArrayList<parserObject>();
    ArrayList<String> courseNames = new ArrayList<String>();
    String theWebPage = " ";

    {
        try {
            URL theUrl = new URL("https://ocw.mit.edu/courses/");
            BufferedReader reader =
                new BufferedReader(new InputStreamReader(theUrl.openStream()));
            String str = null;

            while((str = reader.readLine()) != null) {
                theWebPage = theWebPage + " " + str;
            }
            reader.close();

        } catch (MalformedURLException e) {
            // do nothing
        } catch (IOException e) {
            // do nothing
        }
    }

    public void matchString() {
        // this is my regex that I am using to compare strings on input page
        String matchRegex = "#\w+(-\w+)+";

        Pattern p = Pattern.compile(matchRegex);
        Matcher m = p.matcher(theWebPage);

        int i = 0;
        while (!m.hitEnd()) {
            try {
                System.out.println(m.group());
                courseNames.add(i, m.group());
                i++;
            } catch (IllegalStateException e) {
                // do nothing
            }
        }
    }
}
public class Parser {

    public static void m



你的当前访问异常,请进行认证后继续阅读剩余内容。

分享到: