用httpunit写的spider程序:可以监测网站的错误页面!(2) if (newLink) { System.out.println("Total links checked so far: " + checkedLinks.size()); checkLink(links[i]); } } }
/**
 * Tells whether a response reports the {@code text/html} content type.
 *
 * @param response the response whose content type is inspected
 * @return {@code true} if the reported content type is {@code text/html}
 *         (compared case-insensitively); {@code false} otherwise, including
 *         when no content type is reported
 */
private boolean isHtml(WebResponse response) {
    // Literal-first comparison: a null content type yields false instead of a
    // NullPointerException. MIME type names are case-insensitive, so
    // "Text/HTML" must be accepted as well.
    return "text/html".equalsIgnoreCase(response.getContentType());
}
/**
 * Follows a single link and, when it points at the host being crawled,
 * fetches the target page and checks the links found on it.
 *
 * <p>The URL of every link is printed before the host comparison, so links
 * to other hosts are logged but never requested.
 *
 * @param link the link to follow
 * @throws Exception if building the request, fetching the response, or the
 *         nested {@code checkAllLinks} call fails
 */
private void checkLink(WebLink link) throws Exception {
    WebRequest request = link.getRequest();
    // The package name is lower-case "java.net"; the original "Java.net.URL"
    // does not resolve and prevented the class from compiling.
    java.net.URL url = request.getURL();
    System.out.println("checking link: " + url);

    String linkHost = url.getHost();
    // Only pages on the crawled host are fetched, which keeps the crawl from
    // wandering off to other sites.
    if (linkHost.equals(this.host)) {
        WebResponse response = conversation.getResponse(request);
        this.checkAllLinks(response);
    }
}
} // closes the enclosing class (its header is outside this excerpt)