Linux环境下Java语言实现简陋Web爬虫:import java.io.BufferedReader; import java.io.File; import java.io.FileWriter; import java.io.IOException; import java.io.InputStreamReader; import java.io.PrintWriter; import java.net.Socket; import java.net.UnknownHostException;public class WebCrawler { private static String Text_File_Path = "/home/zms/htmldoc/htmldoc1.html"; //运行前最好先建立此目录和文件,用于存放爬取的页面内容 public static void main(String[] args) { // TODO Auto-generated method stub try { File file = new File(Text_File_Path); FileWriter fpWriter = new FileWriter(file);
//生成下载对象 Socket webclient = new Socket("ubuntuone.cn",80); PrintWriter result = new PrintWriter(webclient.getOutputStream(),true); BufferedReader receiver = new BufferedReader(new InputStreamReader(webclient.getInputStream()));