该用户从未签到
|
程序逻辑:
从命令行第一个参数指定的配置文件(例如 config.txt)中读取第一行的 URL 链接,根据该 URL 打开输入流,并将接收到的网页内容保存到第二个参数指定目录下的 url001.html 文件中。
- package com.changying.spider;
- import java.io.BufferedReader;
- import java.io.File;
- import java.io.FileOutputStream;
- import java.io.FileReader;
- import java.io.IOException;
- import java.io.InputStream;
- import java.io.OutputStream;
- import java.net.URL;
/**
 * Minimal page fetcher: reads a URL from the first line of a config file,
 * downloads the resource behind it and stores the raw bytes as
 * {@code url001.html} inside an output directory.
 */
public class Spider {

    /** Name of the file the downloaded content is written to. */
    private static final String RESULT_FILE_NAME = "url001.html";

    /** Size in bytes of the buffer used while copying the stream. */
    private static final int BUFFER_SIZE = 4096;

    /**
     * Program entry point.
     *
     * @param args {@code args[0]} is the path of the config file whose first
     *             line holds the URL to fetch; {@code args[1]} is the directory
     *             that receives {@code url001.html}
     * @throws IllegalArgumentException if fewer than two arguments are supplied
     * @throws IOException if the config file is empty or unreadable, the URL is
     *                     malformed or unreachable, or the result cannot be written
     */
    public static void main(String[] args) throws IOException {
        if (args == null || args.length < 2) {
            throw new IllegalArgumentException(
                    "Usage: Spider <config-file> <output-directory>");
        }
        System.out.println(args[0]);
        System.out.println(args[1]);

        String strUrl = readFirstLine(new File(args[0]));
        System.out.println(strUrl);
        URL url = new URL(strUrl);

        File outputDir = new File(args[1]);
        if (!outputDir.isDirectory() && !outputDir.mkdirs()) {
            throw new IOException("Cannot create output directory: " + outputDir);
        }

        // Build the path with File(parent, child) so the platform's own
        // separator is used instead of a hard-coded Windows backslash.
        download(url, new File(outputDir, RESULT_FILE_NAME));
    }

    /** Returns the trimmed first line of {@code config}, failing if it is missing or blank. */
    private static String readFirstLine(File config) throws IOException {
        try (BufferedReader reader = new BufferedReader(new FileReader(config))) {
            String line = reader.readLine();
            if (line == null || line.trim().isEmpty()) {
                throw new IOException("No URL found on the first line of " + config);
            }
            return line.trim();
        }
    }

    /** Copies everything readable from {@code url} into {@code target}, closing both streams. */
    private static void download(URL url, File target) throws IOException {
        // try-with-resources closes both streams even when the copy fails midway.
        try (InputStream in = url.openStream();
             OutputStream out = new FileOutputStream(target)) {
            byte[] buffer = new byte[BUFFER_SIZE];
            int bytesRead;
            while ((bytesRead = in.read(buffer)) != -1) {
                out.write(buffer, 0, bytesRead);
            }
        }
    }
}
复制代码
|
|