首頁 / CompScience / Programming / Languages / java / 透過proxy使用HttpURLconnection抓網站文章

透過 proxy 使用 HttpURLConnection 抓網站文章

在PHP中有一個有名的函式庫:curl可用

那在 Java 也有類似的類別可以使用,那就是 HttpURLConnection

ㄚ琪這一次從
cURL equivalent in JAVA
java HttpURLConnection來實作get及post動作
解析HttpURLConnection與代理伺服器

這三篇文章整理出一個簡單的程式,供大家參考:

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.Properties;

/**
 * Minimal cURL-like helper built on {@link HttpURLConnection}.
 *
 * <p>Offers a form-encoded POST ({@link #doPost}) and a plain GET
 * ({@link #doGet}); both print the response body to standard output line by
 * line and report success as a boolean. {@link #main} shows how to route the
 * requests through an HTTP proxy using the {@code http.proxyHost} and
 * {@code http.proxyPort} system properties.
 */
public class Lookurls {

    /** Browser-like User-Agent sent with every request. */
    private static final String USER_AGENT =
            "Mozilla/5.0 (Windows; U; Windows NT 6.0; zh-TW; rv:1.9.1.2) "
            + "Gecko/20090729 Firefox/3.5.2 GTB5 (.NET CLR 3.5.30729)";

    /** Charset used to decode the response when the caller passes {@code null}. */
    private static final String DEFAULT_CHARSET = "UTF-8";

    /**
     * Sends {@code data} as an {@code application/x-www-form-urlencoded} POST
     * and prints the response body to standard output.
     *
     * @param sURL    absolute http/https URL to post to
     * @param data    already form-encoded request body; {@code null} is sent
     *                as an empty body
     * @param cookie  value for the {@code Cookie} header, or {@code null} to omit it
     * @param referer value for the {@code Referer} header, or {@code null} to omit it
     * @param charset charset name used to decode the response; {@code null}
     *                falls back to UTF-8
     * @return {@code true} if the request was sent and the whole response was
     *         read without an I/O error, {@code false} otherwise
     */
    public static boolean doPost(String sURL, String data, String cookie,
            String referer, String charset) {
        boolean doSuccess = false;
        HttpURLConnection conn = null;
        try {
            // Encode once so the bytes written and the declared length agree.
            // Form-encoded data is ASCII, for which UTF-8 is byte-identical.
            byte[] body = (data == null ? "" : data).getBytes("UTF-8");

            conn = openHttp(sURL);
            conn.setDoOutput(true);
            conn.setDoInput(true);
            conn.setRequestMethod("POST");
            conn.setUseCaches(false);
            conn.setAllowUserInteraction(true);
            // Per-connection setting only; the static setFollowRedirects would
            // change the behavior of every HttpURLConnection in the JVM.
            conn.setInstanceFollowRedirects(true);

            applyCommonHeaders(conn, cookie, referer);
            conn.setRequestProperty("Content-Type",
                    "application/x-www-form-urlencoded");
            conn.setRequestProperty("Content-Length",
                    String.valueOf(body.length));

            java.io.OutputStream out = conn.getOutputStream();
            try {
                out.write(body);
                out.flush();
            } finally {
                out.close();
            }

            printResponse(conn, charset);
            doSuccess = true;
        } catch (IOException e) {
            System.err.println("doPost failed for " + sURL + ": " + e);
        } finally {
            if (conn != null) {
                conn.disconnect();
            }
        }
        return doSuccess;
    }

    /**
     * Issues a GET request and prints the response body to standard output.
     *
     * @param sURL    absolute http/https URL to fetch
     * @param cookie  value for the {@code Cookie} header, or {@code null} to omit it
     * @param referer value for the {@code Referer} header, or {@code null} to omit it
     * @param charset charset name used to decode the response; {@code null}
     *                falls back to UTF-8
     * @return {@code true} if the whole response was read without an I/O
     *         error, {@code false} otherwise
     */
    public static boolean doGet(String sURL, String cookie, String referer,
            String charset) {
        boolean doSuccess = false;
        HttpURLConnection conn = null;
        try {
            conn = openHttp(sURL);
            // Deliberately no setDoOutput(true)/getOutputStream(): requesting
            // an output stream makes HttpURLConnection send a POST.
            conn.setRequestMethod("GET");
            applyCommonHeaders(conn, cookie, referer);

            printResponse(conn, charset);
            doSuccess = true;
        } catch (IOException e) {
            System.err.println("doGet failed for " + sURL + ": " + e);
        } finally {
            if (conn != null) {
                conn.disconnect();
            }
        }
        return doSuccess;
    }

    /** Opens {@code sURL}, rejecting anything that is not an http/https URL. */
    private static HttpURLConnection openHttp(String sURL) throws IOException {
        java.net.URLConnection raw = new URL(sURL).openConnection();
        if (!(raw instanceof HttpURLConnection)) {
            throw new IOException("Not an HTTP(S) URL: " + sURL);
        }
        return (HttpURLConnection) raw;
    }

    /** Sets the browser-like headers shared by GET and POST requests. */
    private static void applyCommonHeaders(HttpURLConnection conn,
            String cookie, String referer) {
        conn.setRequestProperty("User-agent", USER_AGENT);
        conn.setRequestProperty("Accept",
                "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
        conn.setRequestProperty("Accept-Language",
                "zh-tw,en-us;q=0.7,en;q=0.3");
        conn.setRequestProperty("Accept-Charset",
                "Big5,utf-8;q=0.7,*;q=0.7");
        if (cookie != null) {
            conn.setRequestProperty("Cookie", cookie);
        }
        if (referer != null) {
            conn.setRequestProperty("Referer", referer);
        }
    }

    /** Reads the response body with the given charset and echoes it to stdout. */
    private static void printResponse(HttpURLConnection conn, String charset)
            throws IOException {
        String charsetName = (charset == null) ? DEFAULT_CHARSET : charset;
        BufferedReader reader = new BufferedReader(
                new InputStreamReader(conn.getInputStream(), charsetName));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                System.out.println(line);
            }
        } finally {
            reader.close();
        }
    }

    /**
     * Demo entry point: configures an HTTP proxy through system properties and
     * fetches one page. The URL and charset below are placeholders to replace.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String proxy = "proxy"; // host name of your proxy / firewall server
        String port = "8080";   // port of your proxy / firewall server
        Properties systemProperties = System.getProperties();
        systemProperties.setProperty("http.proxyHost", proxy);
        systemProperties.setProperty("http.proxyPort", port);
        doGet("你要抓的網址", null, null, "該網址的字元集");
    }

}

馬上成為工作達人的Fans

About ㄚ琪

工作達人Fun Taiwan的創辦者及總編,可以在這裡更認識他。

發表迴響

你的電子郵件位址並不會被公開。 Required fields are marked *

*

這個網站採用 Akismet 服務減少垃圾留言。進一步瞭解 Akismet 如何處理網站訪客的留言資料

Scroll To Top