部落格觀察的資料探勘

「部落格觀察」網站上的資料很適合拿來做資料探勘,搭配Java的htmlparser函式庫,就可以寫程式擷取並探索其中的資訊,範例程式如下:

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.Arrays;
import java.util.Properties;

import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.filters.TagNameFilter;
import org.htmlparser.tags.LinkTag;
import org.htmlparser.tags.TableColumn;
import org.htmlparser.tags.TableRow;
import org.htmlparser.tags.TableTag;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserException;

class Test
{
public static void main (String[] args)
{
String proxy = “防火牆主機”,
port = “防火牆port”;
Properties systemProperties = System.getProperties();
systemProperties.setProperty(“http.proxyHost”,proxy);
systemProperties.setProperty(“http.proxyPort”,port);
String username = “資料庫帳號”;
String password = “資料庫密碼”;
String durl = “jdbc:mysql://localhost/資料庫名稱?useUnicode=true&characterEncoding=utf8”;
String driver = “com.mysql.jdbc.Driver”;

try {
Class.forName(driver);
Connection connection = DriverManager.getConnection(durl, username, password);
Statement statement = connection.createStatement();
for(int i=2;i<4;i++)
{

try
{
StringBuilder sql = new StringBuilder(“insert into 資料表名稱 values(‘”);
sql.append(i+”‘,'”);
StringBuilder qurl = new StringBuilder(“http://look.urs.tw/show.php?BlogID=”+i);
Parser parser = new Parser (qurl.toString());
String filterStr = “table”;
NodeFilter filter = new TagNameFilter(filterStr);
NodeList nodeList = parser.extractAllNodesThatMatch(filter);
TableTag tabletag = (TableTag) nodeList.elementAt(4);
NodeList title = tabletag.childAt(0).getChildren().elementAt(0).getChildren();
LinkTag node = (LinkTag)title.elementAt(0);
String blogname = title.elementAt(0).getChildren().toHtml();
sql.append(blogname+”‘,'”);
String url = node.extractLink();
sql.append(url+”‘,”);
TableRow[] trows = tabletag.getRows();
System.out.println(url);
TableColumn[] tcol3 = trows[3].getColumns();
String growthdegree = tcol3[0].getChildren().elementAt(0).toHtml().replace(“/”,””);
sql.append(growthdegree+”,”);
String liveness = tcol3[3].getChildren().elementAt(0).toHtml().replace(“/”,””);
sql.append(liveness+”,”);
TableColumn[] tcol4 = trows[4].getColumns();
String totalrank  = tcol4[0].getChildren().elementAt(1).getChildren().toHtml();
sql.append(totalrank+”,”);
String growth = tcol4[3].getChildren().elementAt(0).toHtml().replace(“/”,””);
sql.append(growth+”,”);
TableColumn[] tcol5 = trows[5].getColumns();
String index  = tcol5[0].getChildren().elementAt(0).toHtml().replace(“/”,””);
sql.append(index+”,”);
String influence = tcol5[3].getChildren().elementAt(0).toHtml().replace(“/”,””);
sql.append(influence+”,”);
TableColumn[] tcol6 = trows[6].getColumns();
String total   = tcol6[0].getChildren().elementAt(0).toHtml().replace(“/”,””);
sql.append(total+”,”);
String uvalue = tcol6[3].getChildren().elementAt(0).toHtml().replace(“/”,””);
sql.append(uvalue+”,”);
TableColumn[] tcol7 = trows[7].getColumns();
String u1   = tcol7[0].getChildren().elementAt(0).getChildren().toHtml();
sql.append(u1+”,”);
String[] technoratis = tcol7[3].getChildren().elementAt(0).getChildren().toHtml().split(“/”);
String technorati = technoratis[0];
sql.append(technorati+”,”);
TableColumn[] tcol8 = trows[8].getColumns();
String google   = tcol8[0].getChildren().elementAt(0).getChildren().toHtml();
sql.append(google+”,”);
String googlelinks = tcol8[3].getChildren().elementAt(0).getChildren().toHtml();
sql.append(googlelinks+”,”);
String yahoolinks = tcol8[3].getChildren().elementAt(2).getChildren().toHtml();
sql.append(yahoolinks+”,”);
TableColumn[] tcol9 = trows[9].getColumns();
String blogger   = tcol9[0].getChildren().elementAt(0).getChildren().toHtml();
sql.append(blogger+”,”);
String MyWeb = tcol9[3].getChildren().elementAt(0).getChildren().toHtml();
sql.append(MyWeb+”,”);
TableColumn[] tcol10 = trows[10].getColumns();
String bloglines   = tcol10[0].getChildren().elementAt(0).toHtml();
sql.append(bloglines+”,”);
String delicious = tcol10[3].getChildren().elementAt(0).getChildren().toHtml();
sql.append(delicious+”,”);
TableColumn[] tcol11 = trows[11].getColumns();
String Furl   = tcol11[0].getChildren().elementAt(0).getChildren().toHtml();
sql.append(Furl+”,”);
String HemiDemi = tcol11[3].getChildren().elementAt(0).getChildren().toHtml();
sql.append(HemiDemi+”,”);
TableColumn[] tcol12 = trows[12].getColumns();
String MyShare   = tcol12[0].getChildren().elementAt(0).toHtml();
sql.append(MyShare+”,”);
String[] u2 = tcol12[3].getChildren().elementAt(0).toHtml().split(“/”);
String look = u2[0];
sql.append(look+”,”);
String MyZilla = u2[1];
sql.append(MyZilla+”,”);
TableColumn[] tcol13 = trows[13].getColumns();
String mybloglog   = tcol13[0].getChildren().elementAt(0).getChildren().toHtml();
sql.append(mybloglog+”,”);
String[] u3   = tcol13[3].getChildren().elementAt(0).getChildren().toHtml().split(“/”);
String feedburner = u3[0];
sql.append(feedburner+”,”);
TableColumn[] tcol14 = trows[14].getColumns();
String alexaall = tcol14[0].getChildren().elementAt(0).getFirstChild().toHtml();
sql.append(alexaall+”,”);
String alexataiwan = tcol14[3].getChildren().elementAt(0).getFirstChild().toHtml();
sql.append(alexataiwan+”,”);
TableColumn[] tcol15 = trows[15].getColumns();
String TRank   = tcol15[0].getChildren().elementAt(0).getChildren().toHtml();
sql.append(TRank+”,”);
String[] u4 = tcol15[3].getChildren().elementAt(0).toHtml().split(“/”);
String funp0 = u4[0];
sql.append(funp0+”,”);
String funp1 = u4[1];
sql.append(funp1+”,”);
TableColumn[] tcol16 = trows[16].getColumns();
String onhits  = tcol16[0].getChildren().elementAt(0).toHtml().replace(“/”,””);
sql.append(onhits+”,”);
String weekhits  = tcol16[3].getChildren().elementAt(0).toHtml().replace(“/”,””);
sql.append(weekhits+”,”);
TableColumn[] tcol17 = trows[17].getColumns();
String goodreading   = tcol17[0].getChildren().elementAt(0).toHtml();
sql.append(goodreading+”,”);
String PageRank   = tcol17[3].getChildren().elementAt(0).toHtml();
sql.append(PageRank+”,”);
TableColumn[] tcol18 = trows[18].getColumns();
String blogvalue    = tcol18[0].getChildren().elementAt(0).toHtml();
sql.append(blogvalue+”,”);
TableColumn[] tcol19 = trows[19].getColumns();
String[] u5   = tcol19[0].getChildren().elementAt(0).getChildren().toHtml().split(“/”);
String Plurk = u5[0];
sql.append(Plurk+”,”);
String Plurktive = tcol19[3].getChildren().elementAt(0).getFirstChild().toHtml().replace(“/”,””);
sql.append(Plurktive+”,null)”);
System.out.println(sql);
statement.executeUpdate(sql.toString());
}
catch (ParserException pe)
{
pe.printStackTrace ();
}
}
} catch (ClassNotFoundException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (SQLException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}

}
}

感謝你看到這裡,很快就可以離開了,但最好的獎勵行動就是按一下幫我分享或留言,感恩喔~

點我分享到Facebook

發佈留言

發佈留言必須填寫的電子郵件地址不會公開。 必填欄位標示為 *