package com.xinhuanet.cloudDesk.controller;

import java.io.IOException;
import java.nio.charset.Charset;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpConnectionManager;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.params.HttpConnectionManagerParams;
import org.htmlparser.Parser;
import org.htmlparser.visitors.HtmlPage;

/**
 * Small command-line demo: fetches a web page through an HTTP proxy, works out
 * which character encoding the page declares, decodes the body with that
 * encoding, and prints the decoded HTML followed by the page title.
 */
public class R {

    /**
     * Finds a {@code charset=} declaration inside a single {@code <meta ...>} tag.
     * {@code [^>]+?} keeps the match inside the tag (and lets it span line breaks);
     * the optional non-word character skips an opening quote. Group 1 is the name.
     */
    private static final Pattern META_CHARSET = Pattern.compile(
            "<meta[^>]+?charset=[^\\w]?([-\\w]+)", Pattern.CASE_INSENSITIVE);

    /**
     * Encoding used for the first, "sniffing" decode and as the last-resort
     * fallback. ISO-8859-1 maps every byte to exactly one char, so the ASCII
     * meta tag stays readable and the result does not depend on the platform
     * default charset.
     */
    private static final String SNIFF_CHARSET = "ISO-8859-1";

    /**
     * Socket and connection timeout in milliseconds. Value kept from the original
     * code (about 5.5 hours). NOTE(review): this looks unintentionally large —
     * confirm the intended timeout.
     */
    private static final int TIMEOUT_MILLIS = 20000000;

    /**
     * Fetches the hard-coded URL through the hard-coded proxy and prints the
     * chosen charset, the decoded page and its title to standard output.
     *
     * @param args unused
     * @throws Exception on any network, decoding or HTML parsing failure
     */
    public static void main(String[] args) throws Exception {
        HttpClient httpClient = new HttpClient();
        httpClient.getHostConfiguration().setProxy("202.84.17.41", 8080);

        HttpConnectionManager httpConnManager = httpClient.getHttpConnectionManager();
        if (httpConnManager != null) {
            HttpConnectionManagerParams mgrParams = new HttpConnectionManagerParams();
            mgrParams.setSoTimeout(TIMEOUT_MILLIS);
            mgrParams.setTcpNoDelay(true);
            mgrParams.setConnectionTimeout(TIMEOUT_MILLIS);
            mgrParams.setLinger(0);
            mgrParams.setStaleCheckingEnabled(false);
            httpConnManager.setParams(mgrParams);
        }

        String url = "http://www.poetry4cn.com";
        GetMethod methodGet = new GetMethod(url);
        try {
            httpClient.executeMethod(methodGet);

            // Read the body once and reuse the bytes for both decodes.
            byte[] body = methodGet.getResponseBody();
            if (body == null) {
                throw new IOException("No response body received from " + url);
            }

            // Preference order: charset declared in the page's meta tag, then the
            // charset reported for the HTTP response, then plain ISO-8859-1.
            String charset = getCharSet(new String(body, SNIFF_CHARSET));
            if (!isUsableCharset(charset)) {
                charset = methodGet.getResponseCharSet();
            }
            if (!isUsableCharset(charset)) {
                charset = SNIFF_CHARSET;
            }
            System.out.println("getCharSet:" + charset);

            String responseGet = new String(body, charset);
            System.out.println(responseGet);

            Parser myParser = Parser.createParser(responseGet, charset);
            HtmlPage visitor = new HtmlPage(myParser);
            myParser.visitAllNodesWith(visitor);
            String textInPage = visitor.getTitle();
            System.out.println("title:" + textInPage);
        } finally {
            // Always hand the connection back to the connection manager.
            methodGet.releaseConnection();
        }
    }

    /**
     * Extracts the charset name declared in the first {@code <meta>} tag of the
     * given HTML that carries a {@code charset=} attribute or parameter.
     * Matching is case-insensitive.
     *
     * @param content HTML text to scan; may be {@code null}
     * @return the declared charset name exactly as written in the page, or
     *         {@code null} if {@code content} is {@code null} or contains no
     *         such declaration
     */
    public static String getCharSet(String content) {
        if (content == null) {
            return null;
        }
        Matcher matcher = META_CHARSET.matcher(content);
        if (matcher.find()) {
            return matcher.group(1);
        }
        return null;
    }

    /**
     * Tells whether a charset name can safely be passed to
     * {@code new String(byte[], String)} on this JVM.
     *
     * @param name candidate charset name; may be {@code null}
     * @return {@code true} only if the name is non-null, legal and supported
     */
    private static boolean isUsableCharset(String name) {
        if (name == null) {
            return false;
        }
        try {
            return Charset.isSupported(name);
        } catch (IllegalArgumentException ignored) {
            // IllegalCharsetNameException: the page declared a malformed name.
            return false;
        }
    }
}