在实现之前,需要注意一个问题。关于Cookie,如果能很好跟踪上到Cookie,基本上,实现都是小事一碟。
在网络上,有提到过验证码老是提示有错。貌似思路流程都是正确的。兄弟,如果你在半个小时还没搞清楚,先休息一下,听听歌,打打游戏。再想过。
推断为什么会出现这种问题。问题是你登录验证码页面(jsp,aspx),实际上就是一个会话或者Cookie记录。此时,你有去连接登录页面,有可能已经重新生成一个新的验证码啦。你用了之前的验证码,指定会提示验证码
错误。
在这里,我只能说一下我遇到的这种问题的解决办法。如果还有其他情况,可以私下M我,我们继续过。
一、抓取一个农业网站思路步骤如下:
1、首先需要获取验证码页面,把它以图片的格式保证下来。(图片格式需要通过抓包的工具查看。如:httpWatch)并跟踪到的cookie保存下来。cookie必须要跟抓包工具看到的cookie一致。
2、提交登录页面所需要的字段和验证码,字段尽可能全部提交上去。
二、代码实现。
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import org.apache.commons.httpclient.Cookie;
import org.apache.commons.httpclient.Header;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.NameValuePair;
import org.apache.commons.httpclient.cookie.CookiePolicy;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.methods.PostMethod;
import org.apache.commons.httpclient.params.DefaultHttpParams;
public class __10086 {
public void test() throws HttpException, IOException{
String url = "http://12582.10086.cn/user/login/verifycode.aspx";
HttpClient httpClient = new HttpClient();
DefaultHttpParams.getDefaultParams().setParameter("http.protocol.cookie-policy", CookiePolicy.BROWSER_COMPATIBILITY);
GetMethod getMethod = new GetMethod("http://12582.10086.cn/user/login/verifycode.aspx");
getMethod.setRequestHeader("Host", "12582.10086.cn");
getMethod.setRequestHeader("User-Agent", "Mozilla/5.0 (Windows NT 5.2; rv:5.0.1) Gecko/20100101 Firefox/5.0.1");
getMethod.setRequestHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
getMethod.setRequestHeader("Accept-Language", "zh-cn,zh;q=0.5");
getMethod.setRequestHeader("Accept-Charset", "GB2312,utf-8;q=0.7,*;q=0.7");
getMethod.setRequestHeader("Connection", "keep-alive");
// getMethod.setRequestHeader("Cookie", "Hm_lvt_6e06bb5a029d6c5473951d1079638828=1328777184942; Hm_lvt_e64244e1e591d0337e17a12b714c0996=1328777186856; WT_FPC=id=183.16.35.230-1530895312.30204303:lv=1328174810886:ss=1328174810886; ASP.NET_SessionId=gj25p555exiqjd45kdcqoq55; BIGipServernxt-wz=369797312.20480.0000; Hm_lpvt_6e06bb5a029d6c5473951d1079638828=1328777184942; Hm_lpvt_e64244e1e591d0337e17a12b714c0996=1328777186856; .12582portals=4CF97704261E34DBE12913CBA18211005E960282A771D94FF3709BAFC99610A7397BE8293ADB2E876A0A7380AC4A158016419745F27511D6E79F82A408D009519D6DDFE18A578E5CFC5F48025C75B33B6EBD43953A7DB05AEBAAB856E0AA29112818B4910350AADACD2012F5DE56297B1F6622ED7F4959D31E19C474E48F7773D7966437");
getMethod.setRequestHeader("Cache-Control", "max-age=0");
int code = httpClient.executeMethod(getMethod);
Header header = getMethod.getResponseHeader("Set-cookie");
System.out.println(header.getValue());
String headerCookie = header.getValue();
String SessionId = headerCookie.substring(headerCookie.indexOf("NET_SessionId=") + "NET_SessionId=".length(), headerCookie.indexOf(";"));
System.out.println(SessionId);
String BIGipServernxt = headerCookie.substring(headerCookie.indexOf("BIGipServernxt-wz=") + "BIGipServernxt-wz=".length(), headerCookie.length());
BIGipServernxt = BIGipServernxt.substring(0, BIGipServernxt.indexOf(";"));
StringBuffer sb = new StringBuffer();
sb.append("Hm_lvt_6e06bb5a029d6c5473951d1079638828=1328778307106; ");
sb.append("Hm_lvt_e64244e1e591d0337e17a12b714c0996=1328778308090; ");
sb.append("WT_FPC=id=183.16.35.230-1530895312.30204303:lv=1328174810886:ss=1328174810886; ");
sb.append("ASP.NET_SessionId=").append(SessionId).append("; ");
sb.append("BIGipServernxt-wz=").append(BIGipServernxt).append("; ");
sb.append("Hm_lpvt_6e06bb5a029d6c5473951d1079638828=1328778307106; ");
sb.append("Hm_lpvt_e64244e1e591d0337e17a12b714c0996=1328778308090");
System.out.println(sb.toString());
String ValidCode = savegif(getMethod);
getMethod.releaseConnection();
// 第二次链接
httpClient.getHostConfiguration().setHost("12582.10086.cn", 80, "http");
PostMethod method = getPostMethod(ValidCode);
method.setRequestHeader("User-Agent", "Mozilla/5.0 (Windows NT 5.2; rv:5.0.1) Gecko/20100101 Firefox/5.0.1");
method.setRequestHeader("Host", "12582.10086.cn");
method.setRequestHeader("Accept", "application/json, text/javascript, */*");
method.setRequestHeader("Accept-Language", "zh-cn,zh;q=0.5");
// method.setRequestHeader("Accept-Encoding", "gzip, deflate");
method.setRequestHeader("Accept-Charset", "GB2312,utf-8;q=0.7,*;q=0.7");
method.setRequestHeader("Connection", "keep-alive");
method.setRequestHeader("Content-Type", "application/x-www-form-urlencoded; charset=UTF-8");
method.setRequestHeader("X-Requested-With", "XMLHttpRequest");
method.setRequestHeader("Referer", "http://12582.10086.cn/user/login/");
method.setRequestHeader("Cookie", sb.toString());
httpClient.executeMethod(method);
System.out.println(method.getStatusCode());
System.out.println(method.getResponseBodyAsString());
// 第三次链接
Header header1 = method.getResponseHeader("Set-cookie");
System.out.println("dd=" + header1.getValue());
sb.append("; " + header1.getValue());
Cookie[] cookies = httpClient.getState().getCookies();
method.releaseConnection();
String my = "http://12582.10086.cn/my";
System.out.println(sb.toString());
getMethod = new GetMethod(my);
getMethod.setRequestHeader("Host", "12582.10086.cn");
getMethod.setRequestHeader("User-Agent", "Mozilla/5.0 (Windows NT 5.2; rv:5.0.1) Gecko/20100101 Firefox/5.0.1");
getMethod.setRequestHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
getMethod.setRequestHeader("Accept-Language", "zh-cn,zh;q=0.5");
getMethod.setRequestHeader("Accept-Charset", "GB2312,utf-8;q=0.7,*;q=0.7");
getMethod.setRequestHeader("Connection", "keep-alive");
getMethod.setRequestHeader("Referer", "http://12582.10086.cn/user/login");
getMethod.setRequestHeader("Cookie", sb.toString());
code = httpClient.executeMethod(getMethod);
System.out.println(getMethod.getStatusCode());
System.out.println(getMethod.getResponseBodyAsString());
File storeFile = new File("c:/2008sohu.html");
FileOutputStream output = new FileOutputStream(storeFile);
//得到网络资源的字节数组,并写入文件
output.write(getMethod.getResponseBody());
output.close();
}
public String savegif(GetMethod getMethod) throws IOException {
File storeFile = new File("c:/2008sohu.gif");
FileOutputStream output = new FileOutputStream(storeFile);
//得到网络资源的字节数组,并写入文件
output.write(getMethod.getResponseBody());
output.close();
InputStreamReader is = new InputStreamReader(System.in);
BufferedReader br = new BufferedReader(is);
String ValidCode = "";
try {
ValidCode = br.readLine();
br.close();
is.close();
} catch(Exception e) {
e.printStackTrace();
}
return ValidCode;
}
private PostMethod getPostMethod(String ValidCode) {
PostMethod post = new PostMethod("/ajax/postlogin");
NameValuePair[] simcard = {
new NameValuePair("email", "XXX"),
new NameValuePair("password", "XXX"),
new NameValuePair("ValidCode", ValidCode),
new NameValuePair("rme", "0"),
};
post.setRequestBody(simcard);
return post;
}
public static void main(String args[]) {
__10086 _10086 = new __10086();
try {
_10086.test();
} catch (HttpException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
}
来源于:http://www.agrilink.cn/