(注意:不能直接通过 JVM 系统属性设置全局代理,例如:
System.getProperties().setProperty("http.proxyHost", ip);
System.getProperties().setProperty("http.proxyPort", "80");
设置 JVM 全局代理后,GetDlIp.dlip() 自身的请求也会经过该代理,导致无法再次获取新的代理 IP;因此下面的代码改为对每个请求单独设置代理。)
-----------------------------------------设置请求的代理IP
package com.bageer.meituan;
//9f27a0700a6b26192380a791807639bf
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Random;
import org.apache.http.Consts;
import org.apache.http.HttpEntity;
import org.apache.http.HttpHost;
import org.apache.http.NameValuePair;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import com.bageer.dao.DataInfoDao;
import com.bageer.dao.DataInfoDaoImpl;
import com.bageer.model.Shop_Info_Model;
public
class GetMtLhfj {
public static DataInfoDao dao=new DataInfoDaoImpl();
public static void main(String[] args) {
int page=1;
while(true){
List list=dao.getData("select SHOP_URL ,Rownum rn from o2o_shop_info t where city='邢台市' and pt_name='美团外卖' and shop_url like '%access_source%' and is_lhfj is null", "1");
if(list.size()==0){
System.exit(0);
}
for (int i = 0; i < list.size(); i++) {
Map map=(Map)list.get(i);
String shop_url=map.get("SHOP_URL").toString();
Shop_Info_Model shopIfo=search(shop_url.split("\\?")[0].replace("restaurant", "qualification"));
if(shopIfo!=null){
if(shopIfo.getDeath()==null){
shopIfo.setShop_url(shop_url);
dao.update(shopIfo);
}else{
//死链接,为什么要单独更新死链接
//单独更新死链接可以只更新死链接的状态并不修改数据库中的数据
}
}
System.out.println(page+"==========================");
/*try {
Random rnd=new Random();
Thread.sleep((rnd.nextInt(1000)+1000));
} catch (InterruptedException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}*/
}
page++;
}
}
public static Shop_Info_Model search(String url){
String ip=GetDlIp.dlip();
CloseableHttpClient httpClient = HttpClients.create
Default();
CloseableHttpResponse response = null;
InputStream is = null;
//封装请求参数
List<NameValuePair> params = new ArrayList<NameValuePair>();
params.add(new BasicNameValuePair("access_source", "1004"));
String str = "";
try {
//转换为键值对
str = EntityUtils.toString(new UrlEncodedFormEntity(params, Consts.UTF_8));
System.out.println(str);
//创建Get请求
HttpGet httpGet = new HttpGet(url+"?"+str);
httpGet.setHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8");
httpGet.setHeader("Accept-Encoding", "gzip, deflate, sdch");
httpGet.setHeader("Accept-Language", "zh-CN,zh;q=0.8");
httpGet.setHeader("Cache-Control", "max-age=0");
httpGet.setHeader("Connection", "keep-alive");
httpGet.setHeader("Cookie", "w_uuid=jIr1mG-pCMj69WwvZxpD7LvaHf2k9xnJPzGB9aeIPDWEG-mSYOur_6UIOYG-DZfP; _lxsdk_cuid=162cc2b4462c8-0977acd7dae47d-6b1b1279-100200-162cc2b4463c8; _lxsdk=162cc2b4462c8-0977acd7dae47d-6b1b1279-100200-162cc2b4463c8; _lx_utm=utm_campaign%3Dbaidu%26utm_source%3D1522; _ga=GA1.2.1772268467.1523843811; _gid=GA1.2.1093233036.1524031032; w_cid=110108; w_cpy_cn=\"%E6%B5%B7%E6%B7%80%E5%8C%BA\"; w_cpy=haidianqu; waddrname=\"%E7%B4%AB%E9%87%91%E5%A4%A7%E5%8E%A6\"; w_geoid=wx4equc9mmtp; w_ah=\"39.972626846283674,116.31454780697823,%E7%B4%AB%E9%87%91%E5%A4%A7%E5%8E%A6\"; JSESSIONID=jn7psbnnfzlg11c9ckvs7gfzd; _ga=GA1.3.1772268467.1523843811; _gid=GA1.3.1093233036.1524031032; _gat=1; w_utmz=\"utm_campaign=baidu&utm_source=1522&utm_medium=(none)&utm_content=(none)&utm_term=(none)\"; w_visitid=90067f92-aa84-4233-8ded-4943e427773c; __mta=251558201.1523843813548.1524031660063.1524031728661.43; _lxsdk_s=162d7540a1a-567-6e4-74b%7C%7C22");
httpGet.setHeader("Host", "waimai.meituan.com");
httpGet.setHeader("Upgrade-Insecure-Requests", "1");
httpGet.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36");
//执行Get请求,
System.out.println(ip);
HttpHost proxy = new HttpHost(ip.split(":")[0],Integer.parseInt( ip.split(":")[1].split(",")[0]));
RequestConfig requestConfig = RequestConfig.custom()
.setProxy(proxy)
.setConnectTimeout(10000)
.setSocketTimeout(10000)
.setConnectionRequestTimeout(3000)
.build();
httpGet.setConfig(requestConfig);
response = httpClient.execute(httpGet);
//得到响应体
HttpEntity entity = response.getEntity();
if(entity != null){
is = entity.getContent();
//转换为字节输入流
BufferedReader br = new BufferedReader(new InputStreamReader(is, Consts.UTF_8));
String body = null;
StringBuilder sb=new StringBuilder();
while((body=br.readLine()) != null){
sb.append(body);
}
Document doc= Jsoup.parse(sb.toString());
Shop_Info_Model shopinfo=new Shop_Info_Model();
if(doc.text().indexOf("返回首页")>0){
shopinfo.setDeath("1");
return shopinfo;
}
String csdj = null;// 场所等级
String gldj = null;// 管理等级
String jcrq = null;// 检查日期
String dwmc = null;// 单位名称
String xkzh = null;// 许可证号
String fddbr = null;// 法定代表人
String jydz = null;// 经营地址
String ztyt = null;// 主体业态
String jyfw = null;// 经营范围
String yxq = null;// 有效期
int indexOf = sb.indexOf("营业执照");
String yyzh = null;
String xkz = null;
if (indexOf > -1) {
yyzh = "1";
} else {
yyzh = "0";
}
int indexOf2 = sb.indexOf("餐饮服务许可证");
if (indexOf2 > -1) {
xkz = "1";
} else {
indexOf2 = sb.indexOf("特许证件");
if (indexOf2 > -1) {
xkz = "1";
} else {
xkz = "0";
}
}
shopinfo.setIs_yyzz(yyzh);
shopinfo.setIs_xkz(xkz);
String text = doc.getElementsByAttributeValue("class", "new-zizhi").text();
if (text.length() > 2) {
shopinfo.setIs_lhfj("1");
if (text.indexOf("场所等级:") > -1) {
csdj = text.split("场所等级:")[1].trim().split(" ")[0].trim();
System.out.println(csdj);
shopinfo.setCsdj(csdj);
}
if (text.indexOf("管理等级:") > -1) {
gldj = text.split("管理等级:")[1].trim().split(" ")[0].trim();
System.out.println(gldj);
shopinfo.setGldj(gldj);;
}
if (text.indexOf("检查日期:") > -1) {
jcrq = text.split("检查日期:")[1].trim().split(" ")[0].trim();
System.out.println(jcrq);
shopinfo.setJcrq(jcrq);
}
if (text.indexOf("单位名称:") > -1) {
dwmc = text.split("单位名称:")[1].trim().split(" ")[0].trim();
System.out.println(dwmc);
shopinfo.setDwmc(dwmc);
}
if (text.indexOf("许可证号:") > -1) {
xkzh = text.split("许可证号:")[1].trim().split(" ")[0].trim();
System.out.println(xkzh);
shopinfo.setXkzh(xkzh);
}
if (text.indexOf("法定代表人") > -1) {
fddbr = text.split("法定代表人")[1].trim().split(" ")[0].trim();
System.out.println(fddbr);
shopinfo.setFddbr(fddbr);
}
if (text.indexOf("经营地址:") > -1) {
jydz = text.split("经营地址:")[1].trim().split(" ")[0].trim();
System.out.println(jydz);
shopinfo.setJydz(jydz);
}
if (text.indexOf("主体业态:") > -1) {
ztyt = text.split("主体业态:")[1].trim().split(" ")[0].trim();
System.out.println(ztyt);
shopinfo.setZtyt(ztyt);
}
if (text.indexOf("经营项目:") > -1) {
jyfw = text.split("经营项目:")[1].trim().split(" ")[0].trim();
System.out.println(jyfw);
shopinfo.setJyfw(jyfw);
}
if (text.indexOf("有效期:") > -1) {
yxq = text.split("有效期:")[1].trim().split(" ")[0].trim();
System.out.println(yxq);
shopinfo.setYxq(yxq);
}
} else {
System.out.println("没有量化分级"+"================"+sb.length());
System.out.println("没有量化分级"+"================"+sb.indexOf("营业执照"));
System.out.println("没有量化分级"+"================"+sb.indexOf("许可证"));
shopinfo.setIs_lhfj("0");
if(sb.length()<500){
return null;
}
}
return shopinfo;
}
} catch (Exception e) {
e.printStackTrace();
} finally{
//关闭输入流,释放资源
if(is != null){
try {
is.close();
} catch (IOException e) {
e.printStackTrace();
}
}
//消耗实体内容
if(response != null){
try {
response.close();
} catch (IOException e) {
e.printStackTrace();
}
}
//关闭相应 丢弃http连接
if(httpClient != null){
try {
httpClient.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
return null;
}
}
-----------------------------------获取代理IP
package com.bageer.meituan;
import java.io.BufferedInputStream;
import java.io.InputStream;
import java.net.HttpURLConnection;
/**
 * Fetches a proxy address from the data5u dynamic proxy API.
 */
public class GetDlIp {
    /** Connect timeout for the proxy API, in milliseconds. */
    private static final int CONNECT_TIMEOUT_MS = 3000;
    /** Read timeout for the proxy API, in milliseconds, so a stalled response cannot block forever. */
    private static final int READ_TIMEOUT_MS = 10000;

    /**
     * Requests proxy addresses from the API and returns the first line of the response.
     *
     * @return the first response line with surrounding whitespace removed, or
     *         {@code null} when the request fails
     */
    public static String dlip(){
        // Fill in the data5u proxy order number here; no IP can be fetched without it.
        String order = "xxxx";
        InputStream in = null;
        try {
            java.net.URL url = new java.net.URL("http://api.ip.data5u.com/dynamic/get.html?order=" + order + "&ttl&random=true");
            //java.net.URL url = new java.net.URL("http://103.30.41.108/Tools/proxyIP.ashx?Type=YYYYY888888885545555555556&qty=5");
            // Open the connection once, so the timeouts apply to the connection that is actually used.
            HttpURLConnection connection = (HttpURLConnection)url.openConnection();
            connection.setConnectTimeout(CONNECT_TIMEOUT_MS);
            connection.setReadTimeout(READ_TIMEOUT_MS);
            in = connection.getInputStream();

            // Read until end of stream; available() is only an estimate and may be 0
            // before the body has arrived.
            java.io.ByteArrayOutputStream buffer = new java.io.ByteArrayOutputStream();
            byte[] chunk = new byte[1024];
            int bytesRead;
            while ((bytesRead = in.read(chunk)) != -1) {
                buffer.write(chunk, 0, bytesRead);
            }

            String[] res = new String(buffer.toByteArray(), "UTF-8").split("\n");
            System.out.println(">>>>>>>>>>>>>>当前返回IP量 " + res.length);
            // trim() removes a trailing '\r' when the API answers with CRLF line endings.
            return res[0].trim();
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            if (in != null) {
                try {
                    in.close();
                } catch (java.io.IOException e) {
                    e.printStackTrace();
                }
            }
        }
        return null;
    }
}