采集ymx商品信息_JAVA_编程开发_程序员俱乐部

中国优秀的程序员网站程序员频道CXYCLUB技术地图
热搜:
更多>>
 
您所在的位置: 程序员俱乐部 > 编程开发 > JAVA > 采集ymx商品信息

采集ymx商品信息

 2018/8/29 18:47:48  st4024589553  程序员俱乐部  我要评论(0)
  • 摘要:public static void main(String[] args) { String keyword = "鼠标"; keyword = UrlEncoded.encodeString(keyword); System.out.println(keyword); System.out.println(System.currentTimeMillis()); String urlStr = "https://www.amazon.cn/s/ref=sr_pg_1
  • 标签:
public static void main(String[] args) {
String keyword = "鼠标";
keyword=UrlEncoded.encodeString(keyword);
System.out.println(keyword);

System.out.println(System.currentTimeMillis());
    String urlStr = "https://www.amazon.cn/s/ref=sr_pg_1?rh=i%3Aaps%2Ck%3A%E9%BC%A0%E6%A0%87&page=1&keywords=%E9%BC%A0%E6%A0%87&ie=UTF8&qid=1535523044";
    try {
    String html =getHttpHeaders(urlStr);
    Document doc = Jsoup.parse(html);
    Elements els = doc.select("div.s-item-container");
    String goodName ="";
    String goodUrl ="";
    String goodPrice ="";
    for (Element e : els) {
    goodName= e.getElementsByAttributeValue("class", "a-link-normal s-access-detail-page  s-color-twister-title-link a-text-normal").attr("title");
    goodUrl= e.getElementsByAttributeValue("class", "a-link-normal s-access-detail-page  s-color-twister-title-link a-text-normal").attr("href");
    goodPrice= e.getElementsByAttributeValue("class", "a-size-base a-color-price s-price a-text-bold").text();
    if(goodPrice.contains("¥")){
    goodPrice=goodPrice.replace("¥", "");
    }
    if(goodName.length()>0){
    System.out.println("goodName="+goodName);
    System.out.println("goodUrl="+goodUrl);
    System.out.println("goodPrice="+goodPrice);
    }
   
    }

    } catch (Exception e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
       
    } 
     
/**
 * Installs JVM-wide defaults on {@link HttpsURLConnection} that accept every
 * host name and every certificate chain.
 *
 * <p><b>Security warning:</b> after this call no server identity is verified
 * for connections that use the {@code HttpsURLConnection} defaults, which
 * leaves them open to man-in-the-middle attacks. The change is global and is
 * not reverted by this method.
 *
 * <p>If the trust-all context cannot be created or initialised, the failure
 * is printed to standard error and the method returns normally.
 */
public static void trustEveryone() {
    try {
        HttpsURLConnection.setDefaultHostnameVerifier(new HostnameVerifier() {
            @Override
            public boolean verify(String hostname, SSLSession session) {
                // Accept any host name without checking it against the certificate.
                return true;
            }
        });
        SSLContext context = SSLContext.getInstance("TLS");
        context.init(null, new X509TrustManager[] { new X509TrustManager() {
            @Override
            public void checkClientTrusted(X509Certificate[] chain,
                    String authType) throws CertificateException {
                // Intentionally empty: never rejects a client chain.
            }

            @Override
            public void checkServerTrusted(X509Certificate[] chain,
                    String authType) throws CertificateException {
                // Intentionally empty: never rejects a server chain.
            }

            @Override
            public X509Certificate[] getAcceptedIssuers() {
                return new X509Certificate[0];
            }
        } }, new SecureRandom());
        HttpsURLConnection.setDefaultSSLSocketFactory(context.getSocketFactory());
    } catch (Exception e) {
        // Report the failure instead of hiding it; the caller still proceeds
        // (best effort), but with whatever defaults are currently installed.
        e.printStackTrace();
    }
}

public static String getHttpHeaders(String  urlStr) {
try {
URL url = new URL(urlStr);
trustEveryone();
Connection conn = HttpConnection.connect(url);
conn.timeout(10000);
// 默认是 GET方式
conn.header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8");
conn.header("Accept-Encoding", "gzip, deflate, br");
conn.header("Accept-Language", "zh-CN,zh;q=0.9");
conn.header("Cache-Control", "max-age=0");
conn.header("Connection", "keep-alive");
conn.header("Cookie", "session-id=459-7685866-4114631; ubid-acbcn=462-7605214-6740260; x-wl-uid=1/8WbnRRWqUmuHEwnk0vv/5QUwLPz7ExJ8sDDXwii/B7q7Q7du4lfbGND+N96grUw6aQQqTgRWjU=; session-token=\"61BROo0R3lWTferrCdSW7uhrLZsEr2nenTtEcaRYw5gdaQSPQTdVCZcBY4Z0m5aG3dYjFa9ig743cQQN1d/OFW215SCO+iS21Y8D1Gtf5HEPvdlzUxTlXuDJS1+MHRB7+DYx9NNXQagHg+lb6Rsz3ZIuTyd+HfSEmuoJMTuVUnxNbFcW/5CoeB+bWzkX9UmOa7StU6t/s+ak5O6uulcWZ+MTPx7lsF0KJfc+9CY3d6g/esAYdddvWA==\"; session-id-time=2082729601l; csm-hit=tb:XHNG7XF4GSF6CRHXT26C+sa-Q9YXGCPZY9WTMWCPRN2W-5JG052ATG7KJSS01Y89P|"+System.currentTimeMillis()+"&adb:adblk_yes");
conn.header("Host", "www.amazon.cn");
conn.header("Upgrade-Insecure-Requests", "1");
conn.header("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36");
conn.ignoreContentType(true);
conn.execute();
String html=conn.get().html();
//System.out.println( html);
return html;
} catch (Exception e) {
// TODO: handle exception
e.printStackTrace();
}

return "请求的页面不存在或链接错误 !!!";
}
  • 相关文章
发表评论
用户名: 匿名