jsoup 网络爬虫学习例子（三）：抓取豆瓣电影海报图片并下载到本地
?
?
// ===== Constants.java =====
package com.iteye.injavawetrust.img;

/**
 * Configuration values shared by the Douban poster crawler.
 *
 * @author InJavaWeTrust
 */
public class Constants {

    /** Directory the downloaded poster images are written to. */
    public static final String IMGPATH = "E:\\InJavaWeTrust\\jsoup\\img\\";

    /** File extension appended to every saved poster. */
    public static final String EXTJPG = ".jpg";

    /** URL of Douban's "most popular reviews" listing. */
    public static final String URL = "http://movie.douban.com/review/best/";

    /** Number of records shown on each listing page. */
    public static final int NUM = 10;

    /** Query-string prefix used to select a page by record offset. */
    public static final String START = "?start=";

    /** Number of listing pages crawled by {@code JsoupUtil.getDoubanFilmImg()}. */
    public static final int PAGES = 5;

    /** Connect/read timeout, in milliseconds, for page fetches and image downloads. */
    public static final int TIMEOUT = 5000;
}

// ===== JsoupUtil.java =====
package com.iteye.injavawetrust.img;

import java.io.Closeable;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.Iterator;
import java.util.regex.Pattern;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Singleton helper that scrapes poster images from Douban's review listing
 * and stores them under {@link Constants#IMGPATH}.
 *
 * @author InJavaWeTrust
 */
public class JsoupUtil {

    /** Size of the copy buffer used while downloading an image. */
    private static final int BUFFER_SIZE = 8192;

    /** Path separators, characters Windows rejects in file names, and control characters. */
    private static final Pattern ILLEGAL_FILE_CHARS =
            Pattern.compile("[\\\\/:*?\"<>|\\p{Cntrl}]");

    private static final JsoupUtil instance = new JsoupUtil();

    private JsoupUtil() {
    }

    /**
     * Returns the shared instance.
     *
     * @return the single {@code JsoupUtil} instance
     */
    public static JsoupUtil getInstance() {
        return instance;
    }

    /**
     * Downloads one poster image and saves it as
     * {@code IMGPATH + <sanitized imgAlt> + EXTJPG}.
     *
     * <p>Failures are reported on standard error and never propagate to the
     * caller, so one bad image does not stop a crawl.
     *
     * @param imgSrc URL of the poster image; a blank value is skipped
     * @param imgAlt poster title, used as the file name after characters that
     *               are not legal in a file name have been replaced
     */
    public void getImg(String imgSrc, String imgAlt) {
        if (imgSrc == null || imgSrc.trim().length() == 0) {
            System.err.println("Skipping poster without image URL: " + imgAlt);
            return;
        }

        // The target directory may not exist yet on a fresh machine.
        File dir = new File(Constants.IMGPATH);
        if (!dir.isDirectory() && !dir.mkdirs()) {
            System.err.println("Cannot create image directory: " + dir);
            return;
        }
        File target = new File(dir, sanitizeFileName(imgAlt) + Constants.EXTJPG);

        InputStream is = null;
        OutputStream os = null;
        try {
            // Explicit timeouts: URL.openStream() would block forever on a stalled server.
            URLConnection conn = new URL(imgSrc).openConnection();
            conn.setConnectTimeout(Constants.TIMEOUT);
            conn.setReadTimeout(Constants.TIMEOUT);
            is = conn.getInputStream();
            os = new FileOutputStream(target);

            // Copy in chunks instead of one byte (and one flush) at a time.
            byte[] buffer = new byte[BUFFER_SIZE];
            int read;
            while ((read = is.read(buffer)) != -1) {
                os.write(buffer, 0, read);
            }
            os.flush();
        } catch (MalformedURLException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            close(is);
            close(os);
        }
    }

    /**
     * Crawls the first {@link Constants#PAGES} pages of the review listing,
     * prints each poster's URL and title, and downloads the poster.
     *
     * <p>Each page is handled independently: a page that fails to load is
     * reported and the crawl continues with the next one.
     */
    public void getDoubanFilmImg() {
        for (int page = 0; page < Constants.PAGES; page++) {
            String url = Constants.URL + Constants.START + (page * Constants.NUM);
            try {
                Document document = Jsoup.connect(url).timeout(Constants.TIMEOUT).get();
                // Every review entry is an <li class="ilst"> somewhere inside a <ul>.
                Elements items = document.select("ul li.ilst");
                for (Element item : items) {
                    Elements img = item.select("img");
                    String imgSrc = img.attr("src");
                    System.out.println(imgSrc);
                    String imgAlt = img.attr("alt");
                    System.out.println(imgAlt);
                    getImg(imgSrc, imgAlt);
                }
            } catch (Exception e) {
                // Crawl boundary: report and move on to the next page.
                e.printStackTrace();
            }
        }
    }

    /**
     * Turns scraped text into a safe single-component file name.
     *
     * @param name raw title text, may be {@code null}
     * @return the title with illegal characters replaced by {@code _}, or
     *         {@code "unnamed"} when nothing usable remains
     */
    private static String sanitizeFileName(String name) {
        if (name == null) {
            return "unnamed";
        }
        String cleaned = ILLEGAL_FILE_CHARS.matcher(name).replaceAll("_").trim();
        return cleaned.length() == 0 ? "unnamed" : cleaned;
    }

    /** Closes a stream if it was opened, reporting (not throwing) any failure. */
    private static void close(Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

// ===== GetDoubanFilmImg.java =====
package com.iteye.injavawetrust.img;

/**
 * Command-line entry point for the Douban poster crawler.
 *
 * @author InJavaWeTrust
 */
public class GetDoubanFilmImg {

    /**
     * Runs the crawl once.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        JsoupUtil ju = JsoupUtil.getInstance();
        ju.getDoubanFilmImg();
    }
}
?
?
运行结果:
http://img3.douban.com/view/movie_poster_cover/ipst/public/p2315652554.jpg
太阳的后裔
http://img3.douban.com/view/movie_poster_cover/ipst/public/p2204200412.jpg
女医明妃传
http://img3.douban.com/view/movie_poster_cover/ipst/public/p2319378505.jpg
上瘾
http://img3.douban.com/view/movie_poster_cover/ipst/public/p2226342851.jpg
维京传奇 第三季
http://img3.douban.com/view/movie_poster_cover/ipst/public/p2231323410.jpg
少帅
http://img3.douban.com/view/movie_poster_cover/ipst/public/p2315652554.jpg
太阳的后裔
http://img3.douban.com/view/movie_poster_cover/ipst/public/p868550285.jpg
银翼杀手
http://img3.douban.com/view/movie_poster_cover/ipst/public/p2315652554.jpg
太阳的后裔
http://img3.douban.com/view/movie_poster_cover/ipst/public/p2315805100.jpg
西游记之孙悟空三打白骨精
http://img3.douban.com/view/movie_poster_cover/ipst/public/p2309810802.jpg
45周年
http://img3.douban.com/view/movie_poster_cover/ipst/public/p2315805100.jpg
西游记之孙悟空三打白骨精
http://img3.douban.com/view/movie_poster_cover/ipst/public/p2315277870.jpg
中国故事
http://img3.douban.com/view/movie_poster_cover/ipst/public/p2322058074.jpg
因为爱情有幸福
。。。。。。
。。。。。。
。。。。。。
文件截图如下:
?