import java.io.bufferedinputstream;import java.net.malformedurlexception;import java.net.url;import java.net.urlconnection;import java.util.list;import org.junit.ignore;import org.junit.test;import org.xml.sax.inputsource;import com.sun.syndication.feed.wirefeed;import com.sun.syndication.feed.module.module;import com.sun.syndication.feed.rss.channel;import com.sun.syndication.feed.rss.item;import com.sun.syndication.feed.synd.syndentry;import com.sun.syndication.feed.synd.syndfeed;import com.sun.syndication.feed.synd.syndfeedimpl;import com.sun.syndication.io.syndfeedinput;import com.sun.syndication.io.wirefeedinput;import com.sun.syndication.io.xmlreader;//rss(简易资讯聚合)//rss也叫聚合rss是在线共享内容的一种简易方式(也叫聚合内容,really simple syndication)。public class simplerometest { @ignore @test public void testfetchjavaeyeopml() throws exception { url javaeyeurl = new url("http://www.iteye.com/rss"); wirefeedinput feedinput = new wirefeedinput(); wirefeed feed = feedinput.build(new xmlreader(javaeyeurl)); } @test public void testfetchjavaeyenewsrss() throws exception { url javaeyeurl = new url("http://www.iteye.com/rss/news/"); stringbuffer sb = new stringbuffer(1024*1024); bufferedinputstream is = new java.io.bufferedinputstream(javaeyeurl.openconnection().getinputstream()); int ch = is.read(); while(ch != -1) { sb.append((char)ch); ch = is.read(); } system.out.println(new string(sb.tostring().getbytes("iso-8859-1"), "utf-8")); /** * <!doctype html public "-//w3c//dtd html 4.01 transitional//en"<!doctype html public "-//w3c//dtd xhtml 1.0 transitional//en" "http://www.w3.org/tr/xhtml1/dtd/xhtml1-transitional.dtd"><html xmlns="http://www.w3.org/1999/xhtml" xml:lang="zh-cn" dir="ltr"> <head> <meta http-equiv="content-type" content="text/html; charset=utf-8" /> <meta name="keyword" content="javaeye,it,开发,交流,社区,java, ruby, ajax, agile" /> <title>您的访问请求被拒绝 - javaeye技术社区</title> <style type="text/css"> .clearfix:after { content: "."; display: block; height: 0; clear: both; visibility: hidden; } .clearfix { display:block; } .left { float: left; } h1 {font-size: 20px;color: #6293bb;} p {font-size: 14px;color: #6293bb;} </style> </head> <body> <div style="padding:50px 0 0 300px"> <h1>您的访问请求被拒绝</h1> </div> <div class="clearfix"> <div class="left" style="padding-left:120px"> <img src="/Upload/Images/2011102808/9269D78FB028B6F8.jpg" width="128" height="128" /> </div> <div class="left" style="width:700px;padding:30px 0 0 30px"> <p>您可能使用了网络爬虫抓取javaeye网站页面!</p> <p>javaeye网站不允许您使用网络爬虫对javaeye进行恶意的网页抓取,请您立刻停止该抓取行为!</p> <p>如果您的网络爬虫不属于恶意抓取行为,希望javaeye网站允许你进行网页抓取,请和javaeye管理员联系,取得授权: webmaster<img src='/Upload/Images/2011102808/FC471E3C0AF3285A.gif' alt="email" />support.iteye.com</p> <p>如果您确实使用浏览器访问,但是被错误的识别为网络爬虫,请将您浏览器发送的“user agent”信息告知我们,帮助我们解决错误: webmaster<img src='/Upload/Images/2011102808/FC471E3C0AF3285A.gif' alt="email" />support.iteye.com</p> </div> </div> <div style="padding:20px 0 0 500px"> <a href="http://www.iteye.com"><img src='/Upload/Images/2011102808/3FDBC9FA008645D7.gif' border='0'></a> </div> </body></html> */ // syndfeedinput feedinput = new syndfeedinput(true);// syndfeed feed = feedinput.build(new inputsource(javaeyeurl.openstream()));// // rome中rss的可选标准 // // rss_0.90, rss_0.91, rss_0.92, rss_0.93, rss_0.94, rss_1.0, rss_2.0, atom_0.3 // list<syndentry> entries = feed.getentries();// // for(syndentry entry : entries) {// system.out.println("title:" + entry.gettitle());// system.out.println("desc:" + entry.getdescription());// system.out.println("link:" + entry.getlink());// system.out.println("date:" + entry.getpublisheddate());// system.out.println("==================================");// system.out.println("==================================");// system.out.println("==================================");// } } @test public void testfetchjavaeyenewsrsswithhttpclient() throws exception { url javaeyeurl = new url("http://www.iteye.com/rss/news/"); urlconnection conn = javaeyeurl.openconnection(); conn.setrequestproperty("user-agent", "mozilla/5.0 (windows; u; windows nt 5.1; zh-cn; rv:1.9.2.3) gecko/20100401 firefox/3.6.3"); syndfeedinput feedinput = new syndfeedinput(true); syndfeed feed = feedinput.build(new inputsource(conn.getinputstream())); // rome中rss的可选标准 // rss_0.90, rss_0.91, rss_0.92, rss_0.93, rss_0.94, rss_1.0, rss_2.0, atom_0.3 list<syndentry> entries = feed.getentries(); for(syndentry entry : entries) { system.out.println("title:" + entry.gettitle()); system.out.println("desc:" + entry.getdescription().getvalue()); system.out.println("link:" + entry.getlink()); system.out.println("date:" + entry.getpublisheddate()); system.out.println("=================================="); system.out.println("=================================="); system.out.println("=================================="); } }}