class="java">public class WordUtil { //2003 public static List<String> getWordTitles2003(String path) throws IOException{ File file = new File(path); String filename = file.getName(); filename = filename.substring(0, filename.lastIndexOf(".")); InputStream is = new FileInputStream(path); HWPFDocument doc = new HWPFDocument(is); Range r = doc.getRange(); List<String> list = new ArrayList<String>(); for (int i = 0; i < r.numParagraphs(); i++) { Paragraph p = r.getParagraph(i); // check if style index is greater than total number of styles int numStyles =doc.getStyleSheet().numStyles(); int styleIndex = p.getStyleIndex(); if (numStyles > styleIndex) { StyleSheet style_sheet = doc.getStyleSheet(); StyleDescription style = style_sheet.getStyleDescription(styleIndex); String styleName = style.getName(); if (styleName!=null&&styleName.contains("标题")) { // write style name and associated text // System.out.println(styleName + " -> " + p.text()); // System.out.println(p.text()); String text = p.text(); list.add(text); } } } //TODO 图表跟图片不一样,需另外处理 //得到word数据流 byte [] dataStream = doc.getDataStream(); //用于在一段范围内获得段落数 int numCharacterRuns = r.numCharacterRuns(); // System.out.println("CharacterRuns 数:"+numCharacterRuns); //负责图像提取 和 确定一些文件某块是否包含嵌入的图像。 PicturesTable table = new PicturesTable(doc, dataStream, null, null, null); //文章图片编号 int i = 1; for(int j=0 ; j<numCharacterRuns ; j++){ //这个类表示一个文本运行,有着共同的属性。 CharacterRun run = r.getCharacterRun(j); //是否存在图片 boolean bool = table.hasPicture(run); if(bool){ //返回图片对象绑定到指定的CharacterRun Picture pic = table.extractPicture(run, true); //图片的内容字节写入到指定的输出流。 pic.writeImageContent(new FileOutputStream("E:\\temp\\"+filename+"_"+i+".jpg")); i++; } } return list; } public static List<String> getWordTitles2007(String path) throws IOException{ InputStream is = new FileInputStream(path); //2007 // OPCPackage p = POIXMLDocument.openPackage(path); // XWPFWordExtractor e = new XWPFWordExtractor(p); // POIXMLDocument doc = e.getDocument(); List<String> list = new ArrayList<String>(); XWPFDocument doc = new XWPFDocument(is); XWPFParagraph[]paras = doc.getParagraphs(); for (XWPFParagraph graph : paras) { String text = graph.getParagraphText(); String style = graph.getStyle(); if ("1".equals(style)) { // System.out.println(text+"--["+style+"]"); }else if ("2".equals(style)) { // System.out.println(text+"--["+style+"]"); }else if ("3".equals(style)) { // System.out.println(text+"--["+style+"]"); }else{ continue; } list.add(text); } return list; } public static void main(String[] args) throws IOException { String path = "E:/temp/poi_test.doc"; List<String> list = new ArrayList<String>(); if (path.endsWith(".doc")) { list = getWordTitles2003(path); }else if (path.endsWith(".docx")) { list = getWordTitles2007(path); } for (String title : list) { System.out.println(title); } } }
?