Usage examples for the org.apache.poi.hwpf.HWPFDocument constructor
public HWPFDocument(DirectoryNode directory) throws IOException
From source file:poi.xssf.usermodel.examples.EmbeddedObjects.java
License:Apache License
public static void main(String[] args) throws Exception { OPCPackage pkg = OPCPackage.open(args[0]); XSSFWorkbook workbook = new XSSFWorkbook(pkg); for (PackagePart pPart : workbook.getAllEmbedds()) { String contentType = pPart.getContentType(); // Excel Workbook - either binary or OpenXML if (contentType.equals("application/vnd.ms-excel")) { HSSFWorkbook embeddedWorkbook = new HSSFWorkbook(pPart.getInputStream()); }//from w ww .j a va 2 s .c o m // Excel Workbook - OpenXML file format else if (contentType.equals("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")) { XSSFWorkbook embeddedWorkbook = new XSSFWorkbook(pPart.getInputStream()); } // Word Document - binary (OLE2CDF) file format else if (contentType.equals("application/msword")) { HWPFDocument document = new HWPFDocument(pPart.getInputStream()); } // Word Document - OpenXML file format else if (contentType .equals("application/vnd.openxmlformats-officedocument.wordprocessingml.document")) { XWPFDocument document = new XWPFDocument(pPart.getInputStream()); } // PowerPoint Document - binary file format else if (contentType.equals("application/vnd.ms-powerpoint")) { HSLFSlideShow slideShow = new HSLFSlideShow(pPart.getInputStream()); } // PowerPoint Document - OpenXML file format else if (contentType .equals("application/vnd.openxmlformats-officedocument.presentationml.presentation")) { OPCPackage docPackage = OPCPackage.open(pPart.getInputStream()); XSLFSlideShow slideShow = new XSLFSlideShow(docPackage); } // Any other type of embedded object. else { System.out.println("Unknown Embedded Document: " + contentType); InputStream inputStream = pPart.getInputStream(); } } pkg.close(); }
From source file:projekt.servise.impl.ReadDataFromWordServiceImpl.java
/**
 * Imports group names and students from a Word (.doc) list into the database.
 *
 * <p>What the code below does, in order:
 * <ol>
 *   <li>Opens the hard-coded .doc path, extracts its text with {@code WordExtractor},
 *       strips fields, rewrites "\r\n\r\n" and "\r\n" to "\n" and tabs to spaces, and
 *       splits the result on "\n" into {@code paragraphs}.</li>
 *   <li>Opens a JDBC connection using a hard-coded URL, user and password.</li>
 *   <li>First pass (from index 8): for every non-blank paragraph whose first three
 *       characters contain "Jrk", takes characters 17..20 as the group code, walks
 *       backwards over the preceding paragraphs until a blank one, and takes as group
 *       name a paragraph whose character at index 3 is upper case (with "(KAUGPE)"
 *       removed). Every {@code project.group} row with a null name and a groupcode
 *       starting with that code then gets this name.</li>
 *   <li>Second pass (from index 8): a "Jrk" paragraph sets {@code code} from the ten
 *       characters following the first ':' of the cleaned line. Any other non-blank
 *       line that passes the keyword / group-name / all-upper-case filters is split on
 *       spaces; with at least four non-empty tokens the student is looked up by code
 *       and, if absent, the person, student and (when missing) group rows are
 *       inserted.</li>
 *   <li>Calls {@code conn.commit()} and {@code conn.close()}; IOException and
 *       SQLException are logged at SEVERE level.</li>
 * </ol>
 *
 * <p>NOTE(review), all to be confirmed against the real source and schema:
 * <ul>
 *   <li>No statement, result set or the input stream is closed here, and the connection
 *       is closed only on the success path.</li>
 *   <li>{@code setAutoCommit(false)} is never called in this method before
 *       {@code conn.commit()}.</li>
 *   <li>The IOException handler logs under {@code ReadDataFromExcelServiceImpl}, while the
 *       SQLException handler uses {@code ReadDataFromWordServiceImpl}.</li>
 *   <li>{@code substring(0, 3)}, {@code substring(17, 21)}, {@code charAt(3)} and
 *       {@code paragraphs[i - j]} are not guarded against short paragraphs or a negative
 *       index.</li>
 *   <li>With more than five tokens no parameter is bound on the student query before it
 *       is executed.</li>
 *   <li>The student INSERT binds the group {@code code} variable into the {@code code}
 *       column, whereas the lookup compares that column with a token from the line.</li>
 *   <li>Literals such as {@code replace(" ", " ")} and {@code replace("", "")}, and the
 *       SQL literals that continue on the next physical line, look as if characters or
 *       line structure were lost before this text reached us. Left untouched on purpose.</li>
 *   <li>The locals {@code index} and {@code jrk} in the first pass are assigned but not
 *       otherwise used.</li>
 * </ul>
 */
@Override public void getData() { String FilePath = "C:/Users/Lenovo/Documents/NetBeansProjects/SoftwareArchitectureProject-master/src/main/java/projekt/nimekiri_test.doc"; FileInputStream fis;/* NOTE(review): fis is opened below and never closed in this method */ try { fis = new FileInputStream(new File(FilePath)); HWPFDocument doc = new HWPFDocument(fis); WordExtractor extractor = new WordExtractor(doc); Connection conn = DriverManager.getConnection( "jdbc:postgresql://dev.vk.edu.ee:5432/GroupWork?currentSchema=project", "t131566", "t131566"); String text = extractor.getText(); String strippedText = extractor.stripFields(text).replace("\r\n\r\n", "\n").replace("\t", " ") .replace("\r\n", "\n"); String[] paragraphs = strippedText.split("\n"); String code = ""; List<String> groupNames = groupService.getGroupNames(); for (int i = 8; i < paragraphs.length; i++) { String line = paragraphs[i].replace("*", "").replace("OK", "").replace("TREV", "").replace("REV", ""); int index = 0; String jrk = ""; if (!paragraphs[i].trim().isEmpty() && paragraphs[i].substring(0, 3).contains("Jrk")) { String groupCode = paragraphs[i].substring(17, 21); jrk = paragraphs[i]; index = strippedText.indexOf(jrk); String groupName = ""; int j = 1; do { if (!paragraphs[i - j].trim().isEmpty()) { if (Character.isUpperCase(paragraphs[i - j].charAt(3))) { groupName = paragraphs[i - j]; if (groupName.contains("(KAUGPE)")) { groupName = groupName.replace("(KAUGPE)", "").trim(); } } } j++; } while (!paragraphs[i - j].trim().isEmpty()); PreparedStatement preparedStatementGetGroup = conn.prepareStatement( "SELECT id FROM project.group where name is null and groupcode like ?"); preparedStatementGetGroup.setString(1, groupCode + "%"); ResultSet resultGroup = preparedStatementGetGroup.executeQuery(); while (resultGroup.next()) { int groupId = resultGroup.getInt(1); PreparedStatement preparedStatementSetGroupName = conn .prepareStatement("UPDATE project.group SET name=? 
where id=?"); preparedStatementSetGroupName.setString(1, groupName.replace(" ", " ")); preparedStatementSetGroupName.setInt(2, groupId); preparedStatementSetGroupName.executeUpdate(); } } } for (int i = 8; i < paragraphs.length; i++) { String line = paragraphs[i].replace("*", "").replace("OK", "").replace("TREV", "").replace("REV", ""); /* int index = 0; String jrk = "";*/ if (!paragraphs[i].trim().isEmpty() && paragraphs[i].substring(0, 3).contains("Jrk")) { /* String groupCode = paragraphs[i].substring(17, 21);*/ code = line.substring(line.indexOf(":") + 1, line.indexOf(":") + 11); code = code.replace("", "").replace(" - ", "").replace(" ", ""); } if (!line.trim().isEmpty() && !line.contains("KOOD") && !line.contains("KAUGPE") && !line.contains("lipilane") && !groupNames.contains(line) && !line.contains("Jrk") && !isAllUpperCase(line)) { String[] splittedLine = line.split(" "); String studentLastname = ""; List<String> newSplittedLine = new ArrayList<String>(); for (String item : splittedLine) { if (!item.isEmpty()) { newSplittedLine.add(item); } } if (newSplittedLine.size() >= 4) { PreparedStatement preparedStatementGetStudent = conn .prepareStatement("SELECT * FROM project.student where code=?"); if (newSplittedLine.size() == 4) { preparedStatementGetStudent.setString(1, newSplittedLine.get(2)); studentLastname = newSplittedLine.get(1); } else if (newSplittedLine.size() == 5) { preparedStatementGetStudent.setString(1, newSplittedLine.get(3)); studentLastname = newSplittedLine.get(1) + " " + newSplittedLine.get(2); } ResultSet resultStudent = preparedStatementGetStudent.executeQuery(); if (!resultStudent.next()) { PreparedStatement preparedStatementGetPerson = conn.prepareStatement( "SELECT * FROM project.person where firstname=? 
and lastname=?"); preparedStatementGetPerson.setString(1, newSplittedLine.get(0)); preparedStatementGetPerson.setString(2, studentLastname); ResultSet resultPersonExists = preparedStatementGetPerson.executeQuery(); if (!resultPersonExists.next()) { PreparedStatement preparedStatementNewPerson = conn.prepareStatement( "INSERT INTO project.person (firstname,lastname,roleid) VALUES (?,?,?)"); if (newSplittedLine.size() == 4) { preparedStatementNewPerson.setString(1, newSplittedLine.get(0)); preparedStatementNewPerson.setString(2, studentLastname); preparedStatementNewPerson.setInt(3, 2); } else if (newSplittedLine.size() == 5) { preparedStatementNewPerson.setString(1, newSplittedLine.get(0)); preparedStatementNewPerson.setString(2, studentLastname); preparedStatementNewPerson.setInt(3, 2); } preparedStatementNewPerson.executeUpdate(); PreparedStatement preparedStatementLastPerson = conn.prepareStatement( "SELECT id FROM project.person where firstname=? and lastname=? and roleid=?"); preparedStatementLastPerson.setString(1, newSplittedLine.get(0)); preparedStatementLastPerson.setString(2, studentLastname); preparedStatementLastPerson.setInt(3, 2); Integer personId = 0; ResultSet resultPerson = preparedStatementLastPerson.executeQuery(); if (resultPerson.next()) { personId = resultPerson.getInt(1); System.out.println("GROUP CODE " + code); Group1 group = groupService.getByGroupcode(code); if (group != null) { System.out.println("GROUP ID " + group.getId()); PreparedStatement preparedStatementSetStudent = conn.prepareStatement( "INSERT INTO project.student (personid,groupid,code) values(?,?,?)"); preparedStatementSetStudent.setInt(1, personId); preparedStatementSetStudent.setInt(2, group.getId()); preparedStatementSetStudent.setString(3, code); preparedStatementSetStudent.executeUpdate(); System.out.println("NEW STUDENT " + personId); } else { System.out.println("GROUP WAS NULL "); PreparedStatement preparedStatementGroup = conn.prepareStatement( "INSERT INTO 
project.group (groupcode) VALUES (?)"); preparedStatementGroup.setString(1, code); preparedStatementGroup.executeUpdate(); System.out.println("NEW GROUP " + code); code = code.replace(" ", ""); PreparedStatement preparedStatementLastGroup = conn .prepareStatement("SELECT id FROM project.group where groupcode=?"); preparedStatementLastGroup.setString(1, code); System.out.println("SELECT id FROM project.group where groupcode=" + code); int groupId = 0; ResultSet resultLastGroup = preparedStatementLastGroup.executeQuery(); if (resultLastGroup.next()) { PreparedStatement preparedStatementStudentExist = conn.prepareStatement( "SELECT * FROM project.student where personid=?"); preparedStatementStudentExist.setInt(1, personId); ResultSet studentExists = preparedStatementStudentExist.executeQuery(); if (!studentExists.next()) { groupId = resultLastGroup.getInt(1); PreparedStatement preparedStatementSetStudent = conn .prepareStatement( "INSERT INTO project.student (personid,groupid,code) values(?,?,?)"); preparedStatementSetStudent.setInt(1, personId); preparedStatementSetStudent.setInt(2, groupId); preparedStatementSetStudent.setString(3, code); preparedStatementSetStudent.executeUpdate(); System.out.println("NEW STUDENT " + personId); } } } } } } } // System.out.println(); } } conn.commit(); conn.close(); } catch (IOException e) { Logger.getLogger(ReadDataFromExcelServiceImpl.class.getName()).log(Level.SEVERE, null, e); } catch (SQLException ex) { Logger.getLogger(ReadDataFromWordServiceImpl.class.getName()).log(Level.SEVERE, null, ex); } }
From source file:ro.dabuno.office.integration.Word2Forrest.java
License:Apache License
public static void main(String[] args) throws IOException { System.out.println(args[0]);//w w w . j a v a2 s .c om InputStream is = new FileInputStream(args[0]); OutputStream out = new FileOutputStream("test.xml"); try { new Word2Forrest(new HWPFDocument(is), out); } finally { out.close(); is.close(); } }
From source file:rocky.sizecounter.SizeCounterUtil.java
License:Apache License
/** * Count Word's number of page from input directory. * /*from w w w . j av a 2s .c om*/ * @param filePath . * @return Number of A4 pages */ public static int countWordFile(String filePath) { FileInputStream fis = null; int page = 0; try { fis = new FileInputStream(filePath); if (CommonUtil.getExtension(filePath).equals("doc")) { // When file is .DOC HWPFDocument doc = new HWPFDocument(fis); page = doc.getDocProperties().getCPg(); } else if (CommonUtil.getExtension(filePath).equals("docx")) { // When file is .DOCX XWPFDocument doc = new XWPFDocument(fis); XWPFWordExtractor ex = new XWPFWordExtractor(doc); page = ex.getExtendedProperties().getUnderlyingProperties().getPages(); } } catch (FileNotFoundException ex) { LOG.warn("File " + filePath + " not found", ex); } catch (IOException ex) { LOG.warn("Invalid when reading file.", ex); } catch (Exception ex) { LOG.warn("Can not count file " + filePath, ex); } finally { if (fis != null) { try { fis.close(); } catch (IOException ex) { LOG.warn("Close the file input stream", ex); } } } return page; }
From source file:rzd.vivc.astzpte.beans.pagebean.ReportBean.java
/**
 * Fills the Word template {@code c:\rep\templ.doc} with the data of the user's
 * first ticket and writes the result to {@code c:\rep\<ticketId>.doc}.
 *
 * <p>Only answers whose {@code getAnswer()} is non-null are reported. At most 50
 * of them are written into the numbered placeholders {@code T01..T50},
 * {@code C01..C50}, {@code Y01..Y50} and {@code B01..B50}; the number of correct
 * answers replaces {@code BT}.
 *
 * <p>Changes compared with the previous version:
 * <ul>
 *   <li>The placeholder loop stops at the number of answered questions instead of
 *       always running to 50, which used to throw {@code IndexOutOfBoundsException}
 *       when fewer answers were available. Placeholders of unanswered questions are
 *       left untouched in that case.</li>
 *   <li>The output stream is closed even when writing fails.</li>
 *   <li>The duplicated one-digit / two-digit branches and the repeated {@code BT}
 *       replacement were merged.</li>
 * </ul>
 *
 * <p>NOTE(review): the time pattern {@code "hh:mm"} is a 12-hour clock without an
 * AM/PM marker; confirm whether {@code "HH:mm"} was intended. The two {@code " "}
 * literals used for the {@code Y} placeholders are identical as received and may
 * have lost their original characters; verify against the template.
 *
 * @param usr user whose first ticket is reported
 * @return the file name {@code <ticketId>.doc}, returned even when writing failed
 *         (failures are only logged)
 */
public String generateReport(User usr) {
    Ticket ticket = usr.getTickets().get(0);
    List<UserAnswer> answers = ticket.getAnswers();

    // Keep only the questions the user actually answered, remembering their position.
    ArrayList<UserAnswerModel> questions = new ArrayList<>();
    for (int i = 0; i < answers.size(); i++) {
        UserAnswer answer = answers.get(i);
        if (answer.getAnswer() != null) {
            questions.add(new UserAnswerModel(answer, i));
        }
    }

    SimpleDateFormat format = new SimpleDateFormat("dd/MM/yyyy");
    SimpleDateFormat format1 = new SimpleDateFormat("hh:mm");
    try (FileInputStream fis = new FileInputStream("c:\\rep\\templ.doc")) {
        HWPFDocument doc = new HWPFDocument(fis);

        // A fresh range is requested for every replacement because each
        // replacement changes the document text.
        doc.getRange().getParagraph(3).replaceText("(dtBeg)", format.format(ticket.getDt_create()));
        doc.getRange().getParagraph(9).replaceText("(timeBeg)", format1.format(ticket.getDt_create()));
        doc.getRange().getParagraph(11).replaceText("(timeFin)", format1.format(ticket.getFinish()));
        doc.getRange().replaceText("(num)", usr.getNum() + "");
        doc.getRange().getParagraph(24).replaceText("(allow1)",
                usr.getAllowNum() + " " + format.format(usr.getAllowDat()));
        doc.getRange().replaceText("(tickNum)",
                ticket.getAnswers().get(0).getQuestion().getTicketTemplate().getNum() + "");
        doc.getRange().replaceText("(themeNum)",
                ticket.getAnswers().get(0).getQuestion().getTicketTemplate().getTheme().getId() + "");
        doc.getRange().replaceText("(themeName)",
                ticket.getAnswers().get(0).getQuestion().getTicketTemplate().getTheme().getName());

        // The template offers 50 slots; never read past the answers we have.
        int slots = Math.min(50, questions.size());
        int count = 0;
        for (int i = 1; i <= slots; i++) {
            UserAnswerModel answerModel = questions.get(i - 1);
            // Placeholders are two-digit: T01..T09, T10..T50.
            String slot = (i < 10 ? "0" : "") + i;
            doc.getRange().replaceText("T" + slot, answerModel.getQuestion().getText());
            doc.getRange().replaceText("C" + slot, answerModel.givenNumber() + "");
            boolean cor = answerModel.correctNumber() == answerModel.givenNumber();
            if (cor) {
                count++;
            }
            doc.getRange().replaceText("Y" + slot, cor ? " " : " ");
            doc.getRange().replaceText("B" + slot, cor ? "1" : "0");
        }
        doc.getRange().replaceText("BT", count + "");

        try (FileOutputStream fos = new FileOutputStream("c:\\rep\\" + ticket.getId() + ".doc")) {
            doc.write(fos);
        }
    } catch (IOException ex) {
        // FileNotFoundException is an IOException and was logged identically.
        Logger.getLogger(ReportBean.class.getName()).log(Level.SEVERE, null, ex);
    }
    return ticket.getId() + ".doc";
}
From source file:sesiondataextractor.SesionDataExtractor.java
private static void getParticipaciones(List senadores) { File file = null;/*from www . ja v a 2s .c o m*/ WordExtractor extractor = null; try { file = new File("testdata/S 30.doc"); FileInputStream fis = new FileInputStream(file.getAbsolutePath()); HWPFDocument document = new HWPFDocument(fis); extractor = new WordExtractor(document); String[] paragraphList = extractor.getParagraphText(); String senadorName = ""; String senadorSpeech = ""; for (int i = 0; i < paragraphList.length; i++) { String paragraph = paragraphList[i]; if (paragraph != null) { //looks like the start of a new participation if (paragraph.indexOf(":") != -1) { //firstt persist the previous participation System.out.println("SENADOR:" + senadorName); // System.out.println("SPEECH"+senadorSpeech); Sesiones s = null; { TypedQuery<Sesiones> q; q = em.createNamedQuery("Sesiones.findBySesionId", Sesiones.class); q.setParameter("sesionId", 1); s = q.getSingleResult(); } Participaciones p = new Participaciones(); //now process the new paragraph senadorName = ""; senadorSpeech = ""; String speakerName = paragraph.substring(0, paragraph.indexOf(":")); if (speakerName.length() < SENADORA_PREFIX.length()) { //its a speech still senadorSpeech = senadorSpeech + paragraph; } else if (speakerName.equals(PRESIDENT_STRING)) { senadorName = PRESIDENT_STRING; } else if (speakerName.substring(0, SENADOR_PREFIX.length()).equals(SENADOR_PREFIX)) { //get the name int prefixLength = 0; if (speakerName.substring(0, SENADORA_PREFIX.length()).equals(SENADORA_PREFIX)) { prefixLength = SENADORA_PREFIX.length(); } else { prefixLength = SENADOR_PREFIX.length(); } senadorName = speakerName.substring(prefixLength); } else { // System.out.println("*****UNRECOGNIZED "+speakerName); } senadorSpeech = paragraph.substring(speakerName.length()); } else { //it is the continuation of a speech senadorSpeech = senadorSpeech + paragraph; } } } } catch (Exception exep) { exep.printStackTrace(); } }
From source file:textextractor.WordManager.java
/**
 * Reads a binary Word document and collects the space-separated words of all
 * its paragraphs into {@code listDoc}.
 *
 * <p>Each paragraph's text is printed; the style name is printed too when it is
 * not {@code "Normal"}.
 *
 * <p>Fix: {@code listDoc} used to be re-created inside the paragraph loop, so
 * only the words of the last paragraph were returned, and a document without
 * paragraphs returned whatever the field held before. The list is now created
 * once, before the loop.
 *
 * @param fis stream positioned at the start of a .doc file; not closed here
 * @return the words of every paragraph in document order (also stored in {@code listDoc})
 * @throws IOException if the document cannot be read
 */
public ArrayList extractDoc(FileInputStream fis) throws IOException {
    HWPFDocument doc = new HWPFDocument(fis);
    Range range = doc.getRange();

    // Start from an empty list once per call, not once per paragraph.
    listDoc = new ArrayList();
    for (int i = 0; i < range.numParagraphs(); i++) {
        Paragraph p = range.getParagraph(i);
        StyleDescription style = doc.getStyleSheet().getStyleDescription(p.getStyleIndex());
        if (!"Normal".equals(style.getName())) {
            System.out.println(style.getName());
        }
        String text = p.text();
        System.out.println(text);
        listDoc.addAll(Arrays.asList(text.split(" ")));
    }
    return listDoc;
}
From source file:ua.kiev.univ.linguistics.MSWordDocumentExtractor.java
License:Open Source License
public static Map<String, String> extract(String filename) throws IOException { HWPFDocument doc = new HWPFDocument(new FileInputStream(filename)); WordExtractor extractor = new WordExtractor(doc); String texts[] = extractor.getParagraphText(); List<PAPX> pars = doc.getParagraphTable().getParagraphs(); ParagraphProperties paragraphs[] = new ParagraphProperties[pars.size()]; for (int i = 0; i < paragraphs.length; i++) { paragraphs[i] = pars.get(i).getParagraphProperties(doc.getStyleSheet()); }//from ww w .ja va 2 s . c o m StringBuffer lt = new StringBuffer(); int stage = 0; String par[] = new String[3]; for (int i = 0; i < par.length; i++) { par[i] = ""; } for (int i = 0; i < Math.min(paragraphs.length, texts.length); i++) { ParagraphProperties paragraphProperties = paragraphs[i]; String text = texts[i]; if (text.matches("[\\s]*")) { continue; } int type = getType(paragraphProperties, text); switch (stage) { case 0: { if (type == 1) { par[0] += text + "\n"; } else if (type == 0) { par[1] += text + "\n"; stage = 2; } else if (type == -1) { par[2] += text + "\n"; stage = 2; } break; } case 2: { par[2] += text + "\n"; } } } Map<String, String> textBlocks = new TreeMap<String, String>(); if (!par[0].isEmpty()) { //par[0]=par[0].substring(0, par[0].length()-2); textBlocks.put(HEADER, trim(par[0])); } if (!par[1].isEmpty()) { //par[1]=par[1].substring(0, par[1].length()-2); textBlocks.put(TITLE, trim(par[1])); } if (!par[2].isEmpty()) { //par[2]=par[2].substring(0, par[2].length()-2); textBlocks.put(OTHER, trim(par[2])); } return textBlocks; }
From source file:uk.ac.liverpool.MSOffice.MSWord.java
License:Open Source License
/**
 * Reports whether the current input looks like a binary Word document, at the
 * confidence level selected by {@code min} and {@code max}.
 *
 * <p>If {@code Confidence.SUFFIX} is in range, only the path suffix is checked
 * (a case-insensitive {@code "doc"} ending). Otherwise, if
 * {@code Confidence.PARSE} is in range, the input is actually opened as an
 * HWPF document; a failure to do so is printed and produces no entry.
 *
 * @param min      lowest confidence of interest
 * @param max      highest confidence of interest
 * @param path     path or name of the input; may be {@code null}
 * @param complete not used by this implementation
 * @return the set returned by {@code validateParams}, with at most one entry added
 * @throws IOException declared by the overridden method
 */
@Override
public SortedSet<IDInfo> getTypeInfo(Confidence min, Confidence max, String path, boolean complete)
        throws IOException {
    SortedSet<IDInfo> result = validateParams(min, max);
    System.out.println(path);

    // Cheap check first: when suffix confidence is acceptable nothing is parsed.
    if (inRange(min, Confidence.SUFFIX, max)) {
        boolean hasDocSuffix = path != null && path.toLowerCase().endsWith("doc");
        if (hasDocSuffix) {
            result.add(new IDInfo(Confidence.SUFFIX, this, MIME));
        }
        return result;
    }

    if (!inRange(min, Confidence.PARSE, max)) {
        return result;
    }

    InputUni source = getInputUni();
    try {
        // Constructing the document is the test; the instance itself is not needed.
        new HWPFDocument(new POIFSFileSystem(source.getInputStream()));
        result.add(new IDInfo(Confidence.PARSE, this, MIME));
    } catch (Exception failure) {
        failure.printStackTrace();
    }
    return result;
}
From source file:uk.ac.liverpool.MSOffice.MSWord.java
License:Open Source License
@Override public Object parse(INode parent) throws Exception { Document doc = parent.getDocument(); // final StyleSheet ss = doc.getStyleSheet(); // CLGeneral gs = new CLGeneral(); // gs.setForeground(Colors.getColor(getAttr("foreground"), Color.BLACK)); // gs.setBackground(Colors.getColor(getAttr("background"), Color.LIGHT_GRAY)); // gs.setPadding(8); // ss.put(doc.getName(), gs); InputUni uni = getInputUni();//from w w w. ja v a 2 s .c o m HWPFDocument wor = (HWPFDocument) doc.getValue("worddoc"); if (wor == null) { System.out.println("new word extractor"); POIFSFileSystem pfs = new POIFSFileSystem(uni.getInputStreamRaw()); wor = new HWPFDocument(pfs); doc.putAttr("worddoc", wor); } return parseHelper(toHTML(parent), "HTML", getLayer(), parent); }