List of usage examples for the org.apache.poi.hwpf.HWPFDocument constructor
public HWPFDocument(DirectoryNode directory) throws IOException
From source file:uk.bl.wa.tika.parser.ole2.OLE2Parser.java
License:Open Source License
@Override public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException { HWPFDocument doc = new HWPFDocument(stream); System.out.println("ApplicationName: " + doc.getSummaryInformation().getApplicationName()); System.out.println("OSVersion: " + doc.getSummaryInformation().getOSVersion()); System.out.println("# paragraphs: " + doc.getDocumentSummaryInformation().getParCount()); System.out.println("# bytes: " + doc.getDocumentSummaryInformation().getByteCount()); System.out.println("# hidden: " + doc.getDocumentSummaryInformation().getHiddenCount()); System.out.println("# lines: " + doc.getDocumentSummaryInformation().getLineCount()); System.out.println("# mmclips: " + doc.getDocumentSummaryInformation().getMMClipCount()); System.out.println("# notes: " + doc.getDocumentSummaryInformation().getNoteCount()); System.out.println("# sections: " + doc.getDocumentSummaryInformation().getSectionCount()); System.out.println("# slides: " + doc.getDocumentSummaryInformation().getSlideCount()); System.out.println("format: " + doc.getDocumentSummaryInformation().getFormat()); for (TextPiece tp : doc.getTextTable().getTextPieces()) { System.out.println("TP: " + tp.getStringBuffer().substring(0, 100)); System.out.println("TP: " + tp.getPieceDescriptor().isUnicode()); }/*from w w w . j a v a 2s. 
c om*/ for (Object os : doc.getDocumentSummaryInformation().getSections()) { Section s = (Section) os; System.out.println("ss# fid: " + s.getFormatID()); System.out.println("ss# codepage: " + s.getCodepage()); System.out.println("ss# # properties: " + s.getPropertyCount()); for (Property sp : s.getProperties()) { System.out.println( "ss# property: " + sp.getValue().getClass().getCanonicalName() + " " + sp.getValue()); } } for (Ffn f : doc.getFontTable().getFontNames()) { System.out.println("Font: " + f.getMainFontName() + ", " + f.getSize() + ", " + f.getWeight()); } parseCompObj(stream); // This POIFSFileSystem fs = new POIFSFileSystem(stream); DirectoryEntry root = fs.getRoot(); dump(root); }
From source file:usac.centrocalculo.data.LectorFormaCeroCuatroUno.java
public void cargaArchivos(String docs) { File file = null;/*from www. j a v a 2 s . co m*/ WordExtractor extractor = null; try { file = new File(docs); FileInputStream fis = new FileInputStream(file.getAbsolutePath()); HWPFDocument document = new HWPFDocument(fis); extractor = new WordExtractor(document); String fileData2 = extractor.getText(); String dependencia = this.getDato("DEPENDENCIA", "TITULO DEL PUESTO", fileData2); String puesto = this.getDato("TITULO DEL PUESTO", "No. PLAZA", fileData2); String noPlaza = this.getDato("No. PLAZA", "NOMBRE DEL TITULAR", fileData2); String nombrePersonaTitular = this.getDato("NOMBRE DEL TITULAR", "MOTIVO DE LA EMERGENCIA", fileData2); String motivoEmergencia = this.getDato("MOTIVO DE LA EMERGENCIA", "dkljafsld;kjfa;lsdkjf", fileData2); String nombrePersonaContratada = this.getDato("NOMBRE DE LA PERSONA CONTRATADA", "VIGENCIA DEL CONTRATO", fileData2); String vigenciaContratoInicio = this.getDato("VIGENCIA DEL CONTRATO", " AL ", fileData2); // String vigenciaContratoFin=this.getDato(" AL ", "ESTUDIOS REALIZADOS", fileData2); String estudiosRealizados = this.getDato("ESTUDIOS REALIZADOS", "EXPERIENCIA LABORAL", fileData2); //String experienciaLaboral=this.getDato("EXPERIENCIA LABORAL", "DEPENDENCIA", fileData2); String vigenciaAutorizadaInicio = this.getDato("VIGENCIA AUTORIZADA DEL", "AL", fileData2); //String vigenciaAutorizadaFin=this.getDato("AL", "OBSERVACIONES", fileData2); String observaciones = this.getDato("OBSERVACIONES", "Fecha Recibido", fileData2); String fechaRecibido = this.getDato("Fecha Recibido", "(f) Profesional de R.R.H.H", fileData2); System.out.println( "----------------------------------------------------------------------------------------------------------------"); System.out.println(dependencia.trim()); System.out.println(puesto.trim()); System.out.println(noPlaza.trim()); System.out.println(nombrePersonaTitular.trim()); System.out.println(motivoEmergencia.trim()); 
System.out.println(nombrePersonaContratada.trim()); System.out.println(vigenciaContratoInicio.trim()); // System.out.println(vigenciaContratoFin.trim()); System.out.println(estudiosRealizados.trim()); //System.out.println(experienciaLaboral.trim()); System.out.println(vigenciaAutorizadaInicio.trim()); //System.out.println(vigenciaAutorizadaFin.trim()); System.out.println(observaciones.trim()); System.out.println(fechaRecibido.trim()); } catch (Exception exep) { exep.printStackTrace(); } }
From source file:util.DocumentFunction.java
public static String readDocFile(String fileName) { StringBuilder text = new StringBuilder(); try {//from ww w. ja va2s . c om File file = new File(fileName); FileInputStream fis = new FileInputStream(file.getAbsolutePath()); HWPFDocument doc = new HWPFDocument(fis); WordExtractor we = new WordExtractor(doc); String[] paragraphs = we.getParagraphText(); //System.out.println("Total no of paragraph "+paragraphs.length); for (String para : paragraphs) { text.append(para.toString() + "\n"); } fis.close(); } catch (Exception e) { e.printStackTrace(); } return text.toString(); }