Example usage for org.apache.poi.hwpf HWPFDocument HWPFDocument

List of usage examples for org.apache.poi.hwpf HWPFDocument HWPFDocument

Introduction

This page lists usage examples for the HWPFDocument constructor of the class org.apache.poi.hwpf.HWPFDocument.

Prototype

public HWPFDocument(DirectoryNode directory) throws IOException 

Source Link

Document

This constructor loads a Word document from a specific directory node within a POIFSFileSystem, which is usually not the root directory (it is typically used to open embedded documents).

Usage

From source file:uk.bl.wa.tika.parser.ole2.OLE2Parser.java

License:Open Source License

/**
 * Debug-oriented parse that dumps the structure of a Word (OLE2) document to standard output.
 *
 * <p>Prints selected summary-information fields, a preview of every text piece, every
 * property-set section together with its properties, and the font table. Afterwards the
 * stream is handed to {@code parseCompObj} and the root directory of a freshly built
 * {@link POIFSFileSystem} is passed to {@code dump}.
 *
 * @param stream   stream containing the Word document
 * @param handler  not referenced by this implementation
 * @param metadata not referenced by this implementation
 * @param context  not referenced by this implementation
 * @throws IOException   if the document cannot be read
 * @throws SAXException  declared by the overridden method; not thrown directly here
 * @throws TikaException declared by the overridden method; not thrown directly here
 */
@Override
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {

    HWPFDocument doc = new HWPFDocument(stream);

    System.out.println("ApplicationName: " + doc.getSummaryInformation().getApplicationName());
    System.out.println("OSVersion: " + doc.getSummaryInformation().getOSVersion());
    System.out.println("# paragraphs: " + doc.getDocumentSummaryInformation().getParCount());
    System.out.println("# bytes: " + doc.getDocumentSummaryInformation().getByteCount());
    System.out.println("# hidden: " + doc.getDocumentSummaryInformation().getHiddenCount());
    System.out.println("# lines: " + doc.getDocumentSummaryInformation().getLineCount());
    System.out.println("# mmclips: " + doc.getDocumentSummaryInformation().getMMClipCount());
    System.out.println("# notes: " + doc.getDocumentSummaryInformation().getNoteCount());
    System.out.println("# sections: " + doc.getDocumentSummaryInformation().getSectionCount());
    System.out.println("# slides: " + doc.getDocumentSummaryInformation().getSlideCount());
    System.out.println("format: " + doc.getDocumentSummaryInformation().getFormat());

    for (TextPiece tp : doc.getTextTable().getTextPieces()) {
        // Preview at most the first 100 characters; shorter pieces are printed whole
        // instead of failing with StringIndexOutOfBoundsException.
        CharSequence text = tp.getStringBuffer();
        int previewLength = Math.min(100, text.length());
        System.out.println("TP: " + text.subSequence(0, previewLength));
        System.out.println("TP: " + tp.getPieceDescriptor().isUnicode());
    }

    for (Object os : doc.getDocumentSummaryInformation().getSections()) {
        Section s = (Section) os;
        System.out.println("ss# fid: " + s.getFormatID());
        System.out.println("ss# codepage: " + s.getCodepage());
        System.out.println("ss# # properties: " + s.getPropertyCount());
        for (Property sp : s.getProperties()) {
            // Guard against properties without a value so one empty entry
            // does not abort the whole dump with a NullPointerException.
            Object value = sp.getValue();
            String typeName = (value == null) ? "null" : value.getClass().getCanonicalName();
            System.out.println("ss# property: " + typeName + " " + value);
        }
    }

    for (Ffn f : doc.getFontTable().getFontNames()) {
        System.out.println("Font: " + f.getMainFontName() + ", " + f.getSize() + ", " + f.getWeight());
    }

    // NOTE(review): the same stream was already read by the HWPFDocument constructor above;
    // presumably it is exhausted by now unless the caller supplies a resettable stream or
    // parseCompObj rewinds it - confirm before relying on the calls below.
    parseCompObj(stream);

    POIFSFileSystem fs = new POIFSFileSystem(stream);

    DirectoryEntry root = fs.getRoot();

    dump(root);

}

From source file:usac.centrocalculo.data.LectorFormaCeroCuatroUno.java

/**
 * Loads the Word document at the given path, pulls a fixed set of labelled fields out of
 * its text via {@code getDato}, and prints each field (trimmed) to standard output.
 *
 * <p>The input stream is opened in a try-with-resources statement so the file handle is
 * released whether or not parsing succeeds. Any exception is caught and its stack trace
 * printed; nothing is propagated to the caller.
 *
 * @param docs path of the {@code .doc} file to read
 */
public void cargaArchivos(String docs) {

    try (FileInputStream fis = new FileInputStream(new File(docs).getAbsolutePath())) {

        HWPFDocument document = new HWPFDocument(fis);
        WordExtractor extractor = new WordExtractor(document);
        String fileData2 = extractor.getText();

        // Each call passes a start label and an end label; presumably getDato returns the
        // text found between them - confirm against getDato.
        String dependencia = this.getDato("DEPENDENCIA", "TITULO DEL PUESTO", fileData2);
        String puesto = this.getDato("TITULO DEL PUESTO", "No. PLAZA", fileData2);
        String noPlaza = this.getDato("No. PLAZA", "NOMBRE DEL TITULAR", fileData2);
        String nombrePersonaTitular = this.getDato("NOMBRE DEL TITULAR", "MOTIVO DE LA EMERGENCIA", fileData2);
        // NOTE(review): the end label below is a filler string that is unlikely to occur in
        // a document; the resulting behaviour depends on getDato - confirm it is intended.
        String motivoEmergencia = this.getDato("MOTIVO DE LA EMERGENCIA", "dkljafsld;kjfa;lsdkjf", fileData2);
        String nombrePersonaContratada = this.getDato("NOMBRE DE LA PERSONA CONTRATADA",
                "VIGENCIA DEL CONTRATO", fileData2);
        String vigenciaContratoInicio = this.getDato("VIGENCIA DEL CONTRATO", " AL ", fileData2);
        // String vigenciaContratoFin=this.getDato(" AL ", "ESTUDIOS REALIZADOS", fileData2);
        String estudiosRealizados = this.getDato("ESTUDIOS REALIZADOS", "EXPERIENCIA LABORAL", fileData2);
        //String experienciaLaboral=this.getDato("EXPERIENCIA LABORAL", "DEPENDENCIA", fileData2);
        String vigenciaAutorizadaInicio = this.getDato("VIGENCIA AUTORIZADA DEL", "AL", fileData2);
        //String vigenciaAutorizadaFin=this.getDato("AL", "OBSERVACIONES", fileData2);
        String observaciones = this.getDato("OBSERVACIONES", "Fecha Recibido", fileData2);
        String fechaRecibido = this.getDato("Fecha Recibido", "(f) Profesional de R.R.H.H", fileData2);

        // Separator line followed by one extracted field per line.
        System.out.println(
                "----------------------------------------------------------------------------------------------------------------");
        System.out.println(dependencia.trim());
        System.out.println(puesto.trim());
        System.out.println(noPlaza.trim());
        System.out.println(nombrePersonaTitular.trim());
        System.out.println(motivoEmergencia.trim());
        System.out.println(nombrePersonaContratada.trim());
        System.out.println(vigenciaContratoInicio.trim());
        // System.out.println(vigenciaContratoFin.trim());
        System.out.println(estudiosRealizados.trim());
        //System.out.println(experienciaLaboral.trim());
        System.out.println(vigenciaAutorizadaInicio.trim());
        //System.out.println(vigenciaAutorizadaFin.trim());
        System.out.println(observaciones.trim());
        System.out.println(fechaRecibido.trim());
    } catch (Exception exep) {
        exep.printStackTrace();
    }
}

From source file:util.DocumentFunction.java

/**
 * Reads a Word {@code .doc} file and returns its paragraphs joined into one string.
 *
 * <p>Each paragraph returned by the extractor is appended followed by a newline. The input
 * stream is managed by try-with-resources, so it is closed even when parsing fails. Any
 * exception is caught and its stack trace printed; in that case the text accumulated so
 * far (possibly empty) is returned.
 *
 * @param fileName path of the file to read
 * @return the paragraph text collected from the document, each paragraph followed by
 *         {@code "\n"}
 */
public static String readDocFile(String fileName) {
    StringBuilder text = new StringBuilder();
    try (FileInputStream fis = new FileInputStream(new File(fileName).getAbsolutePath())) {
        HWPFDocument doc = new HWPFDocument(fis);
        WordExtractor we = new WordExtractor(doc);

        for (String para : we.getParagraphText()) {
            text.append(para).append('\n');
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    return text.toString();
}