// Java tutorial
/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package opennlp.tools.similarity.apps.solr; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.math.BigInteger; import java.net.MalformedURLException; import java.net.URL; import java.util.ArrayList; import java.util.List; import javax.xml.bind.JAXBException; import net.billylieurance.azuresearch.AzureSearchImageResult; import net.billylieurance.azuresearch.AzureSearchResultSet; import net.billylieurance.azuresearch.AzureSearchWebResult; import org.apache.commons.lang.StringUtils; import org.docx4j.XmlUtils; import org.docx4j.dml.wordprocessingDrawing.Inline; import org.docx4j.jaxb.Context; import org.docx4j.openpackaging.exceptions.Docx4JException; import org.docx4j.openpackaging.exceptions.InvalidFormatException; import org.docx4j.openpackaging.packages.WordprocessingMLPackage; import org.docx4j.openpackaging.parts.WordprocessingML.BinaryPartAbstractImage; import org.docx4j.openpackaging.parts.WordprocessingML.EndnotesPart; import org.docx4j.wml.CTEndnotes; import org.docx4j.wml.CTFtnEdn; import org.docx4j.wml.Drawing; import 
org.docx4j.wml.P;
import org.docx4j.wml.R;

import opennlp.tools.similarity.apps.BingQueryRunner;
import opennlp.tools.similarity.apps.Fragment;
import opennlp.tools.similarity.apps.HitBase;

/**
 * Writes a list of search hits into a .docx file in which every paragraph is
 * followed by an endnote reference whose endnote holds the URL of the hit.
 * After the paragraphs a "REFERENCES" section lists the title and URL of each
 * hit again as plain paragraphs.
 *
 * <p>{@code absPath}, {@code getAllImageSearchResults} and
 * {@code addImageByImageURLToPackage} are not declared in this class;
 * presumably they are inherited from
 * {@link WordDocBuilderSingleImageSearchCall} - confirm against that class.
 */
public class WordDocBuilderEndNotes extends WordDocBuilderSingleImageSearchCall {

  /** Opening tag (with namespace declaration) shared by the hand-written WordprocessingML fragments. */
  private static final String W_P_OPEN =
      "<w:p xmlns:w=\"http://schemas.openxmlformats.org/wordprocessingml/2006/main\" >";

  /** Directory that receives the second copy of the generated document. */
  private static final String DOWNLOAD_DIR = "/var/www/wrt_latest/";

  /**
   * Builds the document and saves it twice: to {@code absPath + "written/"} and
   * to {@code /var/www/wrt_latest/}. Failures while building or saving are
   * printed and do not propagate, so the returned path is produced even when
   * nothing could be written.
   *
   * @param content hits to write; hits whose fragment list is null or empty are skipped
   * @param title document title; must not be null. It is also used for the file
   *        name (spaces become underscores, double quotes become spaces, then trimmed)
   * @return the path of the first output file, {@code absPath + "written/" + <file name>}
   */
  public String buildWordDoc(List<HitBase> content, String title) {
    String docxFileName = docxFileNameFor(title);
    String outputDocFinename = absPath + "written/" + docxFileName;

    WordprocessingMLPackage wordMLPackage = null;
    List<String> imageURLs = getAllImageSearchResults(title);
    int count = 0;
    // Id of the next endnote. BigInteger is immutable, so the counter must be
    // reassigned after each use; calling add() alone leaves it unchanged.
    BigInteger refId = BigInteger.ONE;
    try {
      wordMLPackage = WordprocessingMLPackage.createPackage();

      // Stays null when the endnotes part could not be created or attached;
      // in that case paragraphs are still written, just without endnotes.
      CTEndnotes endnotes = null;
      try {
        EndnotesPart ep = new EndnotesPart();
        CTEndnotes created = Context.getWmlObjectFactory().createCTEndnotes();
        ep.setJaxbElement(created);
        wordMLPackage.getMainDocumentPart().addTargetPart(ep);
        endnotes = created;
      } catch (InvalidFormatException e1) {
        e1.printStackTrace();
      }

      wordMLPackage.getMainDocumentPart().addStyledParagraphOfText("Title", title.toUpperCase());

      for (HitBase para : content) {
        if (!hasContent(para)) {
          // no found content in this hit
          continue;
        }
        try {
          String processedParaTitle = processParagraphTitle(para.getTitle());
          // Same semantics as the original unparenthesised expression
          // (&& binds tighter than ||); a null title never passes.
          if ((processedParaTitle != null && !processedParaTitle.endsWith(".."))
              || StringUtils.isAlphanumeric(processedParaTitle)) {
            wordMLPackage.getMainDocumentPart().addStyledParagraphOfText("Subtitle",
                processedParaTitle);
          }

          String paraText = processParagraphText(para.getFragments().toString());
          wordMLPackage.getMainDocumentPart().addParagraphOfText(paraText);

          if (endnotes != null) {
            appendEndnote(wordMLPackage, endnotes, refId, para.getUrl());
            refId = refId.add(BigInteger.ONE);
          }

          try {
            addImageByImageURLToPackage(count, wordMLPackage, imageURLs);
          } catch (Exception ignored) {
            // Images are optional decoration; a missing image is not reported.
          }
        } catch (Exception e) {
          // One bad hit must not abort the whole document.
          e.printStackTrace();
        }
        count++;
      }

      // now add URLs
      wordMLPackage.getMainDocumentPart().addStyledParagraphOfText("Subtitle", "REFERENCES");
      for (HitBase para : content) {
        if (!hasContent(para)) {
          // no found content in this hit
          continue;
        }
        try {
          wordMLPackage.getMainDocumentPart().addStyledParagraphOfText("Subtitle",
              para.getTitle());
          String paraText = para.getUrl();
          wordMLPackage.getMainDocumentPart().addParagraphOfText(paraText);
        } catch (Exception e) {
          e.printStackTrace();
        }
      }

      try {
        wordMLPackage.save(new File(outputDocFinename));
        System.out.println("Finished creating docx =" + outputDocFinename);
      } catch (Exception e) {
        e.printStackTrace();
      }

      try {
        String fileNameToDownload = DOWNLOAD_DIR + docxFileName;
        wordMLPackage.save(new File(fileNameToDownload));
        System.out.println("Wrote a doc for download :" + fileNameToDownload);
      } catch (Exception e) {
        e.printStackTrace();
      }
    } catch (Exception e) {
      e.printStackTrace();
    }

    return outputDocFinename;
  }

  /**
   * Adds one endnote containing {@code url} to {@code endnotes} and appends a
   * paragraph to the main document part that references it. The endnote and
   * its reference use the same {@code endnoteId}.
   */
  private static void appendEndnote(WordprocessingMLPackage wordMLPackage, CTEndnotes endnotes,
      BigInteger endnoteId, String url) {
    CTFtnEdn endnote = Context.getWmlObjectFactory().createCTFtnEdn();
    endnotes.getEndnote().add(endnote);
    endnote.setId(endnoteId);

    // The URL is spliced into markup, so it has to be escaped: query strings
    // routinely contain '&', which would otherwise make the fragment unparseable.
    String endnoteBody = W_P_OPEN + "<w:pPr><w:pStyle w:val=\"EndnoteText\"/></w:pPr><w:r><w:rPr>"
        + "<w:rStyle w:val=\"EndnoteReference\"/></w:rPr><w:endnoteRef/></w:r><w:r><w:t xml:space=\"preserve\"> "
        + escapeXmlText(url) + "</w:t></w:r></w:p>";
    try {
      endnote.getEGBlockLevelElts().add(XmlUtils.unmarshalString(endnoteBody));
    } catch (JAXBException e) {
      e.printStackTrace();
    }

    // Add the body text referencing it
    String docBody = W_P_OPEN + "<w:r><w:t>"
        + "</w:t></w:r><w:r><w:rPr><w:rStyle w:val=\"EndnoteReference\"/></w:rPr><w:endnoteReference w:id=\""
        + endnoteId.toString() + "\"/></w:r></w:p>";
    try {
      wordMLPackage.getMainDocumentPart().addParagraph(docBody);
    } catch (JAXBException e) {
      e.printStackTrace();
    }
  }

  /** Returns true when the hit carries at least one fragment. */
  private static boolean hasContent(HitBase hit) {
    return hit.getFragments() != null && hit.getFragments().size() >= 1;
  }

  /** Derives the .docx file name from the document title. */
  private static String docxFileNameFor(String title) {
    return title.replace(' ', '_').replace('\"', ' ').trim() + ".docx";
  }

  /**
   * Escapes the five XML special characters so {@code text} can be embedded in
   * element content. A null input yields an empty string.
   */
  private static String escapeXmlText(String text) {
    if (text == null) {
      return "";
    }
    StringBuilder escaped = new StringBuilder(text.length() + 16);
    for (int i = 0; i < text.length(); i++) {
      char c = text.charAt(i);
      switch (c) {
        case '&':
          escaped.append("&amp;");
          break;
        case '<':
          escaped.append("&lt;");
          break;
        case '>':
          escaped.append("&gt;");
          break;
        case '"':
          escaped.append("&quot;");
          break;
        case '\'':
          escaped.append("&apos;");
          break;
        default:
          escaped.append(c);
      }
    }
    return escaped.toString();
  }

  /**
   * Cleans up the string form of a fragment list: removes square brackets and
   * {@code " | "} separators and collapses a few doubled punctuation patterns
   * ({@code ".,"}, {@code ". ."}, {@code ",."} become {@code "."}; {@code ".\""}
   * becomes {@code "\""}).
   *
   * @param title the text to clean; must not be null
   * @return the cleaned text
   */
  public static String processParagraphText(String title) {
    return title.replace("[", "").replace("]", "").replace(" | ", "").replace(".,", ".")
        .replace(".\"", "\"").replace(". .", ".").replace(",.", ".");
  }

  /**
   * Splits a title on {@code '-'}, {@code '|'} and {@code '&'} and returns the
   * longest part that contains no period; on a tie the earliest part wins. The
   * returned part is not trimmed.
   *
   * @param title the raw title, may be null
   * @return the selected part, or null when {@code title} is null or every part
   *         contains a period
   */
  public static String processParagraphTitle(String title) {
    if (title == null) {
      return null;
    }
    String titleDelim = title.replace('-', '&').replace('|', '&');
    String[] titleParts = titleDelim.split("&");

    int lenCurr = -1;
    String bestPart = null;
    for (String candidatePart : titleParts) {
      // if this part longer and does not have periods
      if (lenCurr < candidatePart.length() && candidatePart.indexOf('.') < 0) {
        lenCurr = candidatePart.length();
        bestPart = candidatePart;
      }
    }

    return bestPart;
  }

  /** Builds a sample document from ten synthetic hits titled "albert einstein N". */
  public static void main(String[] args) {
    WordDocBuilderEndNotes b = new WordDocBuilderEndNotes();
    List<HitBase> content = new ArrayList<HitBase>();
    for (int i = 0; i < 10; i++) {
      HitBase h = new HitBase();
      h.setTitle("albert einstein " + i);
      List<Fragment> frs = new ArrayList<Fragment>();
      frs.add(new Fragment(" content " + i, 0));
      h.setFragments(frs);
      h.setUrl("http://www." + i + ".com");
      content.add(h);
    }

    b.buildWordDoc(content, "albert einstein");
  }
}