Java tutorial
package elaborate.editor.export.tei;

/*
 * #%L
 * elab4-backend
 * =======
 * Copyright (C) 2011 - 2016 Huygens ING
 * =======
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program. If not, see
 * <http://www.gnu.org/licenses/gpl-3.0.html>.
 * #L%
 */

import java.io.StringWriter;
import java.text.MessageFormat;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.logging.Level;
import java.util.logging.Logger;

import javax.persistence.EntityManager;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.apache.commons.lang.StringEscapeUtils;
import org.apache.commons.lang.StringUtils;
import org.w3c.dom.Comment;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.Text;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;

import elaborate.editor.model.orm.Facsimile;
import elaborate.editor.model.orm.Project;
import elaborate.editor.model.orm.ProjectEntry;
import elaborate.editor.model.orm.ProjectEntryMetadataItem;
import elaborate.editor.model.orm.ProjectMetadataItem;
import elaborate.editor.model.orm.Transcription;
import elaborate.editor.model.orm.service.ProjectService;
import nl.knaw.huygens.facetedsearch.SolrUtils;

/**
 * Builds a TEI DOM document for a {@link Project} and serializes it to XML.
 *
 * <p>The document is assembled entirely in the constructor: a {@code teiHeader}, a
 * {@code facsimile} section with one {@code surface} per facsimile, and a {@code text}
 * section with one {@code div} per project entry. {@link #toXML()} only serializes the
 * result.
 */
public class TeiMaker {
  private static final Logger LOG = Logger.getLogger(TeiMaker.class.getName());

  /** Maps HTML inline-formatting tag names to the rendition name used for them. */
  public static final Map<String, String> HI_TAGS = ImmutableMap.<String, String>builder()//
      .put("strong", "bold")//
      .put("b", "bold")//
      .put("u", "underline")//
      .put("em", "italic")//
      .put("i", "italic")//
      .put("sub", "subscript")//
      .put("sup", "superscript")//
      .build();

  /** Element name of the metadata group emitted for the project and for each entry. */
  public static final String INTERP_GRP = "interpGrp";

  /** Orders transcriptions by the id of their transcription type. */
  private static final Comparator<Transcription> ORDER_BY_TYPE = new Comparator<Transcription>() {
    @Override
    public int compare(Transcription t1, Transcription t2) {
      Long typeId1 = t1.getTranscriptionType().getId();
      Long typeId2 = t2.getTranscriptionType().getId();
      return typeId1.compareTo(typeId2);
    }
  };

  /** The document under construction; {@code null} when no project was supplied. */
  private final Document tei;
  private final Project project;
  private final TeiConversionConfig config;
  private final EntityManager entityManager;

  /**
   * Builds the TEI document for the given project.
   *
   * <p>Side effect: the entity manager is set on the instance returned by
   * {@code ProjectService.instance()} before the project entries are loaded.
   *
   * @param _project the project to export; when {@code null} no document is built and
   *        {@link #toXML()} returns {@code null}
   * @param _config conversion settings, also handed to the per-transcription converter
   * @param entityManager entity manager used to load the project entries
   * @throws IllegalStateException if no XML document builder can be created
   */
  public TeiMaker(Project _project, TeiConversionConfig _config, EntityManager entityManager) {
    this.project = _project;
    this.config = _config;
    this.entityManager = entityManager;
    if (_project == null) {
      tei = null;

    } else {
      tei = createTeiDocument();
      Element root = tei.createElement("TEI");
      tei.appendChild(root);
      root.appendChild(createHeader());

      ProjectService projectService = ProjectService.instance();
      projectService.setEntityManager(entityManager);
      List<ProjectEntry> projectEntriesInOrder = projectService.getProjectEntriesInOrder(project.getId());

      root.appendChild(createFacsimile(projectEntriesInOrder));
      root.appendChild(createText(projectEntriesInOrder));
    }
  }

  /**
   * Serializes the TEI document to a string, with an XML declaration and without indentation.
   *
   * @return the XML, or {@code null} when no project was supplied or when serialization fails
   *         with a {@link TransformerException} (which includes
   *         {@link TransformerConfigurationException}); failures are logged with their cause
   */
  public String toXML() {
    if (tei == null) {
      return null;
    }

    TransformerFactory transformerFactory = TransformerFactory.newInstance();
    try {
      Transformer transformer = transformerFactory.newTransformer();
      transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "no");
      transformer.setOutputProperty(OutputKeys.INDENT, "no");
      StringWriter writer = new StringWriter();
      transformer.transform(new DOMSource(tei), new StreamResult(writer));
      // Post-process the serialized text: restore the camel-cased interpGrp tag name
      // (presumably lower-cased by the HTML conversion - TODO confirm) and drop spaces
      // immediately preceding a line break element.
      return writer.toString().replace("interpgrp>", "interpGrp>").replaceAll(" +<lb/>", "<lb/>");

    } catch (TransformerException e) {
      // Keep the historical null-on-failure contract, but record the cause via the logger.
      LOG.log(Level.SEVERE, "Could not serialize TEI document for project " + project.getId(), e);
      return null;
    }
  }

  /**
   * Creates the {@code facsimile} element, holding one {@code surface} with a {@code graphic}
   * child per facsimile of each entry. Surface ids are numbered from 1 within each entry.
   */
  private Element createFacsimile(List<ProjectEntry> projectEntriesInOrder) {
    Element facsimileElement = tei.createElement("facsimile");
    for (ProjectEntry entry : projectEntriesInOrder) {
      int n = 1;
      for (Facsimile facsimile : entry.getFacsimiles()) {
        Element surfaceElement = tei.createElement("surface");
        surfaceElement.setAttribute("xml:id", "facs-" + entry.getShortName() + "-" + n++);
        surfaceElement.setAttribute("n", facsimile.getName());

        Element graphicElement = tei.createElement("graphic");
        graphicElement.setAttribute("url", facsimile.getZoomableUrl());
        surfaceElement.appendChild(graphicElement);

        facsimileElement.appendChild(surfaceElement);
      }
    }
    return facsimileElement;
  }

  /**
   * Creates the {@code text} element: the project metadata group followed by a {@code body}
   * with one {@code div} per entry. A {@code pb} is inserted before an entry whenever its
   * folio differs from the folio of the preceding entry.
   *
   * <p>When the config requests grouping by metadata only an empty {@code group} element is
   * emitted; grouping is not implemented.
   */
  private Element createText(List<ProjectEntry> entries) {
    Element text = tei.createElement("text");
    text.appendChild(createProjectInterpGrp());

    if (config.getGroupTextsByMetadata() != null) {
      Element group = tei.createElement("group");
      text.appendChild(group);
      // TODO implement grouping

    } else {
      Element body = tei.createElement("body");
      text.appendChild(body);
      int pageno = 1;
      // The folio state lives in this loop so that it carries over from entry to entry;
      // passing it into a helper by value would lose the update.
      String currentFolio = "";
      for (ProjectEntry entry : entries) {
        String folio = folioOf(entry);
        if (!currentFolio.equals(folio)) {
          pageno = addPb(body, pageno, entry, folio);
          currentFolio = folio;
        }
        body.appendChild(createEntryDiv(entry));
      }
    }
    return text;
  }

  /** Creates the project-level metadata group: the title followed by every project metadata item. */
  private Element createProjectInterpGrp() {
    Element interpGrp = tei.createElement(INTERP_GRP);
    interpGrp.appendChild(interp("title", project.getTitle()));
    for (ProjectMetadataItem projectMetadataItem : project.getProjectMetadataItems()) {
      interpGrp.appendChild(interp(projectMetadataItem.getField(), projectMetadataItem.getData()));
    }
    return interpGrp;
  }

  /** Creates an {@code interp} element with a normalized type and an HTML-escaped value. */
  private Element interp(String type, String value) {
    Element interp = tei.createElement("interp");
    interp.setAttribute("type", SolrUtils.normalize(type));
    interp.setAttribute("value", StringEscapeUtils.escapeHtml(value));
    return interp;
  }

  /**
   * Returns the folio label of an entry: its "Folio number" metadata value followed by its
   * "Folio side" value, each replaced by the empty string when blank.
   */
  private static String folioOf(ProjectEntry projectEntry) {
    return StringUtils.defaultIfBlank(projectEntry.getMetadataValue("Folio number"), "")
        + StringUtils.defaultIfBlank(projectEntry.getMetadataValue("Folio side"), "");
  }

  /**
   * Creates the {@code div} for one entry: its metadata group followed by the converted
   * content of each transcription, in transcription-type order.
   */
  private Element createEntryDiv(ProjectEntry projectEntry) {
    Element entryDiv = tei.createElement("div");
    entryDiv.setAttribute("xml:id", "e" + projectEntry.getId());
    entryDiv.setAttribute("n", projectEntry.getName());
    addEntryInterpGrp(entryDiv, projectEntry);

    List<Transcription> orderedTranscriptions = Lists.newArrayList(projectEntry.getTranscriptions());
    Collections.sort(orderedTranscriptions, ORDER_BY_TYPE);
    for (Transcription transcription : orderedTranscriptions) {
      HtmlTeiConverter htmlTeiConverter = new HtmlTeiConverter(transcription.getBody(), config,
          transcription.getTranscriptionType().getName(), entityManager);
      Node transcriptionNode = htmlTeiConverter.getContent();
      Node importedTranscriptionNode = tei.importNode(transcriptionNode, true);

      Node child = importedTranscriptionNode.getFirstChild();
      while (child != null) {
        // appendChild moves the node out of its current parent, so the next sibling has to
        // be looked up before the move.
        Node nextSibling = child.getNextSibling();
        if (child.getNodeName().equals("div") && child.hasChildNodes()) {
          entryDiv.appendChild(child);
        }
        child = nextSibling;
      }
    }
    return entryDiv;
  }

  /**
   * Appends a metadata group to the entry div, containing the entry's metadata items whose
   * field is among the project's entry metadata field names. Nothing is appended when no
   * item qualifies.
   */
  private void addEntryInterpGrp(Element entryDiv, ProjectEntry projectEntry) {
    // Insertion-ordered, so the interp elements come out in a stable order instead of an
    // order that depends on hashing.
    Map<String, String> metaMap = Maps.newLinkedHashMap();
    List<String> metadataToInclude = ImmutableList
        .copyOf(projectEntry.getProject().getProjectEntryMetadataFieldnames());
    for (ProjectEntryMetadataItem meta : projectEntry.getProjectEntryMetadataItems()) {
      if (metadataToInclude.contains(meta.getField())) {
        metaMap.put(SolrUtils.normalize(meta.getField()), meta.getData());
      }
    }
    if (metaMap.isEmpty()) {
      return;
    }

    Element interpGrp = tei.createElement(INTERP_GRP);
    for (Entry<String, String> entry : metaMap.entrySet()) {
      // The key is already normalized, so the element is built here rather than via
      // interp(), which would normalize it a second time.
      Element interp = tei.createElement("interp");
      interp.setAttribute("type", entry.getKey());
      interp.setAttribute("value", StringEscapeUtils.escapeHtml(entry.getValue()));
      interpGrp.appendChild(interp);
    }
    entryDiv.appendChild(interpGrp);
  }

  /**
   * Appends a {@code pb} element to the body.
   *
   * <p>NOTE(review): the {@code facs} reference is built from the running page number, while
   * the {@code surface} ids in {@link #createFacsimile(List)} restart at 1 for every entry,
   * so the reference does not necessarily name an existing surface - confirm the intended
   * numbering.
   *
   * @param body the element to append to
   * @param _pageno the page number to use for this page break
   * @param projectEntry the entry that starts on the new page
   * @param folio the folio label; the {@code f} attribute is omitted when it is empty
   * @return the page number to use for the next page break
   */
  private int addPb(Element body, int _pageno, ProjectEntry projectEntry, String folio) {
    Element pb = tei.createElement("pb");
    if (StringUtils.isNotEmpty(folio)) {
      pb.setAttribute("f", folio);
    }
    pb.setAttribute("facs", "#facs-" + projectEntry.getShortName() + "-" + _pageno);
    pb.setAttribute("n", String.valueOf(_pageno));
    body.appendChild(pb);
    return _pageno + 1;
  }

  /**
   * Creates the {@code teiHeader}: a {@code fileDesc/titleStmt/title} holding the project
   * title, followed by an XML comment listing the project's three ordering levels.
   */
  private Element createHeader() {
    Element header = tei.createElement("teiHeader");
    Element fileDesc = tei.createElement("fileDesc");
    Element titleStmt = tei.createElement("titleStmt");
    Element title = tei.createElement("title");
    Comment ordering = tei.createComment(MessageFormat.format("ordering: {0} / {1} / {2}",
        project.getLevel1(), project.getLevel2(), project.getLevel3()));
    Text textNode = tei.createTextNode(project.getTitle());

    title.appendChild(textNode);
    titleStmt.appendChild(title);
    fileDesc.appendChild(titleStmt);
    header.appendChild(fileDesc);
    header.appendChild(ordering);
    return header;
  }

  /**
   * Creates a new, empty DOM document.
   *
   * @return the new document, never {@code null}
   * @throws IllegalStateException if the XML parser configuration cannot provide a document
   *         builder
   */
  static Document createTeiDocument() {
    DocumentBuilderFactory dbfac = DocumentBuilderFactory.newInstance();
    try {
      DocumentBuilder docBuilder = dbfac.newDocumentBuilder();
      return docBuilder.newDocument();

    } catch (ParserConfigurationException e) {
      // Fail here with the cause attached, rather than returning null and letting the
      // caller fail later with a NullPointerException.
      throw new IllegalStateException("Could not create an XML document builder", e);
    }
  }
}