// Java tutorial
/* * To change this license header, choose License Headers in Project Properties. * To change this template file, choose Tools | Templates * and open the template in the editor. */ package de.unisaarland.swan.export; import de.unisaarland.swan.dao.AnnotationDAO; import de.unisaarland.swan.dao.LinkDAO; import de.unisaarland.swan.entities.*; import de.unisaarland.swan.export.model.xml.Annotation; import de.unisaarland.swan.export.model.xml.AnnotationSet; import de.unisaarland.swan.export.model.xml.Document; import de.unisaarland.swan.export.model.xml.Label; import de.unisaarland.swan.export.model.uima.SwanAnnotation; import de.unisaarland.swan.export.model.uima.SwanLabel; import de.unisaarland.swan.export.model.uima.SwanLink; import java.io.BufferedOutputStream; import java.io.File; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStream; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import java.util.logging.Level; import java.util.logging.Logger; import java.util.zip.ZipEntry; import java.util.zip.ZipOutputStream; import javax.xml.bind.JAXBContext; import javax.xml.bind.JAXBException; import javax.xml.bind.Marshaller; import ma.glasnost.orika.MapperFacade; import ma.glasnost.orika.MapperFactory; import ma.glasnost.orika.impl.DefaultMapperFactory; import org.apache.commons.io.FileUtils; import org.apache.uima.UIMAException; import org.apache.uima.fit.factory.JCasFactory; import org.apache.uima.fit.factory.TypeSystemDescriptionFactory; import org.apache.uima.fit.util.CasIOUtil; import org.apache.uima.jcas.JCas; import org.apache.uima.jcas.cas.EmptyFSList; import org.apache.uima.jcas.cas.FSList; import org.apache.uima.jcas.cas.NonEmptyFSList; import org.apache.uima.resource.ResourceInitializationException; import org.apache.uima.resource.metadata.TypeSystemDescription; import org.xml.sax.SAXException; /** * @author Timo 
Guehring * @author Annemarie Friedrich */ public class ExportUtil { private static MapperFactory mapperFactory = new DefaultMapperFactory.Builder().build(); private static MapperFacade mapperFacade = mapperFactory.getMapperFacade(); private static final Class SCHEME_EXPORT_CLASS = de.unisaarland.swan.export.model.xml.scheme.Scheme.class; final AnnotationDAO annotationDAO; final LinkDAO linkDAO; public ExportUtil(AnnotationDAO annotationDAO, LinkDAO linkDAO) { this.annotationDAO = annotationDAO; this.linkDAO = linkDAO; } /** * Returns a zip file containing one XMI per document, UIMA annotations are * created for each document for all annotators that were assigned to the * project. * * @param proj Project * @return zip file */ public File getExportDataInXMI(Project proj) { try { File zipFile = new File("swan_" + proj.getName() + ".zip"); ZipOutputStream zos = createZipOutputStream(zipFile); for (de.unisaarland.swan.entities.Document d : proj.getDocuments()) { File xmiFile = createXMIFileForDocument(d); createZipEntry(xmiFile, zos); } // Add type system for convenience File typeSystemFile = createTypeSystemFile(); createZipEntry(typeSystemFile, zos); zos.close(); return zipFile; } catch (FileNotFoundException ex) { Logger.getLogger(ExportUtil.class.getName()).log(Level.SEVERE, null, ex); } catch (UIMAException | SAXException | IOException ex) { Logger.getLogger(ExportUtil.class.getName()).log(Level.SEVERE, null, ex); } throw new RuntimeException("ExportUtil: Error while creating UIMA XMI files / zipping"); } private File createXMIFileForDocument(de.unisaarland.swan.entities.Document d) throws UIMAException, IOException { JCas jCas = createJCasForDocument(d); String filename = d.getName() + ".xmi"; File xmiFile = new File(filename); for (Users u : d.getProject().getUsers()) { Map<Long, SwanAnnotation> annotsById = new HashMap<>(); for (de.unisaarland.swan.entities.Annotation annotation : annotationDAO .getAllAnnotationsByUserIdDocId(u.getId(), d.getId())) { 
addAnnotationToJCas(annotation, annotsById, jCas); } for (de.unisaarland.swan.entities.Link link : linkDAO.getAllLinksByUserIdDocId(u.getId(), d.getId())) { addLinkToJCas(link, annotsById, jCas); } } CasIOUtil.writeXmi(jCas, xmiFile); return xmiFile; } private JCas createJCasForDocument(de.unisaarland.swan.entities.Document d) throws UIMAException { JCas jCas = JCasFactory.createJCas(); jCas.setDocumentLanguage("en"); jCas.setDocumentText(d.getText()); return jCas; } private void addAnnotationToJCas(de.unisaarland.swan.entities.Annotation annotation, Map<Long, SwanAnnotation> annotsById, JCas jCas) { SwanAnnotation dAnnot = new SwanAnnotation(jCas); dAnnot.setBegin(annotation.getStart()); dAnnot.setEnd(annotation.getEnd()); dAnnot.setAnnotatorId(annotation.getUser().getEmail()); dAnnot.setSpanType(annotation.getSpanType().getName()); dAnnot.setAnnotationId(annotation.getId().toString()); annotsById.put(annotation.getId(), dAnnot); Set<Label> labels = convertLabelsToExportLabelSet(annotation.getLabelMap()); FSList list = createLabelList(jCas, labels); dAnnot.setLabels(list); dAnnot.addToIndexes(); } private void addLinkToJCas(de.unisaarland.swan.entities.Link link, Map<Long, SwanAnnotation> annotsById, JCas jCas) { SwanLink dLink = new SwanLink(jCas); dLink.setLinkBegin(annotsById.get(link.getAnnotation1().getId())); dLink.setLinkEnd(annotsById.get(link.getAnnotation2().getId())); dLink.setAnnotatorId(link.getUser().getEmail()); Set<Label> labels = convertLinkLabelsToExportLabelSet(link.getLabelMap()); FSList list = createLabelList(jCas, labels); dLink.setLabels(list); dLink.addToIndexes(); // add as link to start annotation SwanAnnotation startAnnot = annotsById.get(link.getAnnotation1().getId()); FSList linkList = startAnnot.getLinks(); if (linkList == null) { linkList = new EmptyFSList(jCas); } NonEmptyFSList extendedList = new NonEmptyFSList(jCas); extendedList.setHead(dLink); extendedList.setTail(linkList); startAnnot.setLinks(extendedList); } /** * Returns a 
list containing uimaLabels for all labels in a given set * * @param jCas JCas * @param labels Set<Label> labels * @return list FSList */ private FSList createLabelList(JCas jCas, Set<Label> labels) { FSList list = new EmptyFSList(jCas); for (Label label : labels) { for (String labelName : label.getLabel()) { SwanLabel uimaLabel = new SwanLabel(jCas); uimaLabel.setName(labelName); uimaLabel.setLabelSet(label.getlabelSetName()); NonEmptyFSList extendedList = new NonEmptyFSList(jCas); extendedList.setHead(uimaLabel); extendedList.setTail(list); list = extendedList; } } return list; } private File createTypeSystemFile() throws IOException, ResourceInitializationException, SAXException { File typeSystemFile = new File("typesystem.xml"); TypeSystemDescription tsd = TypeSystemDescriptionFactory.createTypeSystemDescription(); OutputStream os = new FileOutputStream(typeSystemFile); tsd.toXML(os); os.close(); return typeSystemFile; } /** * Returns a zip file containing all annotation and link data by annotator * and document belonging to the given project: creates one XML file per * annotator. 
* * @param proj Project * @return zip file */ public File getExportDataInXML(Project proj) { try { File zipFile = new File("swan_" + proj.getName() + ".zip"); ZipOutputStream zos = createZipOutputStream(zipFile); for (de.unisaarland.swan.entities.Document d : proj.getDocuments()) { for (Users u : proj.getUsers()) { String fileName = proj.getName() + "_" + d.getName() + "_" + u.getEmail() + ".xml"; File docUserfile = new File(fileName); List<de.unisaarland.swan.entities.Annotation> annotations = annotationDAO .getAllAnnotationsByUserIdDocId(u.getId(), d.getId()); List<de.unisaarland.swan.entities.Link> links = linkDAO.getAllLinksByUserIdDocId(u.getId(), d.getId()); Document exportDoc = convertToExportDocument(d, annotations, links); marshalXMLToSingleFile(exportDoc, docUserfile); createZipEntry(docUserfile, zos); } } // Insert scheme marshalScheme(proj, zos); zos.close(); return zipFile; } catch (FileNotFoundException ex) { Logger.getLogger(ExportUtil.class.getName()).log(Level.SEVERE, null, ex); } catch (IOException ex) { Logger.getLogger(ExportUtil.class.getName()).log(Level.SEVERE, null, ex); } throw new RuntimeException("ExportUtil: Error while zipping data to XML"); } private ZipOutputStream createZipOutputStream(File file) throws FileNotFoundException { FileOutputStream fos = new FileOutputStream(file); BufferedOutputStream bos = new BufferedOutputStream(fos); return new ZipOutputStream(bos); } private void createZipEntry(File file, ZipOutputStream zos) throws IOException { zos.putNextEntry(new ZipEntry(file.getName())); zos.write(FileUtils.readFileToByteArray(file)); zos.closeEntry(); } private void marshalScheme(Project proj, ZipOutputStream zos) throws IOException { final Scheme schemeOrig = proj.getScheme(); final de.unisaarland.swan.export.model.xml.scheme.Scheme schemeExport = (de.unisaarland.swan.export.model.xml.scheme.Scheme) mapperFacade .map(schemeOrig, SCHEME_EXPORT_CLASS); String fileName = schemeExport.getName() + ".xml"; File schemefile = new 
File(fileName); marshalXMLToSingleFile(schemeExport, schemefile); zos.putNextEntry(new ZipEntry(fileName)); zos.write(FileUtils.readFileToByteArray(schemefile)); zos.closeEntry(); } /** * Writes the object Document into the file. * * @param o * @param file */ private void marshalXMLToSingleFile(Object o, File file) { try { JAXBContext jaxbContext = JAXBContext.newInstance(o.getClass()); Marshaller jaxbMarshaller = jaxbContext.createMarshaller(); jaxbMarshaller.setProperty(Marshaller.JAXB_FORMATTED_OUTPUT, true); jaxbMarshaller.marshal(o, file); return; } catch (JAXBException ex) { Logger.getLogger(ExportUtil.class.getName()).log(Level.SEVERE, null, ex); } throw new RuntimeException("ExportUtil: Something went wrong while marshalling the XML"); } private Document convertToExportDocument(de.unisaarland.swan.entities.Document d, List<de.unisaarland.swan.entities.Annotation> annotations, List<de.unisaarland.swan.entities.Link> links) { Document document = new Document(); document.setAnnotations(convertAnnotationsToAnnotationSet(annotations)); document.setLinks(convertLinksToLinkType(links)); return document; } private AnnotationSet convertAnnotationsToAnnotationSet( List<de.unisaarland.swan.entities.Annotation> annotations) { AnnotationSet annotationSet = new AnnotationSet(); Set<Annotation> annotatiosExport = new HashSet<>(); for (de.unisaarland.swan.entities.Annotation a : annotations) { annotatiosExport.add(convertAnnotationToExportAnnotation(a)); } annotationSet.setAnnotations(annotatiosExport); return annotationSet; } private Annotation convertAnnotationToExportAnnotation(de.unisaarland.swan.entities.Annotation a) { Annotation anno = new Annotation(); anno.setId(a.getId()); anno.setStart(a.getStart()); anno.setEnd(a.getEnd()); // anno.setText(a.getText()); anno.setSpanType(a.getSpanType().getName()); anno.setLabels(convertLabelsToExportLabelSet(a.getLabelMap())); return anno; } private Set<Label> convertLabelsToExportLabelSet(Set<LabelLabelSetMap> maps) { 
Set<Label> labels = new HashSet<>(); // collect selected labels per LabelSet Map<String, Set<String>> setToLabelsMap = new HashMap<>(); for (LabelLabelSetMap m : maps) { for (de.unisaarland.swan.entities.LabelSet s : m.getLabelSets()) { if (!setToLabelsMap.containsKey(s.getName())) { setToLabelsMap.put(s.getName(), new HashSet<String>()); } setToLabelsMap.get(s.getName()).add(m.getLabel().getName()); } } // TODO duplicate code for (String labelSetName : setToLabelsMap.keySet()) { Label annotatedLabel = new Label(); annotatedLabel.setLabel(setToLabelsMap.get(labelSetName)); annotatedLabel.setLabelSetName(labelSetName); labels.add(annotatedLabel); } return labels; } private de.unisaarland.swan.export.model.xml.LinkType convertLinksToLinkType( List<de.unisaarland.swan.entities.Link> links) { de.unisaarland.swan.export.model.xml.LinkType linkType = new de.unisaarland.swan.export.model.xml.LinkType(); Set<de.unisaarland.swan.export.model.xml.Link> newLinks = new HashSet<>(); for (de.unisaarland.swan.entities.Link l : links) { newLinks.add(convertLinkToExportLink(l)); } linkType.setLinks(newLinks); return linkType; } private de.unisaarland.swan.export.model.xml.Link convertLinkToExportLink( de.unisaarland.swan.entities.Link link) { de.unisaarland.swan.export.model.xml.Link newLink = new de.unisaarland.swan.export.model.xml.Link(); newLink.setFrom(link.getAnnotation1().getId()); newLink.setTo(link.getAnnotation2().getId()); newLink.setLabels(convertLinkLabelsToExportLabelSet(link.getLabelMap())); return newLink; } private Set<Label> convertLinkLabelsToExportLabelSet(Set<LinkLabelLinkTypeMap> maps) { Set<Label> labels = new HashSet<>(); // collect selected labels per LabelSet Map<String, Set<String>> setToLabelsMap = new HashMap<>(); for (LinkLabelLinkTypeMap m : maps) { for (de.unisaarland.swan.entities.LinkType s : m.getLinkTypes()) { if (!setToLabelsMap.containsKey(s.getName())) { setToLabelsMap.put(s.getName(), new HashSet<String>()); } 
setToLabelsMap.get(s.getName()).add(m.getLabel().getName()); } } // TODO duplicate code for (String labelSetName : setToLabelsMap.keySet()) { Label annotatedLabel = new Label(); annotatedLabel.setLabel(setToLabelsMap.get(labelSetName)); annotatedLabel.setLabelSetName(labelSetName); labels.add(annotatedLabel); } return labels; } }