// Java tutorial
/**
 * Copyright 2009 Humboldt-Universität zu Berlin, INRIA.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *
 */
package org.corpus_tools.peppermodules.exmaralda;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Hashtable;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

import org.apache.commons.lang3.StringEscapeUtils;
import org.corpus_tools.pepper.common.DOCUMENT_STATUS;
import org.corpus_tools.pepper.impl.PepperMapperImpl;
import org.corpus_tools.pepper.modules.exceptions.PepperModuleDataException;
import org.corpus_tools.salt.SALT_TYPE;
import org.corpus_tools.salt.SaltFactory;
import org.corpus_tools.salt.common.SDocument;
import org.corpus_tools.salt.common.STextualDS;
import org.corpus_tools.salt.common.STextualRelation;
import org.corpus_tools.salt.common.STimeline;
import org.corpus_tools.salt.common.SToken;
import org.corpus_tools.salt.common.SStructure;
import org.corpus_tools.salt.common.SStructuredNode;
import org.corpus_tools.salt.core.SAnnotation;
import org.corpus_tools.salt.core.SMetaAnnotation;
import org.corpus_tools.salt.util.DataSourceSequence;
import org.eclipse.emf.ecore.resource.Resource;
import org.eclipse.emf.ecore.resource.ResourceSet;
import org.eclipse.emf.ecore.resource.impl.ResourceSetImpl;

import de.hu_berlin.german.korpling.saltnpepper.misc.exmaralda.BasicTranscription;
import de.hu_berlin.german.korpling.saltnpepper.misc.exmaralda.CommonTimeLine;
import de.hu_berlin.german.korpling.saltnpepper.misc.exmaralda.EVENT_MEDIUM;
import de.hu_berlin.german.korpling.saltnpepper.misc.exmaralda.Event;
import de.hu_berlin.german.korpling.saltnpepper.misc.exmaralda.ExmaraldaBasicFactory;
import de.hu_berlin.german.korpling.saltnpepper.misc.exmaralda.MetaInformation;
import de.hu_berlin.german.korpling.saltnpepper.misc.exmaralda.TIER_TYPE;
import de.hu_berlin.german.korpling.saltnpepper.misc.exmaralda.TLI;
import de.hu_berlin.german.korpling.saltnpepper.misc.exmaralda.Tier;
import de.hu_berlin.german.korpling.saltnpepper.misc.exmaralda.UDInformation;
import de.hu_berlin.german.korpling.saltnpepper.misc.exmaralda.resources.EXBResourceFactory;

/**
 * Mapper that turns the Salt document handed to it into an EXMARaLDA
 * {@link BasicTranscription} and writes that transcription to the resource
 * URI of this mapper.
 * <p>
 * {@link #mapSDocument()} builds, in this order: the meta information, the
 * common timeline, one textual tier holding the tokens and one annotation
 * tier per annotation qualified name found on tokens, spans and structures.
 */
public class Salt2EXMARaLDAMapper extends PepperMapperImpl {

    // -------------------- basic transcription

    /** The transcription currently being built; set by {@link #mapSDocument()}. */
    private BasicTranscription basicTranscription = null;

    public void setBasicTranscription(BasicTranscription basicTranscription) {
        this.basicTranscription = basicTranscription;
    }

    public BasicTranscription getBasicTranscription() {
        return basicTranscription;
    }

    // -------------------- start: helping structures

    /** Pairs a timeline item with the point of time (as a string) it stands for. */
    private static final class TLI2PointOfTime {
        public TLI tli = null;
        public String pointOfTime = null;
    }

    /** All timeline items created by {@link #map2CommonTimeLine(STimeline, CommonTimeLine)}. */
    private List<TLI2PointOfTime> tLI2PointOfTimeList = new ArrayList<TLI2PointOfTime>();

    /**
     * Looks up the timeline item registered for the given point of time.
     *
     * @param sPointOfTime
     *            point of time as a string
     * @return the matching timeline item, or {@code null} if none was
     *         registered for that point of time
     */
    private TLI getTLI(String sPointOfTime) {
        for (TLI2PointOfTime entry : tLI2PointOfTimeList) {
            if (entry.pointOfTime.equalsIgnoreCase(sPointOfTime)) {
                return entry.tli;
            }
        }
        return null;
    }

    /**
     * Returns the first sequence on the timeline that is overlapped by the
     * given node.
     *
     * @param sNode
     *            node (token, span or structure) to look up
     * @return the first time sequence, or {@code null} if the document graph
     *         reports no sequence for the node
     */
    private DataSourceSequence<?> firstTimeSequence(SStructuredNode sNode) {
        @SuppressWarnings("rawtypes")
        List<DataSourceSequence> sequences = getDocument().getDocumentGraph()
                .getOverlappedDataSourceSequence(sNode, SALT_TYPE.STIME_OVERLAPPING_RELATION);
        if ((sequences == null) || (sequences.isEmpty())) {
            return null;
        }
        return sequences.get(0);
    }

    // -------------------- end: helping structures

    /**
     * Creates the directory denoted by the resource URI of this mapper (if a
     * URI is set).
     *
     * @return always {@link DOCUMENT_STATUS#COMPLETED}
     */
    @Override
    public DOCUMENT_STATUS mapSCorpus() {
        if (getResourceURI() != null) {
            File resourceFile = new File(getResourceURI().toFileString());
            resourceFile.mkdirs();
        }
        return (DOCUMENT_STATUS.COMPLETED);
    }

    /**
     * Maps the document of this mapper to an EXMARaLDA basic transcription and
     * saves it to the resource URI of this mapper.
     *
     * @return {@link DOCUMENT_STATUS#COMPLETED} when the transcription has
     *         been written
     */
    @Override
    public DOCUMENT_STATUS mapSDocument() {
        if (getDocument().getDocumentGraph() == null) {
            getDocument().setDocumentGraph(SaltFactory.createSDocumentGraph());
        }
        if (getResourceURI() != null) {
            // make sure the target folder exists before the file is written
            File resourceFile = new File(getResourceURI().toFileString());
            resourceFile.getParentFile().mkdirs();
        }
        this.setBasicTranscription(ExmaraldaBasicFactory.eINSTANCE.createBasicTranscription());

        // mapping for MetaInformation
        MetaInformation metaInformation = ExmaraldaBasicFactory.eINSTANCE.createMetaInformation();
        basicTranscription.setMetaInformation(metaInformation);
        this.mapSDocument2MetaInfo(getDocument(), metaInformation);

        // creating timeline
        if (this.getDocument().getDocumentGraph().getTimeline() == null) {
            // if no timeline is included, create one
            getDocument().getDocumentGraph().createTimeline();
        }
        CommonTimeLine cTimeLine = ExmaraldaBasicFactory.eINSTANCE.createCommonTimeLine();
        basicTranscription.setCommonTimeLine(cTimeLine);
        this.map2CommonTimeLine(getDocument().getDocumentGraph().getTimeline(), cTimeLine);

        // creating token tier
        Tier tokenTier = ExmaraldaBasicFactory.eINSTANCE.createTier();
        basicTranscription.getTiers().add(tokenTier);
        this.mapSToken2Tier(getDocument().getDocumentGraph().getTokens(), tokenTier);

        // map the annotations of all SStructuredNodes to tiers
        List<SStructuredNode> structuredNodes = new ArrayList<>();
        // add all tokens to mapping list
        structuredNodes.addAll(getDocument().getDocumentGraph().getTokens());
        // add all spans to mapping list
        structuredNodes.addAll(getDocument().getDocumentGraph().getSpans());
        // add all structures to mapping list
        structuredNodes.addAll(getDocument().getDocumentGraph().getStructures());
        this.mapSStructuredNode2Tiers(structuredNodes);

        saveToFile(basicTranscription);
        return (DOCUMENT_STATUS.COMPLETED);
    }

    /**
     * Serializes the given transcription to the resource URI of this mapper.
     *
     * @param basicTranscription
     *            transcription to write
     * @throws PepperModuleDataException
     *             if no resource could be created for the URI or writing fails
     */
    private void saveToFile(BasicTranscription basicTranscription) {
        // create resource set and register the EXB resource factory for the
        // exporter's file extension
        ResourceSet resourceSet = new ResourceSetImpl();
        resourceSet.getResourceFactoryRegistry().getExtensionToFactoryMap().put(EXMARaLDAExporter.FILE_EXTENION,
                new EXBResourceFactory());

        Resource resource = resourceSet.createResource(getResourceURI());
        if (resource == null) {
            throw new PepperModuleDataException(this, "Cannot save a resource to uri '" + getResourceURI()
                    + "', because the given resource is null.");
        }
        resource.getContents().add(basicTranscription);
        try {
            resource.save(null);
        } catch (IOException e) {
            throw new PepperModuleDataException(this,
                    "Cannot write exmaradla basic transcription to uri '" + getResourceURI() + "'.", e);
        }
    }

    /**
     * Maps all meta annotations of the document either to a dedicated field of
     * the meta information (project name, transcription name, referenced
     * file, comment, transcription convention) or, for every other name, to a
     * user-defined information entry.
     *
     * @param sDoc
     *            document whose meta annotations are mapped
     * @param metaInfo
     *            meta information to fill
     */
    private void mapSDocument2MetaInfo(SDocument sDoc, MetaInformation metaInfo) {
        for (SMetaAnnotation sMetaAnno : sDoc.getMetaAnnotations()) {
            String name = sMetaAnno.getName();
            if (name.equalsIgnoreCase(EXBNameIdentifier.KW_EXB_PROJECT_NAME)) {
                // map project name
                metaInfo.setProjectName(sMetaAnno.getValue().toString());
            } else if (name.equalsIgnoreCase(EXBNameIdentifier.KW_EXB_TRANSCRIPTION_NAME)) {
                // map transcription name
                metaInfo.setTranscriptionName(sMetaAnno.getValue().toString());
            } else if (name.equalsIgnoreCase(EXBNameIdentifier.KW_EXB_REFERENCED_FILE)) {
                // map referenced file
                metaInfo.setReferencedFile(sMetaAnno.getValue().toString());
            } else if (name.equalsIgnoreCase(EXBNameIdentifier.KW_EXB_COMMENT)) {
                // map comment
                metaInfo.setComment(sMetaAnno.getValue().toString());
            } else if (name.equalsIgnoreCase(EXBNameIdentifier.KW_EXB_TRANSCRIPTION_CONVENTION)) {
                // map transcription convention
                metaInfo.setTranscriptionConvention(sMetaAnno.getValue().toString());
            } else {
                // everything else becomes user-defined information
                UDInformation udInfo = ExmaraldaBasicFactory.eINSTANCE.createUDInformation();
                this.mapSMetaAnnotation2UDInformation(sMetaAnno, udInfo);
                metaInfo.getUdMetaInformations().add(udInfo);
            }
        }
    }

    /**
     * Creates the content of the common timeline: one timeline item per point
     * of time from 0 up to and including the end of the Salt timeline. Every
     * created item is also registered for later lookup via
     * {@link #getTLI(String)}.
     * <p>
     * If the passed timeline is {@code null} or has no (or a zero) end, a
     * timeline is created on the document graph and used instead.
     *
     * @param sTimeline
     *            Salt timeline to map
     * @param cTimeLine
     *            EXMARaLDA timeline to fill
     */
    private void map2CommonTimeLine(STimeline sTimeline, CommonTimeLine cTimeLine) {
        STimeline timeline = sTimeline;
        if ((timeline == null) || (timeline.getEnd() == null) || (timeline.getEnd() == 0)) {
            this.getDocument().getDocumentGraph().createTimeline();
            timeline = this.getDocument().getDocumentGraph().getTimeline();
        }
        final String tliIdPrefix = "T";
        // the loop only touches EXMARaLDA objects, so the end is loop-invariant
        final int end = timeline.getEnd();
        for (int pointOfTime = 0; pointOfTime <= end; pointOfTime++) {
            String pointOfTimeText = String.valueOf(pointOfTime);

            TLI tli = ExmaraldaBasicFactory.eINSTANCE.createTLI();
            cTimeLine.getTLIs().add(tli);
            tli.setTime(pointOfTimeText);
            tli.setId(tliIdPrefix + pointOfTime);

            // remember the item so events can be anchored to it later
            TLI2PointOfTime tliPOT = new TLI2PointOfTime();
            tliPOT.pointOfTime = pointOfTimeText;
            tliPOT.tli = tli;
            this.tLI2PointOfTimeList.add(tliPOT);
        }
    }

    /**
     * Stores the number of tiers created so far.
     */
    private int numOfTiers = 0;

    /**
     * Returns the running number for the next tier and advances the counter.
     */
    private int getNewNumOfTiers() {
        return numOfTiers++;
    }

    /**
     * Stores the prefix for tier ids.
     */
    public String TIER_ID_PREFIX = "TIE";
    /**
     * Stores the name of the tier which contains the tokenization.
     */
    public String TIER_NAME_TOKEN = "tok";

    /**
     * Maps a list of tokens to a textual tier: sets category, display name, id
     * and type of the tier and adds one event per token via
     * {@link #mapSToken2Event(SToken, Event)}. <br/>
     * Annotations of the tokens are not handled here; they are mapped
     * separately by {@link #mapSStructuredNode2Tiers(List)}.
     *
     * @param sTokens
     *            tokens to map
     * @param tier
     *            tier to fill
     */
    private void mapSToken2Tier(List<SToken> sTokens, Tier tier) {
        tier.setCategory(TIER_NAME_TOKEN);
        tier.setDisplayName("[" + TIER_NAME_TOKEN + "]");
        tier.setId(TIER_ID_PREFIX + this.getNewNumOfTiers());
        tier.setType(TIER_TYPE.T);
        for (SToken sToken : sTokens) {
            Event event = ExmaraldaBasicFactory.eINSTANCE.createEvent();
            tier.getEvents().add(event);
            this.mapSToken2Event(sToken, event);
        }
    }

    /**
     * Maps one token to one event: start and end are taken from the first
     * time sequence overlapped by the token, the value is the token's text
     * passed through {@link #stringXMLConformer(String)}.
     *
     * @param sToken
     *            token to map
     * @param event
     *            event to fill
     * @throws PepperModuleDataException
     *             if the token has no time sequence, or the sequence has no
     *             start or no end
     */
    private void mapSToken2Event(SToken sToken, Event event) {
        DataSourceSequence<?> sequence = firstTimeSequence(sToken);
        if (sequence == null) {
            throw new PepperModuleDataException(this,
                    "Cannot map token to event, because there is no point of time for SToken: " + sToken.getId());
        }
        if (sequence.getStart() == null) {
            throw new PepperModuleDataException(this,
                    "Cannot map token to event, because start of pot for following token is empty: "
                            + sToken.getId());
        }
        if (sequence.getEnd() == null) {
            throw new PepperModuleDataException(this,
                    "Cannot map token to event, because end of pot for following token is empty: "
                            + sToken.getId());
        }
        event.setStart(this.getTLI(sequence.getStart().toString()));
        event.setEnd(this.getTLI(sequence.getEnd().toString()));
        event.setValue(stringXMLConformer(getDocument().getDocumentGraph().getText(sToken)));
    }

    /**
     * Maps the annotations of the given nodes to annotation tiers: one tier
     * per annotation qualified name, one event per annotation. The medium and
     * URL annotations are not turned into events of their own; instead their
     * values are attached to every event created for the same node. <br/>
     * Tokens additionally have to be mapped by
     * {@link #mapSToken2Tier(List, Tier)} to create the textual tier.
     * <p>
     * Tiers are added to the transcription in the sort order of their
     * qualified names and receive their ids in that order.
     *
     * @param sNodes
     *            nodes whose annotations are mapped
     */
    private void mapSStructuredNode2Tiers(List<SStructuredNode> sNodes) {
        // sorted table from annotation qualified name to its tier
        Map<String, Tier> annoName2Tier = new TreeMap<>();
        for (SStructuredNode sNode : sNodes) {
            // medium and url are per node, not per annotation
            SAnnotation sMediumAnno = sNode.getAnnotation(EXBNameIdentifier.KW_EXB_EVENT_MEDIUM);
            SAnnotation sURLAnno = sNode.getAnnotation(EXBNameIdentifier.KW_EXB_EVENT_URL);

            for (SAnnotation sAnno : sNode.getAnnotations()) {
                String qName = sAnno.getQName();
                Tier currTier = annoName2Tier.get(qName);
                if (currTier == null) {
                    // first annotation with this qualified name: create its tier
                    currTier = ExmaraldaBasicFactory.eINSTANCE.createTier();
                    currTier.setCategory(sAnno.getName());
                    currTier.setDisplayName("[" + sAnno.getName() + "]");
                    currTier.setType(TIER_TYPE.A);
                    annoName2Tier.put(qName, currTier);
                }
                // NOTE(review): a tier is registered above even for the medium/url
                // annotations, although no event is ever added to it — confirm
                // whether these empty tiers are wanted.
                if (qName.equalsIgnoreCase(EXBNameIdentifier.KW_EXB_EVENT_MEDIUM)
                        || qName.equalsIgnoreCase(EXBNameIdentifier.KW_EXB_EVENT_URL)) {
                    continue;
                }
                Event event = ExmaraldaBasicFactory.eINSTANCE.createEvent();
                currTier.getEvents().add(event);
                if (sMediumAnno != null) {
                    event.setMedium(EVENT_MEDIUM.get(sMediumAnno.getValue().toString()));
                }
                if (sURLAnno != null) {
                    // take the url from the url annotation (not from the medium annotation)
                    event.setUrl(sURLAnno.getValue().toString());
                }
                this.mapSStructuredNode2Event(sNode, qName, event);
            }
        }
        // set the ID of the tier according to its position in the ordered map
        for (Map.Entry<String, Tier> e : annoName2Tier.entrySet()) {
            e.getValue().setId(TIER_ID_PREFIX + this.getNewNumOfTiers());
            this.basicTranscription.getTiers().add(e.getValue());
        }
    }

    /**
     * Maps a structured node to an event: start and end come from the first
     * time sequence overlapped by the node, the value from the annotation with
     * the given qualified name (if the node has one), and every meta
     * annotation of the node becomes a user-defined information of the event.
     *
     * @param sNode
     *            node to map
     * @param sAnnotationQName
     *            qualified name of the annotation providing the event value
     * @param event
     *            event to fill
     * @throws PepperModuleDataException
     *             if the node has no time sequence, or the sequence has no
     *             start or no end
     */
    private void mapSStructuredNode2Event(SStructuredNode sNode, String sAnnotationQName, Event event) {
        DataSourceSequence<?> sequence = firstTimeSequence(sNode);
        if (sequence == null) {
            throw new PepperModuleDataException(this,
                    "Cannot map node to event, because there is no point of time for node: " + sNode.getId());
        }
        if ((sequence.getStart() == null) || (sequence.getEnd() == null)) {
            throw new PepperModuleDataException(this,
                    "Cannot map node to event, because start or end of pot for following node is empty: "
                            + sNode.getId());
        }
        event.setStart(this.getTLI(sequence.getStart().toString()));
        event.setEnd(this.getTLI(sequence.getEnd().toString()));

        SAnnotation sAnno = sNode.getAnnotation(sAnnotationQName);
        if (sAnno != null) {
            event.setValue(this.stringXMLConformer(sAnno.getValue_STEXT()));
        }
        // map meta annotations to user-defined information
        for (SMetaAnnotation sMetaAnno : sNode.getMetaAnnotations()) {
            UDInformation udInfo = ExmaraldaBasicFactory.eINSTANCE.createUDInformation();
            this.mapSMetaAnnotation2UDInformation(sMetaAnno, udInfo);
            event.getUdInformations().add(udInfo);
        }
    }

    /**
     * Maps a meta annotation to a user-defined information. Nothing is set if
     * the meta annotation has no (or an empty) name.
     *
     * @param sMetaAnno
     *            meta annotation to read
     * @param udInfo
     *            user-defined information to fill
     */
    private void mapSMetaAnnotation2UDInformation(SMetaAnnotation sMetaAnno, UDInformation udInfo) {
        String name = sMetaAnno.getName();
        if ((name != null) && (!name.isEmpty())) {
            udInfo.setAttributeName(stringXMLConformer(name));
            udInfo.setValue(stringXMLConformer(sMetaAnno.getValue_STEXT()));
        }
    }

    /**
     * Transforms a given string into an XML-conformant string: the XML special
     * characters are escaped and the German umlauts and the sharp s are
     * written as numeric character references (for example {@code &#196;}).
     * <p>
     * NOTE(review): the search literals of the seven replacements had been
     * reduced to empty strings, presumably by an encoding round-trip; an empty
     * search string makes {@link String#replace(CharSequence, CharSequence)}
     * insert the replacement between all characters. They are restored here
     * as Unicode escapes so the source no longer depends on the file
     * encoding. The numeric character references used as replacements are a
     * reconstruction — confirm against the project history.
     *
     * @param uncleanedString
     *            string which possibly is not XML-conformant, may be
     *            {@code null}
     * @return the escaped string, or {@code null} if the input was
     *         {@code null}
     */
    private String stringXMLConformer(String uncleanedString) {
        if (uncleanedString == null) {
            return null;
        }
        String retString = StringEscapeUtils.escapeXml(uncleanedString);
        retString = retString.replace("\u00C4", "&#196;"); // capital A umlaut
        retString = retString.replace("\u00D6", "&#214;"); // capital O umlaut
        retString = retString.replace("\u00DC", "&#220;"); // capital U umlaut
        retString = retString.replace("\u00E4", "&#228;"); // small a umlaut
        retString = retString.replace("\u00F6", "&#246;"); // small o umlaut
        retString = retString.replace("\u00FC", "&#252;"); // small u umlaut
        retString = retString.replace("\u00DF", "&#223;"); // sharp s
        return retString;
    }
}