Java tutorial
package eionet.cr.util.odp; import java.io.IOException; import java.io.OutputStream; import java.io.UnsupportedEncodingException; import java.net.URLEncoder; import java.text.DateFormat; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Collections; import java.util.Date; import java.util.HashMap; import java.util.List; import java.util.TimeZone; import javanet.staxutils.IndentingXMLStreamWriter; import javax.xml.stream.XMLOutputFactory; import javax.xml.stream.XMLStreamException; import javax.xml.stream.XMLStreamWriter; import org.apache.commons.collections.CollectionUtils; import org.apache.commons.compress.archivers.zip.ZipArchiveEntry; import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream; import org.apache.commons.io.IOUtils; import org.apache.commons.lang.StringUtils; import org.apache.log4j.Logger; import eionet.cr.common.CRRuntimeException; import eionet.cr.common.Namespace; import eionet.cr.common.Predicates; import eionet.cr.dao.DAOException; import eionet.cr.dao.DAOFactory; import eionet.cr.dao.HelperDAO; import eionet.cr.dao.ScoreboardSparqlDAO; import eionet.cr.dao.SearchDAO; import eionet.cr.dto.SubjectDTO; import eionet.cr.util.URIUtil; import eionet.cr.util.URLUtil; import eionet.cr.util.Util; /** * Generates ODP (Open Data Portal, http://open-data.europa.eu) datasets' metadata packages from the metadata of * a selected set of indicators. The output generated into a given stream, and is a ZIP file consisting of one RDF/XML formatted * metadata file per indicator. * * @author Jaanus */ public class ODPDatasetsPacker { /** Static logger for this class. */ private static final Logger LOGGER = Logger.getLogger(ODPDatasetsPacker.class); /** Date-time formatter compliant with XML Schema date/time representation in UTC timezone. 
*/
    // NOTE(review): java.text.DateFormat instances are not synchronized; confirm that this shared public
    // instance is never used from several threads at once.
    public static final DateFormat XML_SCHEMA_DATETIME_FORMAT = buildXmlSchemaDateFormat();

    /** URI of the "main" dataset, as opposed to the "virtual" datasets we generate for each indicator. */
    private static final String MAIN_DATASET_URI = "http://semantic.digital-agenda-data.eu/dataset/digital-agenda-scoreboard-key-indicators";

    /**
     * Expected charset encoding of the generated output. Passed both to the XML stream writer factory and to the
     * XML declaration of every generated file.
     */
    private static final String ENCODING = "UTF-8";

    /** Default namespace of the generated RDF/XML files that will be zipped. */
    private static final Namespace DEFAULT_NAMESPACE = Namespace.ECODP;

    /** Namespaces used in the generated RDF/XML files about the datasets (declared on the root element). */
    private static final List<Namespace> DATASET_FILE_NAMESPACES = buildDatasetFileNamespaces();

    /** Namespaces used in the generated manifest file (declared on the root element). */
    private static final List<Namespace> MANIFEST_FILE_NAMESPACES = buildManifestFileNamespaces();

    /**
     * Prefix for the package ID that goes into the manifest file header. The header appends a
     * {@code yyyyMMdd_HHmmss} timestamp to it.
     */
    private static final String PACKAGE_ID_PREFIX = "Digital_Agenda_Scoreboard_";

    /** URL of the native SPARQL endpoint of the underlying triplestore. */
    private static final String NATIVE_SPARQL_ENDPOINT_URL = "http://digital-agenda-data.eu/sparql";

    /** URIs of indicators for which the RDF/XML formatted metadata shall be generated. Set by the constructor. */
    private List<String> indicatorUris;

    /**
     * List of {@link SubjectDTO} where each member represents an indicator from {@link #indicatorUris}.
     * Loaded by {@link #prepare()}. Note that this field is package-private (it has no access modifier).
     */
    List<SubjectDTO> indicatorSubjects;

    /**
     * A {@link SubjectDTO} representing the "main" dataset identified by {@link #MAIN_DATASET_URI}.
     * NOTE(review): {@link #prepare()} actually loads this subject by the dataset URI given to the constructor,
     * not by {@link #MAIN_DATASET_URI} - confirm which one is intended.
     */
    private SubjectDTO mainDstSubject;

    /** A boolean indicating if {@link #prepare()} has already been called.
*/ private boolean isPrepareCalled; /** */ private String datasetUri; /** */ private HashMap<String, List<String>> indicatorToRefAreas = new HashMap<String, List<String>>(); /** */ private HashMap<String, SubjectDTO> indicatorSources = new HashMap<String, SubjectDTO>(); /** */ private HashMap<String, Date> urlLastModificationDates = new HashMap<String, Date>(); /** Earliest observation years found for each indicator. Key = indicator URI, value = year. */ private HashMap<String, Integer> indicatorYears = new HashMap<String, Integer>(); /** */ private ODPAction odpAction; /** * Main constructor for generating ODP dataset metadata package for the given indicators. * * @param datasetUri * @param indicatorUris The URIs of the indicators whose metadata is to be packaged. * @param odpAction */ public ODPDatasetsPacker(String datasetUri, List<String> indicatorUris, ODPAction odpAction) { if (StringUtils.isBlank(datasetUri)) { throw new IllegalArgumentException("The given dataset URIs must not be blank!"); } if (CollectionUtils.isEmpty(indicatorUris)) { throw new IllegalArgumentException("The given list of indicatior URIs must not be empty!"); } if (odpAction == null) { throw new IllegalArgumentException("The given ODP action must not be null!"); } this.datasetUri = datasetUri; this.indicatorUris = indicatorUris; this.odpAction = odpAction; } /** * Does preparations for the {@link #execute(OutputStream)} method, so it should be called before the latter, otherwise the * latter will throw {@link IllegalStateException}. * * The reason for this method is that we can do preparations (e.g. get various stuff from database and triplestore) before * we start streaming the output. This is convenient for exception handling in Stripes action bean events that return a * streaming resolution. * * @throws DAOException If data access error occurs. 
*/
    public void prepare() throws DAOException {

        // Not prepared until every lookup below has succeeded. Resetting first also covers a failing re-run.
        isPrepareCalled = false;

        // Load the metadata of the requested indicators.
        indicatorSubjects = DAOFactory.get().getDao(SearchDAO.class).getSubjectsData(indicatorUris, null);
        if (CollectionUtils.isEmpty(indicatorSubjects)) {
            throw new DAOException("Could not find any metadata about the given indicators!");
        }

        // Load the metadata of the parent dataset.
        HelperDAO helperDao = DAOFactory.get().getDao(HelperDAO.class);
        mainDstSubject = helperDao.getSubject(datasetUri);
        if (mainDstSubject == null || mainDstSubject.getPredicateCount() == 0) {
            throw new DAOException("Could not find any metadata about the main (i.e. parent) dataset!");
        }

        // Collect per-indicator data: used reference areas, source subject and earliest observation year.
        ScoreboardSparqlDAO ssDao = DAOFactory.get().getDao(ScoreboardSparqlDAO.class);
        for (SubjectDTO indSubj : indicatorSubjects) {

            String indUri = indSubj.getUri();

            List<String> refAreas = ssDao.getDistinctUsedRefAreas(datasetUri, indUri);
            indicatorToRefAreas.put(indUri, refAreas);

            // Several indicators may share a source, so every source is loaded only once.
            String indSourceUri = indSubj.getObjectValue(Predicates.DCTERMS_SOURCE);
            if (StringUtils.isNotBlank(indSourceUri) && !indicatorSources.containsKey(indSourceUri)) {
                SubjectDTO indSourceDTO = helperDao.getSubject(indSourceUri);
                indicatorSources.put(indSourceUri, indSourceDTO);
            }

            // NOTE(review): the argument order here (indicator, dataset) is the reverse of the call above -
            // confirm against the ScoreboardSparqlDAO signatures.
            int earliestYear = ssDao.getEarliestObservationYear(indUri, datasetUri);

            // A returned 0 is stored as null, i.e. as "no year known".
            indicatorYears.put(indUri, earliestYear == 0 ? null : Integer.valueOf(earliestYear));
        }

        // Everything needed by execute(OutputStream) is in place now.
        isPrepareCalled = true;
    }

    /**
     * The main execution method.
     *
     * @param outputStream Output stream where the zipped file should be written into.
     *
     * @throws IOException If any sort of output stream writing error occurs.
     * @throws XMLStreamException Thrown by methods from the {@link XMLStreamWriter} that is used by called methods.
*/
    public void execute(OutputStream outputStream) throws IOException, XMLStreamException {

        if (!isPrepareCalled) {
            throw new IllegalStateException("Prepare has not been called yet!");
        }

        ZipArchiveOutputStream zipOutput = null;
        try {
            zipOutput = new ZipArchiveOutputStream(outputStream);

            // One RDF/XML entry per indicator, followed by the manifest.
            int i = 0;
            for (SubjectDTO indicatorSubject : indicatorSubjects) {
                createAndWriteDatasetEntry(zipOutput, indicatorSubject, i++);
            }
            createAndWriteManifestEntry(zipOutput);

            // Finalise the archive here rather than leaving it to the quiet close below, otherwise an error
            // while writing the ZIP central directory would be swallowed and a broken archive would go unnoticed.
            zipOutput.finish();
            zipOutput.flush();
        } finally {
            // Only releases resources at this point; also closes the given output stream.
            IOUtils.closeQuietly(zipOutput);
        }
    }

    /**
     * Creates and writes a ZIP archive entry file for the given indicator. The entry is named
     * {@code datasets/<id>.rdf}, where the id is the indicator's skos:notation or, if that is blank, the label
     * extracted from the indicator's URI.
     *
     * @param zipOutput ZIP output where the entry goes into.
     * @param indSubject The indicator for which the entry is written.
     * @param index 0-based index of the indicator (in the indicator list received from database) that is being written.
     *
     * @throws IOException If any sort of output stream writing error occurs.
     * @throws XMLStreamException Thrown by methods from the {@link XMLStreamWriter} that is used by called methods.
     */
    private void createAndWriteDatasetEntry(ZipArchiveOutputStream zipOutput, SubjectDTO indSubject, int index)
            throws IOException, XMLStreamException {

        // Blank (not just empty) notations fall back to the URI label, exactly as in the manifest's package-path,
        // so that the entry name and the manifest reference always agree.
        String id = indSubject.getObjectValue(Predicates.SKOS_NOTATION);
        if (StringUtils.isBlank(id)) {
            id = URIUtil.extractURILabel(indSubject.getUri());
        }

        ZipArchiveEntry entry = new ZipArchiveEntry("datasets/" + id + ".rdf");
        zipOutput.putArchiveEntry(entry);
        writeDatasetEntry(zipOutput, indSubject, index);
        zipOutput.closeArchiveEntry();
    }

    /**
     * Writes a ZIP archive entry file for the given indicator.
     *
     * @param zipOutput ZIP output where the entry goes into.
     * @param indSubject The indicator for which the entry is written.
     * @param index 0-based index of the indicator (in the indicator list received from database) that is being written.
     *
     * @throws XMLStreamException Thrown by methods from the {@link XMLStreamWriter} that is used by called methods.
*/
    private void writeDatasetEntry(ZipArchiveOutputStream zipOutput, SubjectDTO indSubject, int index)
            throws XMLStreamException {

        // NOTE: the index parameter is currently not used by this method.

        // Prepare indicator URI.
        String uri = indSubject.getUri();

        // Prepare indicator skos:notation, falling back to the label extracted from the URI.
        String skosNotation = indSubject.getObjectValue(Predicates.SKOS_NOTATION);
        if (StringUtils.isBlank(skosNotation)) {
            skosNotation = URIUtil.extractURILabel(uri);
        }

        // Prepare indicator skos:prefLabel, falling back to the notation.
        String skosPrefLabel = indSubject.getObjectValue(Predicates.SKOS_PREF_LABEL);
        if (StringUtils.isBlank(skosPrefLabel)) {
            skosPrefLabel = skosNotation;
        }

        // Prepare indicator skos:altLabel, falling back to the notation.
        String skosAltLabel = indSubject.getObjectValue(Predicates.SKOS_ALT_LABEL);
        if (StringUtils.isBlank(skosAltLabel)) {
            skosAltLabel = skosNotation;
        }

        // Prepare indicator description.
        String indicatorDescription = buildIndicatorDescription(indSubject);

        // Prepare issued date from the main dataset.
        String dctIssued = mainDstSubject.getObjectValue(Predicates.DCTERMS_ISSUED);

        // Prepare modification date from the main dataset: the greatest of its dct:modified values or, if there
        // is none, the current time. Collections.max is used so that the subject's own list is not re-ordered.
        List<String> modifiedDates = mainDstSubject.getObjectValues(Predicates.DCTERMS_MODIFIED);
        String dctModified = StringUtils.EMPTY;
        if (CollectionUtils.isNotEmpty(modifiedDates)) {
            dctModified = Collections.max(modifiedDates).trim();
        }
        if (StringUtils.isBlank(dctModified)) {
            dctModified = Util.virtuosoDateToString(new Date());
        }

        // Prepare the main dataset's identifier.
        String mainDstIdentifier = URIUtil.extractURILabel(mainDstSubject.getUri());
        String mainDstIdentifierForLinks = mainDstIdentifier.replace('-', '_');

        // Prepare download URLs.
        String csvDownloadUrl = "http://digital-agenda-data.eu/download/" + mainDstIdentifier + ".csv.zip";
        String ttlDownloadUrl = "http://digital-agenda-data.eu/download/" + mainDstIdentifier + ".ttl.zip";
        String codelistsDownloadUrl = "http://digital-agenda-data.eu/datasets/" + mainDstIdentifierForLinks + "/@@codelists";
        String dsdDownloadUrl = "http://digital-agenda-data.eu/datasets/" + mainDstIdentifierForLinks + "/@@structure";
        String observationsCsvDownloadUrl = buildIndicatorObservationsDownloadUrl(datasetUri, uri, "text/csv");
        String observationsRdfDownloadUrl = buildIndicatorObservationsDownloadUrl(datasetUri, uri, "application/rdf+xml");

        addLastModificationDate(csvDownloadUrl);
        addLastModificationDate(ttlDownloadUrl);

        // Prepare the main dataset's status.
        String datasetStatus = mainDstSubject.getObjectValue(Predicates.ADMS_STATUS);
        if (StringUtils.isBlank(datasetStatus)) {
            datasetStatus = "http://purl.org/adms/status/UnderDevelopment";
        }

        // Prepare STAX indenting writer based on a Java XMLStreamWriter that is based on the given zipped output.
        XMLStreamWriter xmlWriter = XMLOutputFactory.newInstance().createXMLStreamWriter(zipOutput, ENCODING);
        IndentingXMLStreamWriter writer = new IndentingXMLStreamWriter(xmlWriter);

        // Start the XML document
        writer.writeStartDocument(ENCODING, "1.0");

        // Register all relevant namespaces.
        registerNamespaces(DATASET_FILE_NAMESPACES, writer);

        // Write root element start tag + default namespace
        writer.writeStartElement(Namespace.RDF.getUri(), "RDF");
        writer.writeDefaultNamespace(DEFAULT_NAMESPACE.getUri());

        // Write all other namespace prefixes.
        for (Namespace namespace : DATASET_FILE_NAMESPACES) {
            writer.writeNamespace(namespace.getPrefix(), namespace.getUri());
        }

        // Start the dataset tag.
        writer.writeStartElement(Namespace.DCAT.getUri(), "Dataset");
        writer.writeAttribute(Namespace.RDF.getUri(), "about", uri);

        // Write dct:title, dct:alternative, dct:description and dct:identifier.
        writeLangLiteral(writer, Namespace.DCT, "title", skosPrefLabel);
        writeLangLiteral(writer, Namespace.DCT, "alternative", skosAltLabel);
        writeLangLiteral(writer, Namespace.DCT, "description", indicatorDescription);
        writeLangLiteral(writer, Namespace.DCT, "identifier", skosNotation);

        // Write ecodp:interoperabilityLevel and ecodp:datasetType.
        writeConceptProperty(writer, Namespace.ECODP, "interoperabilityLevel",
                "http://open-data.europa.eu/kos/interoperability-level/Legal");
        writeConceptProperty(writer, Namespace.ECODP, "datasetType",
                "http://open-data.europa.eu/kos/dataset-type/Statistical");

        // Write ecodp:isDocumentedBy for the main dataset's visualisation page.
        writeDocumentationEntry(writer, "http://open-data.europa.eu/kos/documentation-type/RelatedDocumentation",
                "http://digital-agenda-data.eu/charts/analyse-one-indicator-and-compare-countries#"
                        + "chart={\"indicator-group\":\"any\",\"indicator\":\"" + skosNotation + "\"}",
                "Dataset visualizations.",
                "Dynamically generated visualizations (i.e. charts, diagrams) of the dataset contents.");

        // Write ecodp:isDocumentedBy for the main home page about the main dataset.
        writeDocumentationEntry(writer, "http://open-data.europa.eu/kos/documentation-type/MainDocumentation",
                "http://digital-agenda-data.eu/datasets/" + mainDstIdentifierForLinks,
                "Parent dataset homepage.",
                "Main information about metadata, structure, "
                        + "links to downloads, used in the parent dataset this indicator comes from.");

        // Write ecodp:isDocumentedBy for the DSD download.
        // NOTE(review): "Structrue" below is a typo in the published text; kept as is to leave the output unchanged.
        writeDocumentationEntry(writer, "http://open-data.europa.eu/kos/documentation-type/RelatedDocumentation",
                dsdDownloadUrl,
                "Data Structrue Definition of the parent dataset.",
                "RDF/XML formatted Data Structrue Definition of the parent dataset.");

        // Write ecodp:isDocumentedBy for the Codelists download.
        writeDocumentationEntry(writer, "http://open-data.europa.eu/kos/documentation-type/RelatedDocumentation",
                codelistsDownloadUrl,
                "Metadata codelists used in the parent dataset.",
                "RDF/XML formatted codelists for metadata used in the parent dataset this indicator comes from.");

        // Write dcat:distribution for the SPARQL query that returns indicator observations in CSV format.
        writeDistributionEntry(writer, "All available observations for the indicator, in CSV format.",
                observationsCsvDownloadUrl, "http://www.w3.org/TR/vocab-dcat#Download", "text/csv",
                "All indicator observations in CSV format. "
                        + "A simple query that returns table-file having a flat structure, "
                        + "with one row for each statistical observation and one column for each dimension or attribute.",
                false);

        // Write dcat:distribution for the SPARQL query that returns indicator observations in RDF format.
        writeDistributionEntry(writer, "All available observations for the indicator, in RDF/XML format.",
                observationsRdfDownloadUrl, "http://www.w3.org/TR/vocab-dcat#Download", "application/rdf+xml",
                "All indicator observations in RDF/XML format. "
                        + "A simple query that returns observations as triples in RDF/XML format.",
                false);

        // Write dcat:distribution for the SPARQL endpoint.
        writeDistributionEntry(writer, "SPARQL endpoint of the entire parent dataset.",
                "http://digital-agenda-data.eu/data/sparql", "http://www.w3.org/TR/vocab-dcat#WebService",
                "webservice/sparql",
                "SPARQL endpoint for querying and creating applications based on the most recent data.",
                false);

        // Write dcat:distribution for the CSV download link (with dct:modified, if known).
        writeDistributionEntry(writer, "CSV download of the entire parent dataset.",
                csvDownloadUrl, "http://www.w3.org/TR/vocab-dcat#Download", "text/csv",
                "Zipped and CSV-formatted entire parent dataset: " + mainDstIdentifier,
                true);

        // Write dcat:distribution for the TTL download link (with dct:modified, if known).
        writeDistributionEntry(writer, "N3/Turtle download of the entire parent dataset.",
                ttlDownloadUrl, "http://www.w3.org/TR/vocab-dcat#Download", "text/n3",
                "Zipped and N3-formatted entire parent dataset: " + mainDstIdentifier,
                true);

        // Write reference areas, skipping (and logging) those that have no ODP country mapping.
        List<String> refAreas = indicatorToRefAreas.get(uri);
        if (CollectionUtils.isNotEmpty(refAreas)) {
            for (String refArea : refAreas) {
                String odpCountry = ODPCountryMappings.getMappingFor(refArea);
                if (StringUtils.isNotBlank(odpCountry)) {
                    writeConceptProperty(writer, Namespace.DCT, "spatial", odpCountry);
                } else {
                    LOGGER.info("Found no ODP mapping for " + refArea);
                }
            }
        }

        // Write dct:publisher
        writeConceptProperty(writer, Namespace.DCT, "publisher",
                "http://publications.europa.eu/resource/authority/corporate-body/CNECT");

        // Write ecodp:contactPoint
        writer.writeStartElement(Namespace.ECODP.getUri(), "contactPoint");
        writer.writeStartElement(Namespace.FOAF.getUri(), "agent");
        writer.writeAttribute(Namespace.RDF.getUri(), "about",
                "http://publications.europa.eu/resource/authority/corporate-body/CNECT/C4");
        writer.writeEmptyElement(Namespace.FOAF.getUri(), "mbox");
        writer.writeAttribute(Namespace.RDF.getUri(), "resource", "mailto:CNECT-F4@ec.europa.eu");
        writer.writeEmptyElement(Namespace.FOAF.getUri(), "workplaceHomepage");
        writer.writeAttribute(Namespace.RDF.getUri(), "resource", "http://digital-agenda-data.eu/");
        writeLangLiteral(writer, Namespace.FOAF, "name", "DG CONNECT Unit F4 Knowledge Base");
        writer.writeEndElement();
        writer.writeEndElement();

        // Write dct:issued
        if (StringUtils.isNotEmpty(dctIssued)) {
            writeTypedLiteral(writer, Namespace.DCT, "issued", "http://www.w3.org/2001/XMLSchema#dateTime", dctIssued);
        }

        // Write dct:modified (mandatory, so don't even check if empty)
        writeTypedLiteral(writer, Namespace.DCT, "modified", "http://www.w3.org/2001/XMLSchema#dateTime", dctModified);

        // Write dct:license
        writeConceptProperty(writer, Namespace.DCT, "license",
                "http://open-data.europa.eu/kos/licence/EuropeanCommission");

        // Write ecodp:datasetStatus, translating the ADMS status URI into the ODP one.
        writeConceptProperty(writer, Namespace.ECODP, "datasetStatus",
                StringUtils.replace(datasetStatus, "http://purl.org/adms/status/",
                        "http://open-data.europa.eu/kos/dataset-status/"));

        // Write dct:language
        writeConceptProperty(writer, Namespace.DCT, "language",
                "http://publications.europa.eu/resource/authority/language/ENG");

        // Write ecodp:accrualPeriodicity
        writer.writeEmptyElement(Namespace.ECODP.getUri(), "accrualPeriodicity");
        writer.writeAttribute(Namespace.RDF.getPrefix(), Namespace.RDF.getUri(), "resource",
                "http://open-data.europa.eu/kos/accrual-periodicity/other");

        // Write dct:temporal, but only if an earliest observation year is known.
        Integer earliestObservationYear = indicatorYears.get(uri);
        if (earliestObservationYear != null && earliestObservationYear.intValue() > 0) {
            writer.writeStartElement(Namespace.DCT.getUri(), "temporal");
            writer.writeAttribute(Namespace.RDF.getUri(), "parseType", "Resource");
            writer.writeStartElement(Namespace.ECODP.getUri(), "periodStart");
            writer.writeCharacters(earliestObservationYear.toString());
            writer.writeEndElement();
            writer.writeEndElement();
        }

        // End the dataset tag.
        writer.writeEndElement();

        // End the root tag.
        writer.writeEndElement();

        // End the document
        writer.writeEndDocument();

        // Push everything the writer may still be buffering into the ZIP entry before the caller closes the entry.
        writer.flush();
    }

    /**
     * Writes an element whose content is the given text, tagged with {@code xml:lang="en"}.
     *
     * @param writer The XML writer to write into.
     * @param namespace Namespace of the element.
     * @param localName Local name of the element.
     * @param text Text content of the element.
     * @throws XMLStreamException Thrown by the given writer.
     */
    private void writeLangLiteral(XMLStreamWriter writer, Namespace namespace, String localName, String text)
            throws XMLStreamException {

        writer.writeStartElement(namespace.getUri(), localName);
        writer.writeAttribute(Namespace.XML.getPrefix(), Namespace.XML.getUri(), "lang", "en");
        writer.writeCharacters(text);
        writer.writeEndElement();
    }

    /**
     * Writes an element whose content is the given value, typed by an {@code rdf:datatype} attribute.
     *
     * @param writer The XML writer to write into.
     * @param namespace Namespace of the element.
     * @param localName Local name of the element.
     * @param datatypeUri Value of the rdf:datatype attribute.
     * @param value Text content of the element.
     * @throws XMLStreamException Thrown by the given writer.
     */
    private void writeTypedLiteral(XMLStreamWriter writer, Namespace namespace, String localName, String datatypeUri,
            String value) throws XMLStreamException {

        writer.writeStartElement(namespace.getUri(), localName);
        writer.writeAttribute(Namespace.RDF.getPrefix(), Namespace.RDF.getUri(), "datatype", datatypeUri);
        writer.writeCharacters(value);
        writer.writeEndElement();
    }

    /**
     * Writes an element that wraps a single empty {@code skos:Concept} element identified by {@code rdf:about}.
     *
     * @param writer The XML writer to write into.
     * @param namespace Namespace of the wrapping element.
     * @param localName Local name of the wrapping element.
     * @param conceptUri Value of the concept's rdf:about attribute.
     * @throws XMLStreamException Thrown by the given writer.
     */
    private void writeConceptProperty(XMLStreamWriter writer, Namespace namespace, String localName, String conceptUri)
            throws XMLStreamException {

        writer.writeStartElement(namespace.getUri(), localName);
        writer.writeEmptyElement(Namespace.SKOS.getUri(), "Concept");
        writer.writeAttribute(Namespace.RDF.getUri(), "about", conceptUri);
        writer.writeEndElement();
    }

    /**
     * Writes one {@code ecodp:isDocumentedBy} element: documentation type, access URL, title and description.
     *
     * @param writer The XML writer to write into.
     * @param documentationTypeUri URI of the documentation type concept.
     * @param accessUrl URL of the documentation.
     * @param title English title of the documentation.
     * @param description English description of the documentation.
     * @throws XMLStreamException Thrown by the given writer.
     */
    private void writeDocumentationEntry(XMLStreamWriter writer, String documentationTypeUri, String accessUrl,
            String title, String description) throws XMLStreamException {

        writer.writeStartElement(Namespace.ECODP.getUri(), "isDocumentedBy");
        writer.writeAttribute(Namespace.RDF.getUri(), "parseType", "Resource");
        writeConceptProperty(writer, Namespace.ECODP, "documentationType", documentationTypeUri);
        writeTypedLiteral(writer, Namespace.ECODP, "accessURL", "http://www.w3.org/2001/XMLSchema#anyURI", accessUrl);
        writeLangLiteral(writer, Namespace.DCT, "title", title);
        writeLangLiteral(writer, Namespace.DCT, "description", description);
        writer.writeEndElement();
    }

    /**
     * Writes one {@code dcat:distribution} element: title, access URL, rdf:type, distribution format, description
     * and, on request, the last modification date recorded for the access URL.
     *
     * @param writer The XML writer to write into.
     * @param title English title of the distribution.
     * @param accessUrl URL of the distribution.
     * @param typeUri URI written as the rdf:type resource of the distribution.
     * @param format Value of ecodp:distributionFormat.
     * @param description English description of the distribution.
     * @param withLastModified If true, dct:modified is written when a non-blank last modification date is available
     *        for the access URL.
     * @throws XMLStreamException Thrown by the given writer.
     */
    private void writeDistributionEntry(XMLStreamWriter writer, String title, String accessUrl, String typeUri,
            String format, String description, boolean withLastModified) throws XMLStreamException {

        writer.writeStartElement(Namespace.DCAT.getUri(), "distribution");
        writer.writeAttribute(Namespace.RDF.getUri(), "parseType", "Resource");
        writeLangLiteral(writer, Namespace.DCT, "title", title);
        writeTypedLiteral(writer, Namespace.DCAT, "accessURL", "http://www.w3.org/2001/XMLSchema#anyURI", accessUrl);
        writer.writeEmptyElement(Namespace.RDF.getUri(), "type");
        writer.writeAttribute(Namespace.RDF.getPrefix(), Namespace.RDF.getUri(), "resource", typeUri);
        writer.writeStartElement(Namespace.ECODP.getUri(), "distributionFormat");
        writer.writeCharacters(format);
        writer.writeEndElement();
        writeLangLiteral(writer, Namespace.DCT, "description", description);
        if (withLastModified) {
            String lastModificationDate = getLastModificationDateString(accessUrl);
            if (StringUtils.isNotBlank(lastModificationDate)) {
                writeTypedLiteral(writer, Namespace.DCT, "modified", "http://www.w3.org/2001/XMLSchema#dateTime",
                        lastModificationDate);
            }
        }
        writer.writeEndElement();
    }

    /**
     * Creates the {@code manifest.xml} ZIP entry and writes the package manifest into it: the manifest header, one
     * ecodp:action element per indicator (numbered from 1) and the manifest footer.
     *
     * @param zipOutput ZIP output where the entry goes into.
     * @throws XMLStreamException Thrown by methods from the {@link XMLStreamWriter} that is used by called methods.
     * @throws IOException If any sort of output stream writing error occurs.
     */
    private void createAndWriteManifestEntry(ZipArchiveOutputStream zipOutput) throws XMLStreamException, IOException {

        ZipArchiveEntry entry = new ZipArchiveEntry("manifest.xml");
        zipOutput.putArchiveEntry(entry);

        // Prepare STAX indenting writer based on a Java XMLStreamWriter that is based on the given zipped output.
        XMLStreamWriter xmlWriter = XMLOutputFactory.newInstance().createXMLStreamWriter(zipOutput, ENCODING);
        IndentingXMLStreamWriter writer = new IndentingXMLStreamWriter(xmlWriter);

        int i = 1;
        writeManifestHeader(writer);
        for (SubjectDTO indicatorSubject : indicatorSubjects) {
            writeOdpAction(writer, indicatorSubject, i++);
        }
        writeManifestFooter(writer);

        // Push everything the writer may still be buffering into the ZIP entry before the entry is closed.
        writer.flush();

        zipOutput.closeArchiveEntry();
    }

    /**
     * Writes the ecodp:action element of the manifest for the given indicator. The element's refinement child
     * depends on the ODP action this packer was constructed with.
     *
     * @param writer The XML writer to write into.
     * @param indicatorSubject The indicator for which the action is written.
     * @param index Number appended to the action's camel-case name to form the action's id.
     * @throws XMLStreamException Thrown by the given writer.
     */
    private void writeOdpAction(IndentingXMLStreamWriter writer, SubjectDTO indicatorSubject, int index)
            throws XMLStreamException {

        String indicatorUri = indicatorSubject.getUri();
        String indicatorNotation = indicatorSubject.getObjectValue(Predicates.SKOS_NOTATION);
        if (StringUtils.isBlank(indicatorNotation)) {
            indicatorNotation = URIUtil.extractURILabel(indicatorUri);
        }

        // Start the ecodp:action tag.
        writer.writeStartElement(Namespace.ECODP.getUri(), "action");

        // Write attributes of the ecodp:action tag.
        writer.writeAttribute(Namespace.ECODP.getUri(), "id", odpAction.getNameCamelCase() + index);
        writer.writeAttribute(Namespace.ECODP.getUri(), "object-ckan-name", indicatorNotation);
        writer.writeAttribute(Namespace.ECODP.getUri(), "object-type", "dataset");
        writer.writeAttribute(Namespace.ECODP.getUri(), "object-uri", indicatorUri);

        // Start the ecodp:action refinement tag.
if (ODPAction.ADD_DRAFT.equals(odpAction) || ODPAction.ADD_PUBLISHED.equals(odpAction)) { writer.writeEmptyElement(Namespace.ECODP.getUri(), "add-replace"); writer.writeAttribute(Namespace.ECODP.getUri(), "object-status", ODPAction.ADD_DRAFT.equals(odpAction) ? "draft" : "published"); writer.writeAttribute(Namespace.ECODP.getUri(), "package-path", "/datasets/" + indicatorNotation + ".rdf"); } else if (ODPAction.SET_DRAFT.equals(odpAction) || ODPAction.SET_PUBLISHED.equals(odpAction)) { writer.writeEmptyElement(Namespace.ECODP.getUri(), "change-status"); writer.writeAttribute(Namespace.ECODP.getUri(), "object-status", ODPAction.SET_DRAFT.equals(odpAction) ? "draft" : "published"); } else if (ODPAction.REMOVE.equals(odpAction)) { writer.writeEmptyElement(Namespace.ECODP.getUri(), "remove"); } else { throw new IllegalArgumentException("Unsupported ODP action: " + odpAction); } // Close the ecodp:action tag. writer.writeEndElement(); } /** * @param writer * @throws XMLStreamException */ private void writeManifestHeader(XMLStreamWriter writer) throws XMLStreamException { // Start the XML document writer.writeStartDocument(ENCODING, "1.0"); // Register all relevant namespaces. registerNamespaces(MANIFEST_FILE_NAMESPACES, writer); // Write root element start tag (i.e. <ecodp:manifest>) writer.writeStartElement(Namespace.ECODP.getUri(), "manifest"); // Write namespace prefixes in the root element start tag. for (Namespace namespace : MANIFEST_FILE_NAMESPACES) { writer.writeNamespace(namespace.getPrefix(), namespace.getUri()); } // It's ok to instantiate SimpleDateFormat every time here, since this method gets called once per package generation. 
String packageId = PACKAGE_ID_PREFIX + new SimpleDateFormat("yyyyMMdd_HHmmss").format(new Date()); String generationDateTime = Util.virtuosoDateToString(new Date()); writer.writeAttribute(Namespace.ECODP.getUri(), "creation-date-time", generationDateTime); writer.writeAttribute(Namespace.ECODP.getUri(), "package-id", packageId); writer.writeAttribute(Namespace.ECODP.getUri(), "priority", "normal"); writer.writeAttribute(Namespace.ECODP.getUri(), "publisher", "http://publications.europa.eu/resource/authority/corporate-body/CNECT"); writer.writeAttribute(Namespace.ECODP.getUri(), "version", "1.0"); writer.writeAttribute(Namespace.XSI.getUri(), "schemaLocation", "http://open-data.europa.eu/ontologies/protocol-v1.0/odp-protocol.xsd"); } /** * * @param writer * @throws XMLStreamException */ private void writeManifestFooter(XMLStreamWriter writer) throws XMLStreamException { // Close root element tag writer.writeEndElement(); } /** * * @param indSubject * @return */ private String buildIndicatorDescription(SubjectDTO indSubject) { StringBuilder sb = new StringBuilder(); // First, append the skos:definition of the indicator, without any headers as it will usually be displayed by the ODP // right after the title. String skosDefinition = indSubject.getObjectValue(Predicates.SKOS_DEFINITION); if (StringUtils.isNotBlank(skosDefinition)) { sb.append(skosDefinition); } // Now the skos:note of the indicator, with header "Notes". String skosNotes = indSubject.getObjectValue(Predicates.SKOS_NOTE); if (StringUtils.isNotBlank(skosNotes)) { if (sb.length() > 0) { sb.append("\n\n### Notes"); } sb.append("\n\n").append(skosNotes); } // Now the section about the indicator's original source. 
String indSourceUri = indSubject.getObjectValue(Predicates.DCTERMS_SOURCE); if (StringUtils.isNotBlank(indSourceUri)) { SubjectDTO sourceDTO = indicatorSources.get(indSourceUri); if (sourceDTO != null) { String sourceHomePage = sourceDTO.getObjectValue(Predicates.FOAF_PAGE); String sourceDefinition = sourceDTO.getObjectValue(Predicates.SKOS_DEFINITION); if (StringUtils.isBlank(sourceDefinition)) { sourceDefinition = sourceDTO.getObjectValue(Predicates.SKOS_PREF_LABEL); } if (StringUtils.isBlank(sourceDefinition)) { sourceDefinition = sourceDTO.getObjectValue(Predicates.SKOS_ALT_LABEL); } boolean isNotBlankSourceDefinition = StringUtils.isNotBlank(sourceDefinition); boolean isNotBlankSourceHomePage = StringUtils.isNotBlank(sourceHomePage); if (isNotBlankSourceDefinition || isNotBlankSourceHomePage) { if (sb.length() > 0) { sb.append("\n\n### Original source"); } if (isNotBlankSourceDefinition) { sb.append("\n\n").append(sourceDefinition).append(isNotBlankSourceHomePage ? ":" : ""); } if (isNotBlankSourceHomePage) { sb.append("\n\n").append(sourceHomePage); } } } } // Finally the section about the indicator's parent dataset. if (mainDstSubject != null) { String mainDstIdentifier = URIUtil.extractURILabel(mainDstSubject.getUri()); if (StringUtils.isNotBlank(mainDstIdentifier)) { String mainDatasetLink = "http://digital-agenda-data.eu/datasets/" + mainDstIdentifier.replace('-', '_'); if (sb.length() > 0) { sb.append("\n\n### Parent dataset\n\nThis dataset is part of of another dataset:"); } sb.append("\n\n").append(mainDatasetLink); } } return sb.toString().trim(); } /** * Registers the given namespaces in the given {@link XMLStreamWriter}, by calling setPrefix(...) of the latter for each. * * @param xmlWriter The namespaces to register. * @param xmlWriter The writer to register in. * @throws XMLStreamException In case the write throws exception. 
*/ private void registerNamespaces(List<Namespace> namespaces, XMLStreamWriter xmlWriter) throws XMLStreamException { for (Namespace namespace : namespaces) { xmlWriter.setPrefix(namespace.getPrefix(), namespace.getUri()); } } /** * Build a list of namespaces used in the generated RDF/XML files about the datasets. * * @return The list. */ private static List<Namespace> buildDatasetFileNamespaces() { ArrayList<Namespace> list = new ArrayList<Namespace>(); list.add(Namespace.RDF); list.add(Namespace.RDFS); list.add(Namespace.OWL); list.add(Namespace.XSD); list.add(Namespace.DC); list.add(Namespace.DCT); list.add(Namespace.DCAM); list.add(Namespace.DCAT); list.add(Namespace.ECODP); list.add(Namespace.FOAF); list.add(Namespace.SKOS); list.add(Namespace.SKOS_XL); return list; } /** * Build a list of namespaces used in the generated manifest file. * * @return The list. */ private static List<Namespace> buildManifestFileNamespaces() { ArrayList<Namespace> list = new ArrayList<Namespace>(); list.add(Namespace.ECODP); list.add(Namespace.XSI); return list; } /** * * @return */ private static DateFormat buildXmlSchemaDateFormat() { SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd'T'hh:mm:ss'Z'"); sdf.setTimeZone(TimeZone.getTimeZone("UTC")); return sdf; } /** * * @param url */ private void addLastModificationDate(String url) { if (!urlLastModificationDates.containsKey(url)) { urlLastModificationDates.put(url, URLUtil.getLastModified(url)); } } /** * * @param url * @return */ private String getLastModificationDateString(String url) { Date date = urlLastModificationDates.get(url); return date == null ? 
null : XML_SCHEMA_DATETIME_FORMAT.format(date); } /** * * @param datasetUri * @param indicatorUri * @param mimeType * @return */ private String buildIndicatorObservationsDownloadUrl(String datasetUri, String indicatorUri, String mimeType) { try { String strQuery = null; IndicatorObservationsQuery query = new IndicatorObservationsQuery(datasetUri, indicatorUri); if ("text/csv".equals(mimeType)) { strQuery = query.asSelect(); } else if ("application/rdf+xml".equals(mimeType)) { strQuery = query.asConstruct(); } StringBuilder sb = new StringBuilder(NATIVE_SPARQL_ENDPOINT_URL); if (StringUtils.isNotBlank(strQuery)) { sb.append("?query=").append(URLEncoder.encode(strQuery, "UTF-8")); if (StringUtils.isNotBlank(mimeType)) { sb.append("&format=").append(URLEncoder.encode(mimeType, "UTF-8")); } } return sb.toString(); } catch (UnsupportedEncodingException e) { throw new CRRuntimeException(e.getMessage(), e); } } }