Java tutorial
/* * Copyright or or Copr. Ecole des Mines d'Als (2012-2014) * * This software is a computer program whose purpose is to provide * several functionalities for the processing of semantic data * sources such as ontologies or text corpora. * * This software is governed by the CeCILL license under French law and * abiding by the rules of distribution of free software. You can use, * modify and/ or redistribute the software under the terms of the CeCILL * license as circulated by CEA, CNRS and INRIA at the following URL * "http://www.cecill.info". * * As a counterpart to the access to the source code and rights to copy, * modify and redistribute granted by the license, users are provided only * with a limited warranty and the software's author, the holder of the * economic rights, and the successive licensors have only limited * liability. * In this respect, the user's attention is drawn to the risks associated * with loading, using, modifying and/or developing or reproducing the * software by the user in light of its specific status of free software, * that may mean that it is complicated to manipulate, and that also * therefore means that it is reserved for developers and experienced * professionals having in-depth computer knowledge. Users are therefore * encouraged to load and test the software's suitability as regards their * requirements in conditions enabling the security of their systems and/or * data to be ensured and, more generally, to use and operate it in the * same conditions as regards security. * * The fact that you are presently reading this means that you have had * knowledge of the CeCILL license and that you accept its terms. */ package com.github.lgi2p.obirs.utils; import com.github.lgi2p.obirs.core.model.ObirsQuery; import com.github.lgi2p.obirs.core.model.ObirsResult; import com.github.lgi2p.obirs.core.model.RefinedObirsQuery; import com.github.lgi2p.obirs.core.engine.utils.ConceptMatch; import com.github.lgi2p.obirs.utils.IndexerJSON.ItemMetadata; import com.github.lgi2p.obirs.utils.autocomplete.Autocompletion_Trie; import java.io.File; import java.io.IOException; import java.io.PrintWriter; import java.io.StringWriter; import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; import org.json.simple.JSONArray; import org.json.simple.JSONObject; import org.json.simple.parser.JSONParser; import org.json.simple.parser.ParseException; import org.openrdf.model.URI; import org.slf4j.LoggerFactory; import slib.graph.model.impl.repo.URIFactoryMemory; import slib.graph.model.repo.URIFactory; import slib.sml.sm.core.engine.SM_Engine; import slib.utils.ex.SLIB_Ex_Critic; /** * * Utility class used to convert JSON format outputs by the annotator into the * JSON format expected to load OBIRS index. The annotator generates a result * file for each item. The input file expected by Obirs is: * {"id":"14622599","title":"XXXXX","conceptIds":["http://obirs/D008382",...],"href":"http://www..."} * * @author Sbastien Harispe <sebastien.harispe@gmail.com> */ public class JSONConverter { static org.slf4j.Logger logger = LoggerFactory.getLogger(JSONConverter.class); // {"concepts": [{"uri": "http://www.cea.fr/ontotoxnuc#AnalyseStatistique", "weight": 0.5},{"uri":"http://www.cea.fr/ontotoxnuc#Uranium", "weight": 0.5}]} public static ObirsQuery parseObirsJSONQuery(SM_Engine engine, String jsonQuery) throws ParseException, Exception { logger.info("parsing query: " + jsonQuery); URIFactory factory = URIFactoryMemory.getSingleton(); jsonQuery = jsonQuery.replace("\n", ""); jsonQuery = jsonQuery.replace("\r", ""); JSONParser parser = new JSONParser(); Object obj = parser.parse(jsonQuery); JSONObject jsonObject = (JSONObject) obj; String aggregator = (String) jsonObject.get("aggregator"); String similarityMeasure = (String) jsonObject.get("similarityMeasure"); Double aggregatorParameter; if (jsonObject.get("aggregatorParameter") != null) { aggregatorParameter = Double.parseDouble(jsonObject.get("aggregatorParameter").toString()); } else { aggregatorParameter = 2.0; } Double scoreThreshold = (Double) jsonObject.get("scoreThreshold"); Integer numberOfResults = (Integer) jsonObject.get("numberOfResults"); JSONArray concepts = (JSONArray) jsonObject.get("concepts"); Map<URI, Double> conceptsMap = new HashMap<URI, Double>(); ObirsQuery query = new ObirsQuery(conceptsMap, aggregator, similarityMeasure, aggregatorParameter, scoreThreshold, numberOfResults); if (concepts != null) { Iterator<JSONObject> iterator = concepts.iterator(); while (iterator.hasNext()) { JSONObject concept = iterator.next(); Double weight = Double.parseDouble(concept.get("weight").toString()); Object uriString = concept.get("uri"); if (uriString == null) { throw new SLIB_Ex_Critic("Error a URI must be specified for each query concept"); } URI uri = factory.getURI(uriString.toString()); if (engine.getGraph().containsVertex(uri)) { query.addConcept(uri, weight); } else { throw new Exception("concept associated to URI '" + uri + "' does not exists..."); } } } return query; } //{ // "selectedItemURIs":["http://www.mines-ales.fr/obirs/items/1132","http://www.mines-ales.fr/obirs/items/1133"], // "rejectedItemURIs":["http://www.mines-ales.fr/obirs/items/1131"], // "query": { // "concepts": [ // {"uri": "http://www.cea.fr/ontotoxnuc#AnalyseStatistique", "weight": 0.5}, // {"uri":"http://www.cea.fr/ontotoxnuc#Uranium", "weight": 0.5} // ] // } //} public static RefinedObirsQuery parseRefinedObirsQuery(SM_Engine engine, String jsonQuery) throws ParseException, IOException, Exception { logger.info("parsing json refined query"); logger.info("query: " + jsonQuery); jsonQuery = jsonQuery.replace("\n", ""); jsonQuery = jsonQuery.replace("\r", ""); JSONParser parser = new JSONParser(); Object obj = parser.parse(jsonQuery); JSONObject jsonObject = (JSONObject) obj; JSONObject _query = (JSONObject) jsonObject.get("query"); StringWriter jsQuery = new StringWriter(); _query.writeJSONString(jsQuery); ObirsQuery obirsQuery = parseObirsJSONQuery(engine, jsQuery.toString()); List<URI> selectedItemURIs = toURIs((List<String>) jsonObject.get("selectedItemURIs")); List<URI> rejectedItemURIs = toURIs((List<String>) jsonObject.get("rejectedItemURIs")); return new RefinedObirsQuery(obirsQuery, selectedItemURIs, rejectedItemURIs); } // { // "results":[ // { // "itemTitle":"f835.json", // "score":1.0, // "itemId":"919", // "itemURI":"ns0:919", // "concepts":[ // { // "relationType":"ns2:EXACT", // "score":1.0, // "queryConceptURI":"ns1:AnalyseStatistique", // "matchingConceptURI":"ns1:AnalyseStatistique" // }, // { // "relationType":"ns2:EXACT", // "score":1.0, // "queryConceptURI":"ns1:Uranium", // "matchingConceptURI":"ns1:Uranium" // } // ], // "href":"\/data\/toxnuc\/toxnuc_annots_5_11_14\/annots\/f835.json" // }, // ... // ], // "infoConcepts":[ // { // "label":"statistical analysis", // "uri":"ns1:AnalyseStatistique" // }, // { // "label":"actinide", // "uri":"ns1:Actinide" // }, // ... // ], // "prefixes":[ // {"ns":"http:\/\/www.cea.fr\/ontotoxnuc#","prefix":"ns1"}, // {"ns":"http:\/\/www.mines-ales.fr\/obirs\/match_type\/","prefix":"ns2"}, // ... // ] //} public static String jsonifyObirsResults(List<ObirsResult> results, IndexerJSON indexer, Map<URI, String> indexURI2Label) throws IOException { Map<String, String> namespace2prefix = new HashMap<String, String>(); JSONArray jsonResults = new JSONArray(); Set<URI> infoConcepts = new HashSet(); StringWriter json = new StringWriter(); for (ObirsResult result : results) { JSONObject jsonResult = new JSONObject(); URI itemResultURI = result.getItemURI(); ItemMetadata metadata = indexer.getMetadata(itemResultURI); jsonResult.put("itemTitle", metadata.title); jsonResult.put("href", metadata.href); jsonResult.put("itemId", metadata.idLiteral); jsonResult.put("itemURI", buildShortURI(result.getItemURI(), namespace2prefix)); jsonResult.put("score", result.getScore()); JSONArray jsonConcepts = new JSONArray(); if (result.getConcepts() != null) { for (ConceptMatch concept : result.getConcepts()) { JSONObject jsonConcept = new JSONObject(); jsonConcept.put("queryConceptURI", buildShortURI(concept.getQueryConceptURI(), namespace2prefix)); jsonConcept.put("matchingConceptURI", buildShortURI(concept.getMatchingConceptURI(), namespace2prefix)); infoConcepts.add(concept.getQueryConceptURI()); infoConcepts.add(concept.getMatchingConceptURI()); jsonConcept.put("relationType", buildShortURI(concept.getRelationType(), namespace2prefix)); jsonConcept.put("score", concept.getScore()); jsonConcepts.add(jsonConcept); } } jsonResult.put("concepts", jsonConcepts); jsonResults.add(jsonResult); } JSONArray jsonInfoConcepts = new JSONArray(); for (URI uri : infoConcepts) { JSONObject jsonQueryConcept = new JSONObject(); jsonQueryConcept.put("uri", buildShortURI(uri, namespace2prefix)); jsonQueryConcept.put("label", indexURI2Label.get(uri)); jsonInfoConcepts.add(jsonQueryConcept); } JSONArray jsonInfoNamespace = new JSONArray(); for (Map.Entry<String, String> e : namespace2prefix.entrySet()) { JSONObject jsonQueryConcept = new JSONObject(); jsonQueryConcept.put("ns", e.getKey()); jsonQueryConcept.put("prefix", e.getValue()); jsonInfoNamespace.add(jsonQueryConcept); } JSONObject finalJSONresult = new JSONObject(); finalJSONresult.put("prefixes", jsonInfoNamespace); finalJSONresult.put("results", jsonResults); finalJSONresult.put("infoConcepts", jsonInfoConcepts); finalJSONresult.writeJSONString(json); return json.toString(); } /** * Convert a given ObirsQuery object into a JSON format * * @param query * @return * @throws IOException */ public static String jsonifyObirsQuery(ObirsQuery query) throws IOException { JSONObject jsonObject = new JSONObject(); jsonObject.put("aggregator", query.getAggregator()); jsonObject.put("aggregatorParameter", query.getAggregatorParameter()); jsonObject.put("similarityMeasure", query.getSimilarityMeasure()); jsonObject.put("scoreThreshold", query.getScoreThreshold()); jsonObject.put("numberOfResults", query.getNumberOfResults()); StringWriter jsonQuery = new StringWriter(); JSONArray jsonConcepts = new JSONArray(); for (Map.Entry<URI, Double> queryConcept : query.getConcepts().entrySet()) { JSONObject jsonConcept = new JSONObject(); URI conceptURI = queryConcept.getKey(); jsonConcept.put("uri", conceptURI.stringValue()); jsonConcept.put("weight", queryConcept.getValue()); jsonConcepts.add(jsonConcept); } jsonObject.put("concepts", jsonConcepts); jsonObject.writeJSONString(jsonQuery); return jsonQuery.toString(); } /** * {"log": "", "annotations" : [{ "text": "uranium", "startpos": 723, * "endpos":730, "uris":["http://www.cea.fr/ontotoxnuc#Uranium"] },{ "text": * "protein", "startpos": 1837, "endpos":1845, * "uris":["http://www.cea.fr/ontotoxnuc#Proteine"] },{ "text": "plant", * "startpos": 4661, "endpos":4666, * "uris":["http://www.cea.fr/ontotoxnuc#Plante"] }]} * * @param jsonQuery * @return * @throws ParseException * @throws Exception */ protected static String toJSONindexFormat(int id, String title, String content, String href) throws ParseException, Exception { URIFactory f = URIFactoryMemory.getSingleton(); content = content.replace("\n", "").replace("\r", ""); Object obj = new JSONParser().parse(content); JSONObject jsonObject = (JSONObject) obj; JSONArray annotations = (JSONArray) jsonObject.get("annotations"); Set<URI> concepts = new HashSet<URI>(); if (annotations != null) { Iterator<JSONObject> iterator = annotations.iterator(); while (iterator.hasNext()) { JSONObject concept = iterator.next(); JSONArray uris = (JSONArray) concept.get("uris"); for (int i = 0; i < uris.size(); i++) { concepts.add(f.getURI((String) uris.get(i))); } } } String urisAsString = ""; for (URI u : concepts) { if (!urisAsString.isEmpty()) { urisAsString += ","; } urisAsString += "\"" + u.stringValue() + "\""; } return "{\"id\":\"" + id + "\",\"title\":\"" + title + "\",\"conceptIds\":[" + urisAsString + "],\"href\":\"" + href + "\"}"; } private static List<URI> toURIs(List<String> list) { List<URI> uris = new ArrayList<URI>(list.size()); URIFactory f = URIFactoryMemory.getSingleton(); for (String s : list) { uris.add(f.getURI(s)); } return uris; } private static String buildShortURI(URI itemURI, Map<String, String> namespace2prefix) { String ns, prefix, localname; ns = itemURI.getNamespace(); localname = itemURI.getLocalName(); if (!namespace2prefix.containsKey(ns)) { prefix = "ns" + namespace2prefix.size(); namespace2prefix.put(ns, prefix); } else { prefix = namespace2prefix.get(ns); } return prefix + ":" + localname; } public static String buildJSONString(Set<Autocompletion_Trie.AutocompleteElement> autocomplete) throws IOException { JSONArray jsonResults = new JSONArray(); StringWriter json = new StringWriter(); for (Autocompletion_Trie.AutocompleteElement result : autocomplete) { JSONObject jsonResult = new JSONObject(); jsonResult.put("uri", result.id); jsonResult.put("label", result.label); jsonResults.add(jsonResult); } JSONObject finalJSONresult = new JSONObject(); finalJSONresult.put("results", jsonResults); finalJSONresult.writeJSONString(json); return json.toString(); } public static void main(String[] args) throws Exception { String annotdir = "/data/toxnuc/toxnuc_annots_5_11_14/annots"; String annotsIndex = "/data/toxnuc/toxnuc_annots_5_11_14.json"; File folder = new File(annotdir); File[] listOfFiles = folder.listFiles(); PrintWriter printWriter = new PrintWriter(annotsIndex); int i = 0; for (File file : listOfFiles) { if (file.isFile()) { System.out.println(file.getPath()); String title = file.getName(); String href = file.getPath(); String json = toJSONindexFormat(i, title, Utils.readFileAsString(file), href); i++; printWriter.println(json); } } printWriter.close(); System.out.println("consult: " + annotsIndex); } }