Java tutorial
/* * To change this license header, choose License Headers in Project Properties. * To change this template file, choose Tools | Templates * and open the template in the editor. */ package fusion; import java.awt.BorderLayout; import java.awt.GridBagConstraints; import java.awt.GridBagLayout; import java.awt.event.ActionEvent; import java.awt.event.ActionListener; import java.io.BufferedReader; import java.io.File; import java.io.FileNotFoundException; import java.io.FileReader; import java.io.IOException; import java.net.URI; import java.net.URISyntaxException; import java.util.*; import java.util.Map.Entry; import java.util.logging.Level; import java.util.logging.Logger; import java.util.regex.Pattern; import java.text.SimpleDateFormat; import javax.swing.*; import javax.swing.tree.DefaultMutableTreeNode; import org.apache.commons.lang3.StringUtils; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.select.Elements; import org.semanticweb.owl.align.Alignment; import org.semanticweb.owl.align.AlignmentException; import org.semanticweb.owl.align.AlignmentProcess; import org.semanticweb.owl.align.Cell; import com.hp.hpl.jena.query.ResultSet; import com.hp.hpl.jena.rdf.model.*; import com.hp.hpl.jena.vocabulary.*; import fr.inrialpes.exmo.align.impl.method.StringDistAlignment; import fr.inrialpes.exmo.align.parser.AlignmentParser; import java.io.*; import java.nio.charset.Charset; /** * * @author ioanna */ public class Fusion { static float homogeneityThreshold; static float occurenceFrequencyThreshold; static ArrayList<SameAsLink> links = new ArrayList<>(); static ArrayList<ArrayList<URI>> reconciled = new ArrayList<>(); // Equivalence // class static HashMap<String, Source> sources = new HashMap<>(); static HashMap<Integer, URI> distinctURLs = new HashMap<>(); static HashMap<URI, Instance> instances = new HashMap<>(); private static final JFrame guiFrame = new JFrame(); private static final JFrame fusionFrame = new JFrame(); //private static final JPanel container = new JPanel(); private static JSplitPane split; private static boolean treeDisplayed = false; private static Model graph; // le graphe RDF private static Model fusionGraph; // le graphe RDF apres la fusion // fichiers // utilisateurs // private static String datasetFile1 = "dataINA/dataset_novideoperson-clean.ttl"; // private static String datasetFile2 = "dataINA/dataset_novideoperson-clean.ttl"; // private static String sameAsLinksFile = "dataINA/res.n3"; private static String datasetFile1 = "PR/restaurants/restaurant1.rdf"; private static String datasetFile2 = "PR/restaurants/restaurant2.rdf"; private static String mappingFile = "PR/rest-mappings.rdf"; private static String sameAsLinksFile = "PR/restaurants/restaurant1_restaurant2_goldstandard.rdf"; // fixes private static String sourceInfoFile = "Fusion/sourceInfo.txt"; //syntaxe windows private static String logicRulesFile = "Fusion/logicRules.txt"; private static String outputFile = "qualityAnnotation.rdf"; public static void main(String[] args) { // try{ // // final StringBuilder sb = new StringBuilder(); // sb.append("LOG-"); // SimpleDateFormat str = new SimpleDateFormat("yyyy-MM-dd-HH-mm-ss-"); // // sb.append(str.format(new Date())); // final File file = File.createTempFile(sb.toString(), ".log"); // final PrintStream printStream = new PrintStream(file); // System.setOut(printStream); // //System.setErr(printStream); initGui(); initListPanel(); // }catch(Exception e){ // System.out.println(e.toString()); // } } // init JFrame private static void initGui() { guiFrame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE); guiFrame.setTitle("Parameters, RDFTree, Data fusion"); guiFrame.setSize(1200, 800); guiFrame.setLocationRelativeTo(null); //container.setLayout(new BoxLayout(container, BoxLayout.X_AXIS)); //guiFrame.add(container); split = new JSplitPane(JSplitPane.HORIZONTAL_SPLIT, null, null); guiFrame.add(split); } // GUI to submit thresholds and start the process private static void initListPanel() { final JPanel listPanel = new JPanel(); listPanel.setVisible(true); // Buttons JButton setSameAsLinksFile = new JButton("Set SameAsLinks file"); JButton setDatasetFile1 = new JButton("Set Dataset1 file"); JButton setDatasetFile2 = new JButton("Set Dataset2 file"); JButton sourceButton = new JButton(" Edit Source Information "); JButton rulesButton = new JButton(" Edit Logic Rules "); JLabel homogeneityLabel = new JLabel("Threshold for Homogeneity"); final JTextField homogeneityTextfield = new JTextField("0.10"); JLabel frequencyLabel = new JLabel("Threshold for Occurence Frequency"); final JTextField frequencyTextfield = new JTextField("0.01"); homogeneityTextfield.setInputVerifier(new MyInputVerifier()); frequencyTextfield.setInputVerifier(new MyInputVerifier()); JButton constructINAButton = new JButton(" Construct INA graph "); JButton constructButton = new JButton("Generic construct graph"); JButton loadButton = new JButton(" Load previous graph "); JButton fusionButton = new JButton(" Data fusion "); JButton displayFusionButton = new JButton(" Display fused data "); setSameAsLinksFile.addActionListener(new ActionListener() { public void actionPerformed(ActionEvent event) { try { setSameAsLinksFile(); } catch (IOException e) { e.printStackTrace(); } } }); setDatasetFile1.addActionListener(new ActionListener() { public void actionPerformed(ActionEvent event) { try { setDatasetFile1(); } catch (IOException e) { e.printStackTrace(); } } }); setDatasetFile2.addActionListener(new ActionListener() { public void actionPerformed(ActionEvent event) { try { setDatasetFile2(); } catch (IOException e) { e.printStackTrace(); } } }); sourceButton.addActionListener(new ActionListener() { public void actionPerformed(ActionEvent event) { try { java.awt.Desktop.getDesktop().open(new File(sourceInfoFile)); } catch (IOException ex) { Logger.getLogger(Fusion.class.getName()).log(Level.SEVERE, null, ex); } } }); rulesButton.addActionListener(new ActionListener() { public void actionPerformed(ActionEvent event) { try { java.awt.Desktop.getDesktop().open(new File(logicRulesFile)); } catch (IOException ex) { Logger.getLogger(Fusion.class.getName()).log(Level.SEVERE, null, ex); } } }); constructINAButton.addActionListener(new ActionListener() { public void actionPerformed(ActionEvent event) { if (homogeneityTextfield.getText().length() != 0 && frequencyTextfield.getText().length() != 0) { Fusion.homogeneityThreshold = Float.parseFloat(homogeneityTextfield.getText()); Fusion.occurenceFrequencyThreshold = Float.parseFloat(frequencyTextfield.getText()); try { constructINAGraph(); //genericConstructGraph("", "", "", "", "", "", ""); } catch (FileNotFoundException ex) { Logger.getLogger(Fusion.class.getName()).log(Level.SEVERE, null, ex); } catch (IOException ex) { Logger.getLogger(Fusion.class.getName()).log(Level.SEVERE, null, ex); } catch (URISyntaxException e) { e.printStackTrace(); } } else { JOptionPane.showMessageDialog(guiFrame, "Please fill in all the fields!"); } } }); loadButton.addActionListener(new ActionListener() { public void actionPerformed(ActionEvent event) { loadGraph(); } }); fusionButton.addActionListener(new ActionListener() { public void actionPerformed(ActionEvent event) { executeFusion(); } }); constructButton.addActionListener(new ActionListener() { public void actionPerformed(ActionEvent event) { try { genericConstructGraph(); } catch (IOException | AlignmentException | URISyntaxException e) { e.printStackTrace(); } } }); displayFusionButton.addActionListener(new ActionListener() { public void actionPerformed(ActionEvent event) { displayFusedGraph(); } }); listPanel.setLayout(new GridBagLayout()); GridBagConstraints c = new GridBagConstraints(); c.gridx = 0; c.gridy = 0; listPanel.add(setSameAsLinksFile, c); c.gridx = 1; c.gridy = 0; listPanel.add(setDatasetFile1, c); c.gridx = 2; c.gridy = 0; listPanel.add(setDatasetFile2, c); c.gridx = 1; c.gridy = 1; listPanel.add(sourceButton, c); c.gridx = 1; c.gridy = 2; listPanel.add(rulesButton, c); c.gridx = 1; c.gridy = 3; listPanel.add(homogeneityLabel, c); c.gridx = 2; c.gridy = 4; listPanel.add(homogeneityTextfield, c); c.gridx = 1; c.gridy = 4; listPanel.add(frequencyLabel, c); c.gridx = 2; c.gridy = 5; listPanel.add(frequencyTextfield, c); c.gridx = 1; c.gridy = 5; listPanel.add(constructINAButton, c); c.gridx = 1; c.gridy = 6; listPanel.add(constructButton, c); c.gridx = 1; c.gridy = 7; listPanel.add(loadButton, c); c.gridx = 1; c.gridy = 8; listPanel.add(fusionButton, c); c.gridx = 1; c.gridy = 9; listPanel.add(displayFusionButton, c); //container.add(listPanel, BorderLayout.WEST); split.add(listPanel); guiFrame.setVisible(true); } /* Les trois fonctions suivantes permettent de choisir un fichier dans l'arborescence Il y en a trois mais il existe sans doute une technique pour n'en faire qu'une avec un parametre */ private static void setSameAsLinksFile() throws IOException { JFileChooser dialogue = new JFileChooser(new File(".")); File fichier; if (dialogue.showOpenDialog(null) == JFileChooser.APPROVE_OPTION) { fichier = dialogue.getSelectedFile(); sameAsLinksFile = fichier.getPath(); } } private static void setDatasetFile1() throws IOException { JFileChooser dialogue = new JFileChooser(new File(".")); File fichier; if (dialogue.showOpenDialog(null) == JFileChooser.APPROVE_OPTION) { fichier = dialogue.getSelectedFile(); datasetFile1 = fichier.getPath(); } } private static void setDatasetFile2() throws IOException { JFileChooser dialogue = new JFileChooser(new File(".")); File fichier; if (dialogue.showOpenDialog(null) == JFileChooser.APPROVE_OPTION) { fichier = dialogue.getSelectedFile(); datasetFile2 = fichier.getPath(); } } /** * @throws FileNotFoundException * @throws IOException * @throws URISyntaxException */ private static void constructINAGraph() throws FileNotFoundException, IOException, URISyntaxException { getSourcesInfo(new File("Fusion/sourceInfo.txt")); getINASameAsLinks(new File("dataINA/res.n3")); // fichier en parametre ... // "Fusion\\logicRules.txt" System.out.print("Creation des classes d'equivalences... "); createReconciledReferences(); System.out.println("fait."); System.out.print("Chargement des donnees... "); // fichier pas en parametre ... parseINAData(); System.out.println("fait."); System.out.print("Calcul des scores de qualites... "); calculateScore(); System.out.println("fait"); System.out.print("Ecriture avec annotations dans le fichier " + outputFile + "... "); writeINAData(outputFile); System.out.println("fait."); System.out.println("Fin de l'xcution."); } private static void genericConstructGraph() throws FileNotFoundException, IOException, AlignmentException, URISyntaxException { //pas besoin des ontologies... // String sourceInfoFile, String logicRulesFile, String dataFile1, String dataFile2, String sameAsLinksFile //getSameAsLinks(new File(mappingFile)); getSourcesInfo(new File(sourceInfoFile)); getSameAsLinks(new File(sameAsLinksFile)); //getINASameAsLinks(new File(sameAsLinksFile)); // fichier en parametre ... // "Fusion\\logicRules.txt" System.out.print("Creation des classes d'equivalences... "); createReconciledReferences(); System.out.println("fait."); System.out.print("Creation des classes d'equivalences... "); createReconciledReferences(); System.out.println("fait."); // rewrite - mapping (fait de facon externe) System.out.print("Chargement des donnees... "); parseData(datasetFile1, datasetFile2); System.out.println("fait."); System.out.print("Calcul des scores de qualites... "); calculateScore(); System.out.println("fait"); System.out.print("Ecriture avec annotations dans le fichier " + outputFile + "... "); writeData(outputFile); System.out.println("fait."); System.out.println("Fin de l'xcution."); } // data fusion avec la requete private static void executeFusion() { System.out.print("Extraction de resultats significatifs... "); try { Extraction extract = new Extraction(graph); // extract.testStatement(); ////TEST////// fusionGraph = extract.fusionModel(); System.out.println("fait."); } catch (Exception e) { System.out.println("Erreur: " + e); } System.out.println("Fin de l'excution."); } // open outputFile and load it into a Model private static void loadGraph() { System.out.print("Chargement du graphe... "); try { Model model = ModelFactory.createDefaultModel(); model.read(outputFile); graph = model; System.out.println("fait."); } catch (Exception e) { System.out.println("Erreur: " + e); } System.out.println("Fin de l'excution."); //////////////// TEST// Pour RDF validator /* * Model test = ModelFactory.createDefaultModel(); * test.read("fusionResult.rdf"); Extraction ex = new Extraction(test); * String query = "CONSTRUCT {<http://locahost/rdf/saSet1> ?prop ?val ." * + " ?val ?p ?v}" + //<http://localhost/rdf/hasValue> ou ?p * "WHERE {<http://localhost/rdf/saSet1> ?prop ?val ." + * " ?val ?p ?v}"; //pareil ex.constructQuery(query, * "queryResult.rdf"); */ ///////// FIN TEST// } private static void displayFusedGraph() { if (!treeDisplayed) { System.out.print("Affichage des resultats... "); try { RDFTree displayer = new RDFTree(fusionGraph, graph); JScrollPane treeDisplay = displayer.display(); //container.add(treeDisplay, BorderLayout.CENTER); split.add(treeDisplay); guiFrame.setVisible(true); treeDisplayed = true; System.out.println("fait."); } catch (Exception e) { System.out.println("Erreur: " + e); } System.out.println("Fin de l'excution."); } else System.out.println("Les donnees sont deja affichees."); } // parse sourceInfo.txt file and store freshness and reliability of sources private static void getSourcesInfo(File file) throws FileNotFoundException, IOException { try (BufferedReader br = new BufferedReader(new FileReader(file))) { String line = br.readLine(); String name; float reliability; float freshness; while (line != null) { if (line.contains("source")) { name = line.substring(line.indexOf(":") + 2); line = br.readLine(); reliability = Float.parseFloat(line.substring(line.indexOf("reliability") + 12)); line = br.readLine(); freshness = Float.parseFloat(line.substring(line.indexOf("freshness") + 10)); Source source = new Source(name, reliability, freshness); sources.put(source.getName(), source); } line = br.readLine(); } } } //parse logicRules.txt file and store compaptibility rules private static void getLogicRules(File file) throws FileNotFoundException, IOException { try (BufferedReader br = new BufferedReader(new FileReader(file))) { String line = br.readLine(); String[] parts; while (line != null) { parts = line.split(" "); //stocker parts (prop1, comparateur, prop2) dans une structure de donne static adquate line = br.readLine(); } } } // parse sameAs links for INA dataset. Create SameAsLink objects and // distinctURLs table private static void getINASameAsLinks(File file) throws FileNotFoundException, IOException, URISyntaxException { try (BufferedReader br = new BufferedReader(new FileReader(file))) { StringBuilder sb = new StringBuilder(); String line = br.readLine(); while (line != null) { if (line.contains("owl:sameAs")) { sb.append(line); sb.append(System.lineSeparator()); } line = br.readLine(); } String linksString = sb.toString(); String[] linksArray = linksString.split("\n"); for (int i = 0; i < linksArray.length; i++) { String value1 = linksArray[i].substring(0, linksArray[i].indexOf(" owl:sameAs")); String value2 = linksArray[i].substring(linksArray[i].lastIndexOf("owl:sameAs ") + 11, linksArray[i].length() - 1); value1 = StringUtils.removeStart(value1, "<"); value2 = StringUtils.removeStart(value2, "<"); value1 = StringUtils.removeStart(value1, "http://www.ina.fr/thesaurus/pp/"); value2 = StringUtils.removeStart(value2, "http://www.ina.fr/thesaurus/pp/"); value1 = StringUtils.removeStart(value1, "http://fr.dbpedia.org/resource/"); // value2 = StringUtils.removeStart(value2, "http://fr.dbpedia.org/resource/"); // value1 = StringUtils.removeEnd(value1, "."); value2 = StringUtils.removeEnd(value2, "."); value1 = StringUtils.removeEnd(value1, ">"); value2 = StringUtils.removeEnd(value2, ">"); if (!distinctURLs.containsKey(value1.hashCode())) distinctURLs.put(value1.hashCode(), new URI(value1)); if (!distinctURLs.containsKey(value2.hashCode())) distinctURLs.put(value2.hashCode(), new URI(value2)); links.add(new SameAsLink(distinctURLs.get(value1.hashCode()), distinctURLs.get(value2.hashCode()))); // adding same as links } for (Integer name : distinctURLs.keySet()) { Instance instance = new Instance(distinctURLs.get(name), null); if (distinctURLs.get(name).toString().contains("dbpedia")) instance.setSource(sources.get("fr.dbpedia.org")); else instance.setSource(sources.get("www.ina.fr")); instances.put(instance.getUri(), instance); } } } // ces trois methodes seront supprims lorsque l'on utilisera des URI plutot que des Strings ? public static String getLocalFromUri(final String uri) throws URISyntaxException { // par exemple http://localhost/rdf/val1234 -> val1234 return uri.replaceFirst(".*/([^/?]+).*", "$1"); //URI u = new URI(uri); //return u.getPath(); } public static String getFragmentFromUri(final String uri) throws URISyntaxException { // par exemple http://localhost/rdf/val1234 -> localhost URI u = new URI(uri); return u.getFragment(); } public static String getHostFromUri(final String uri) throws URISyntaxException { // par exemple http://localhost/rdf/val1234 -> localhost URI u = new URI(uri); return u.getHost(); } private static void getMappings(File file) throws FileNotFoundException, IOException, AlignmentException { AlignmentParser a = new AlignmentParser(); Alignment o = a.parse(file.toURI()); for (Cell c : o) { java.net.URI value1 = c.getObject1AsURI(); // getLocalFromUri( java.net.URI value2 = c.getObject2AsURI(); // System.out.println("..."+value1); // System.out.println("---"+value2); // keeps only the end of URI ? // if (!distinctURLs.containsKey(value1.hashCode())) // distinctURLs.put(value1.hashCode(), value1); // if (!distinctURLs.containsKey(value2.hashCode())) // distinctURLs.put(value2.hashCode(), value2); // // links.add(new SameAsLink(distinctURLs.get(value1.hashCode()), distinctURLs.get(value2.hashCode()))); // adding same as links } // ADD SOURCES //detecter sources (et demander infos a l'utilisateur ?) // for (Integer intName : distinctURLs.keySet()) { // URI name = distinctURLs.get(intName); // Source source = sources.get(name.getHost()); // detecter la source // Instance instance = new Instance(name, source); // // instances.put(instance.getUri(), instance); // } } // parse sameAs links for dataset. Create SameAsLink objects and distinctURLs table private static void getSameAsLinks(File file) throws FileNotFoundException, IOException, AlignmentException { AlignmentParser a = new AlignmentParser(); Alignment o = a.parse(file.toURI()); for (Cell c : o) { java.net.URI value1 = c.getObject1AsURI(); // getLocalFromUri( java.net.URI value2 = c.getObject2AsURI(); // keeps only the end of URI ? if (!distinctURLs.containsKey(value1.hashCode())) distinctURLs.put(value1.hashCode(), value1); if (!distinctURLs.containsKey(value2.hashCode())) distinctURLs.put(value2.hashCode(), value2); links.add(new SameAsLink(distinctURLs.get(value1.hashCode()), distinctURLs.get(value2.hashCode()))); // adding same as links } // ADD SOURCES //detecter sources (et demander infos a l'utilisateur ?) for (Integer intName : distinctURLs.keySet()) { URI name = distinctURLs.get(intName); Source source = sources.get(name.getHost()); // detecter la source Instance instance = new Instance(name, source); instances.put(instance.getUri(), instance); } } // create a new Array of reconciled references private static void newReconciledReference(URI link1, URI link2) { ArrayList<URI> al = new ArrayList<>(); al.add(link1); al.add(link2); reconciled.add(al); } // create an array of arrays of reconciled references private static void createReconciledReferences() { for (int i = 0; i < links.size(); i++) { URI link1 = links.get(i).getValue1(); URI link2 = links.get(i).getValue2(); if (reconciled.isEmpty()) { newReconciledReference(link1, link2); } else { boolean first = false; boolean second = false; for (int k = 0; k < reconciled.size(); k++) { if (reconciled.get(k).contains(link1)) { first = true; } if (reconciled.get(k).contains(link2)) { second = true; } if (first && !second) { reconciled.get(k).add(link2); break; } else if (!first && second) { reconciled.get(k).add(link1); break; } } // if neither link exists, create a new reconciled reference // array if (!first && !second) { newReconciledReference(link1, link2); } } } } // parse INA dataset, create all objects for properties, instances, triples // and values private static void parseINAData() throws FileNotFoundException, IOException { try (Scanner scanner = new Scanner(new File("dataINA/dataset_novideoperson.ttl")) .useDelimiter(Pattern.compile("^\\s*$", Pattern.MULTILINE))) { // On parcourt le fichier avec les donnees while (scanner.hasNext()) { String token = scanner.next(); String propName = ""; String propValue = ""; token = new String(token.trim().getBytes(), Charset.forName("UTF-8")); // for (Integer name: distinctURLs.keySet()){ // String value = new String(distinctURLs.get(name).getBytes(), // Charset.forName("UTF-8")); for (URI name : instances.keySet()) { Instance inst = instances.get(name); // Moins optimise que // de parcourir les // distinctURLs ? URI value = inst.getUri(); if (token.contains(value.toString())) { // le scanner lit l'URL try (Scanner scannerToken = new Scanner(token)) { while (scannerToken.hasNextLine()) { String line = scannerToken.nextLine(); line = line.trim(); // properties start with "notice:" if (line.startsWith("notice")) { if (line.contains(" ")) { propName = line.substring(7, line.indexOf(" ")); propValue = line.substring(line.indexOf(" ")).trim().replaceAll("\\s+", " "); // Instance inst = instances.get(value); // add property in map if it doesn't // exist Property prop; if (!inst.containsProperty(propName)) { prop = new Property(propName); inst.addToProperties(prop); } else prop = inst.getProperty(propName); // get datatype values if (propValue.startsWith("\"")) { propValue = StringUtils.removeStart(propValue, "\""); propValue = StringUtils.substringBeforeLast(propValue, "\""); Value val = new Value(propValue); if (value.toString().contains("dbpedia")) { val.setSource(sources.get("fr.dbpedia.org")); } else { val.setSource(sources.get("www.ina.fr")); } if (!prop.containsValue(propValue)) // prop.addToValues(val); } } // System.out.println("=== "+line); } // LineStarts(notice) // System.out.println("*** "+line); } } } } } } } // // // parse dataset, create all objects for properties, instances, and values // private static void rewriteDataFileWithMappings(String datasetFile, String mappings) throws FileNotFoundException, IOException, URISyntaxException { // // un fichier suffit, chaque fichier en plus sera ajoute au model avec un Union // // // Model model = ModelFactory.createDefaultModel(); // try{ // // Model model1 = ModelFactory.createDefaultModel(); // model1.read(datasetFile); // // MappingSet ms = MappingSet.createModelMappingSet(); // // // // read RDF and fill data (instance, property, value ...) // // for (URI name : instances.keySet()) { // Instance inst = instances.get(name); // // System.out.println("++++ "+name); // // //String aModif = "http://www.okkam.org/oaie/"; // temporaire // Resource o = model.getResource(name.toString()); // StmtIterator iter = model.listStatements(o, null, (RDFNode) null); // // //System.out.println("*** "+iter); // while(iter.hasNext() ==true){ // // Statement stmt = iter.nextStatement(); // // System.out.println("=== "+stmt); // //System.out.println("*** "+stmt); // com.hp.hpl.jena.rdf.model.Property p = stmt.getPredicate(); // // String propName = new URI(p.getURI()).getFragment(); // // Property prop; // // if (!inst.containsProperty(propName)) { // System.out.println(".... "+propName); // // prop = new Property(propName); // inst.addToProperties(prop); // } else { // prop = inst.getProperty(propName); // inst.addToProperties(prop); // } // // String propValue; // // RDFNode node = stmt.getObject(); // if(node.isLiteral()){ // Literal // Literal nodeL = (Literal) node; // propValue = nodeL.getString(); // } // else{ // Resource // Resource nodeR = (Resource) node; // propValue = nodeR.getURI(); // } // // Value val = new Value(propValue); // val.setSource(inst.getSource()); // // if (!prop.containsValue(propValue)) // prop.addToValues(val); // // } // } // }catch(Exception e){ // System.out.println(e.toString()); // } // } // // // parse dataset, create all objects for properties, instances, and values private static void parseData(String datasetFile1, String datasetFile2) throws FileNotFoundException, IOException, URISyntaxException { // un fichier suffit, chaque fichier en plus sera ajoute au model avec un Union Model model = ModelFactory.createDefaultModel(); try { if (!datasetFile1.equals(datasetFile2)) {// two different files Model model1 = ModelFactory.createDefaultModel(); model1.read(datasetFile1); Model model2 = ModelFactory.createDefaultModel(); model2.read(datasetFile2); model = ModelFactory.createUnion(model1, model2); // union des deux models } else {//one file model.read(datasetFile1); } // read RDF and fill data (instance, property, value ...) for (URI name : instances.keySet()) { Instance inst = instances.get(name); // System.out.println("++++ "+name); //String aModif = "http://www.okkam.org/oaie/"; // temporaire Resource o = model.getResource(name.toString()); StmtIterator iter = model.listStatements(o, null, (RDFNode) null); //System.out.println("*** "+iter); while (iter.hasNext() == true) { Statement stmt = iter.nextStatement(); // System.out.println("=== "+stmt); //System.out.println("*** "+stmt); com.hp.hpl.jena.rdf.model.Property p = stmt.getPredicate(); String propName = new URI(p.getURI()).getFragment(); Property prop; if (!inst.containsProperty(propName)) { System.out.println(".... " + propName); prop = new Property(propName); inst.addToProperties(prop); } else { prop = inst.getProperty(propName); inst.addToProperties(prop); } String propValue; RDFNode node = stmt.getObject(); if (node.isLiteral()) { // Literal Literal nodeL = (Literal) node; propValue = nodeL.getString(); } else { // Resource Resource nodeR = (Resource) node; propValue = nodeR.getURI(); } Value val = new Value(propValue); val.setSource(inst.getSource()); if (!prop.containsValue(propValue)) prop.addToValues(val); } } } catch (Exception e) { System.out.println(e.toString()); } } // calculate all quality criteria for a value private static void calculateScore() { for (URI name : instances.keySet()) { Instance inst = instances.get(name); ArrayList<URI> rec = new ArrayList<>(); for (int k = 0; k < reconciled.size(); k++) { if (reconciled.get(k).contains(inst.getUri())) { rec = reconciled.get(k); } } ArrayList<Instance> references = new ArrayList<>(); for (int g = 0; g < rec.size(); g++) { references.add(instances.get(rec.get(g))); } for (int i = 0; i < inst.getProperties().size(); i++) { Property prop = inst.getProperties().get(i); ArrayList<Value> homog = new ArrayList<>(); for (int a = 0; a < references.size(); a++) { for (int b = 0; b < references.get(a).getProperties().size(); b++) { if (!(references.get(a).getProperties().isEmpty())) { //System.out.println("----"+references.get(a).getProperties());//.get(b).getName()); if (references.get(a).getProperties().get(b).getName().equals(prop.getName())) { homog.addAll(references.get(a).getProperties().get(b).getValues()); } } } //System.out.println("---- homog: "+homog); } // // // discover implausible values and calculate quality score for // plausible values for (int j = 0; j < prop.getValues().size(); j++) { Value value = prop.getValues().get(j); String valueStr = value.getValue(); Integer freq = 0; // calculate homogeneity for (int o = 0; o < homog.size(); o++) { if (homog.get(o).getValue().equals(valueStr)) { ++freq; } } // Integer mpBonus = 0; // calculate morePrecise for (int o = 0; o < homog.size(); o++) { boolean ret = isMorePrecise(value, (Value) homog.get(o)); if (ret == true) ++mpBonus; } //System.out.println(" *** : "+value.getValue()+ " MP "+value.dispalyIsMorePreciseThan()); // System.out.println(" *** : "+value.dispalyIsMorePreciseThanStr()); // // try{ // // calculate isSynonym // for (int o = 0; o < homog.size(); o++) { // if((isSynonym(value,(Value)homog.get(o))==true)); // } // }catch(Exception e){System.out.println(e.toString());} // // System.out.println(" +++ : isSyn "+value.dispalyIsSynonym()); // Float homogeneity = ((float) freq / homog.size()); value.setHomogeneity(homogeneity); // calculate occurrence frequency freq = 0; for (int o = 0; o < prop.getValues().size(); o++) { if (prop.getValues().get(o).getValue().equals(valueStr)) { ++freq; } } Float occurrenceFrequency = ((float) freq / prop.getValues().size()); value.setOccurrenceFrequency(occurrenceFrequency); if (/* violatesRules(value) || */ (value.getHomogeneity() < Fusion.homogeneityThreshold || value.getOccurrenceFrequency() < Fusion.occurenceFrequencyThreshold)) { value.setImplausible(true); } else // apparemment inutile mais pourquoi ? value.setImplausible(false); if (!value.getImplausible()) { // si plausible /*value.setQualityScore((value.getHomogeneity() + value.getOccurrenceFrequency() ) / 2); // test sans source value.setQualityValue(Fusion.getQualityValue(value.getQualityScore()));*/ value.setQualityScore((value.getHomogeneity() + value.getOccurrenceFrequency() + value.getSource().getFreshness() + value.getSource().getReliability()) / 4); value.setQualityValue(Fusion.getQualityValue(value.getQualityScore())); } } //Exploration of the set of values a given P. } //Exploration of the set of properties } } private static String getQualityValue(float qualityScore) { if (qualityScore <= 0.34) return "poor"; else if (qualityScore <= 0.67) return "average"; else return "excellent"; } /* * FOR each val in attribute.listOfValues: IF value.contains(val) OR * knowledgeBaseAbout(value, val): value.morePreciseThan.add(val) * * IF value.isSynonym(val): value.synonyms.add(val) * * IF value.violatesExpertRule(): value.incompatibilities.add(Rule, * property) } * * //check expert rules violations -- not implemented private static boolean * violatesRules(Value value){ value.setViolatedRules(""); return true; * * } */ // check if values are more precise according to specific ontologies -- not // implemented private static boolean checkKnowledgeBase(String val1, String val2) { return false; } // define if a value is more precise than another private static boolean isMorePrecise(Value val1, Value val2) { String v11 = val1.getValue(); String v21 = val2.getValue(); String v1 = Utils.removePunctuations(v11).toLowerCase(); String v2 = Utils.removePunctuations(v21).toLowerCase(); //if ((v1.contains(v2)))// && (!v1.equals(v2))) // || checkKnowledgeBase(val1.getValue(), val2.getValue())) if (!(v1.equals(v2)) && (v1.indexOf(v2) > -1)) { //System.out.println("("+v1+","+v2+")"); if (!(val1.getIsMorePreciseThanStr()).contains(v21)) //val1.addToMorePrecise(val2); val1.addToMorePreciseStr(v21); return true; } else return false; // if (val2.getValue().contains(val1.getValue()) || checkKnowledgeBase(val2.getValue(), val1.getValue())) // // val1.addToMorePrecise(val2); // return val2; // // if (val1.getValue().length() > val2.getValue().length()) // comparer la longueur // return val1; // else // return val2; } // define if a value is more precise than another // private static Value isMorePrecise(Value val1, Value val2) { // // if (val1.getValue().contains(val2.getValue()) || checkKnowledgeBase(val1.getValue(), val2.getValue())) // // val1.addToMorePrecise(val2); // return val1; // // if (val2.getValue().contains(val1.getValue()) || checkKnowledgeBase(val2.getValue(), val1.getValue())) // // val1.addToMorePrecise(val2); // return val2; // // if (val1.getValue().length() > val2.getValue().length()) // comparer la longueur // return val1; // else // return val2; // // } // define if two values are synonyms private static boolean isSynonym(Value val1, Value val2) throws IOException { boolean isSyn = false; String thesaurusUrl = "http://words.bighugelabs.com/api/2/92eae7f933f0f63404b3438ca46861e5/" + val1.getValue() + "/xml"; Document doc = Jsoup.connect(thesaurusUrl).get(); Elements synonyms = doc.select("w"); String syn = synonyms.html(); String[] synonymsArray = syn.split("\n"); ArrayList<String> synonymsList = new ArrayList<String>(Arrays.asList(synonymsArray)); if (synonymsList.contains(val2.getValue())) { val1.addToSynonyms(val2); val2.addToSynonyms(val1); isSyn = true; } return isSyn; } private static void writeINAData(String outputFile) { // URI String ns = "http://localhost/rdf/"; String saSetURI = ns + "saSet"; String valURI = ns + "val"; int lURI = 0; // pour ne pas remettre a zero les compteurs a chaque // parcours (URI pour les values) try { // create an empty model Model model = ModelFactory.createDefaultModel(); Resource saSets = model.createResource(ns + "saSets"); // racine qui // relit les // classes // d'equivalences // Ainsi on a un graphe plutot que plusieurs composantes connexes for (int i = 0; i < reconciled.size(); i++) { // on parcourt les // classes // d'equivalence String aSASetURI = new String(saSetURI + i); Resource aSaSet = model.createResource(aSASetURI); HashMap<String, Value> bestValues = new HashMap<String, Value>(); // ici, // le // choix // est // fait // de // ne // garder // qu'UNE // valeur // par // propriete saSets.addProperty(model.getProperty(ns + "hasSameAsSet"), aSaSet); for (int j = 0; j < reconciled.get(i).size(); j++) { // on // parcourt // les // instances // au // sein // d'une // classe Instance inst = instances.get(reconciled.get(i).get(j)); for (int k = 0; k < inst.getProperties().size(); k++) { // on // parcout // les // proprietes // d'une // instance Property prop = inst.getProperties().get(k); // il faut que l'unicite des proprietes soit deja // etablie for (int l = 0; l < prop.getValues().size(); l++) { // on // parcourt // les // valeurs // d'une // propriete // peut etre a enlever en ameliorant la structure de // donnees ? String aValURI = valURI + lURI; lURI++; Value value = prop.getValues().get(l); value.setUri(aValURI); Resource val = model.createResource(aValURI); aSaSet.addProperty(model.getProperty(ns + prop.getName()), val); // PAF // annotations val.addProperty(model.getProperty(ns + "hasValue"), value.getValue()); // la // valeur val.addProperty(model.getProperty(ns + "isFrom"), inst.getUri().toString()); // l'instance // de // provenance // (chaque // instance // possede // une // URL // unique) val.addProperty(model.getProperty(ns + "isImplausible"), String.valueOf(value.getImplausible())); val.addProperty(model.getProperty(ns + "hasOccurenceFrequency"), String.valueOf(value.getOccurrenceFrequency())); val.addProperty(model.getProperty(ns + "hasHomogeneity"), String.valueOf(value.getHomogeneity())); if (!value.getImplausible()) { // si la valeur est // plausible val.addProperty(model.getProperty(ns + "hasReliability"), String.valueOf(value.getSource().getReliability())); val.addProperty(model.getProperty(ns + "hasFreshness"), String.valueOf(value.getSource().getFreshness())); val.addProperty(model.getProperty(ns + "hasQualityScore"), String.valueOf(value.getQualityScore())); val.addProperty(model.getProperty(ns + "hasQualityValue"), value.getQualityValue()); Value oldValue = bestValues.get(prop.getName()); // pas // reussi // a // faire // plus // propre... if (oldValue == null) bestValues.put(prop.getName(), value); else { if (value.getQualityScore() > oldValue.getQualityScore()) bestValues.put(prop.getName(), value); } } } } } for (Entry<String, Value> e : bestValues.entrySet()) { // annotations // meilleurs // valeurs model.getResource(e.getValue().getUri()).addProperty(model.getProperty(ns + "hasMaximumScore"), "true"); // vrai si la valeur a le meilleur score // pour sa propriete et sa classe // d'equivalence (la valeur sera gardee // lors de la fusion) // aSaSet.addProperty(model.getProperty(ns + e.getKey()), // model.getResource(e.getValue().getUri())); // real fusion } } // Ecriture dans le fichier FileOutputStream fout = new FileOutputStream(outputFile); model.write(fout); graph = model; // a voir } catch (Exception e) { System.out.println("Failed: " + e); } } private static void writeData(String outputFile) { // URI String ns = "http://localhost/rdf/"; // /!\ Structure modifier: on se sert de ns pour combler l'information perdu sur l'URI. L'utilisation de Resource ou d'URI plutot que des Strings permetterait de ne jamais perde l'information ! String saSetURI = ns + "saSet"; String valURI = ns + "val"; int lURI = 0; // pour ne pas remettre a zero les compteurs a chaque // parcours (URI pour les values) try { // create an empty model Model model = ModelFactory.createDefaultModel(); Resource saSets = model.createResource(ns + "saSets"); // racine qui relit les classes d'equivalences // Ainsi on a un graphe plutot que plusieurs composantes connexes for (int i = 0; i < reconciled.size(); i++) { // on parcourt les classes d'equivalence String aSASetURI = new String(saSetURI + i); Resource aSaSet = model.createResource(aSASetURI); HashMap<String, Value> bestValues = new HashMap<String, Value>(); // ici, le choix est fait de ne garder qu'UNE valeur par propriete saSets.addProperty(model.getProperty(ns + "hasSameAsSet"), aSaSet); for (int j = 0; j < reconciled.get(i).size(); j++) { // on parcourt les instances au sein d'une classe Instance inst = instances.get(reconciled.get(i).get(j)); for (int k = 0; k < inst.getProperties().size(); k++) { // on parcourt les proprietes d'une instance Property prop = inst.getProperties().get(k); // il faut que l'unicite des proprietes soit deja etablie for (int l = 0; l < prop.getValues().size(); l++) { // on parcourt les valeurs d'une propriete (peut etre a enlever en ameliorant la structure de donnees ?) String aValURI = valURI + lURI; lURI++; Value value = prop.getValues().get(l); value.setUri(aValURI); Resource val = model.createResource(aValURI); aSaSet.addProperty(model.getProperty(ns + prop.getName()), val); // PAF (ns + ...) ? // annotations val.addProperty(model.getProperty(ns + "hasValue"), getLocalFromUri(value.getValue())); // la valeur //getLocalFromUri() a faire mieux val.addProperty(model.getProperty(ns + "isFrom"), inst.getUri().toString().replaceFirst(".*/([^/?]+).*", "$1")); // l'instance de provenance (chaque instance possede une URI unique) val.addProperty(model.getProperty(ns + "isImplausible"), String.valueOf(value.getImplausible())); val.addProperty(model.getProperty(ns + "hasOccurenceFrequency"), String.valueOf(value.getOccurrenceFrequency())); val.addProperty(model.getProperty(ns + "hasHomogeneity"), String.valueOf(value.getHomogeneity())); if (!value.getImplausible()) { // si la valeur est plausible val.addProperty(model.getProperty(ns + "hasReliability"), String.valueOf(value.getSource().getReliability())); val.addProperty(model.getProperty(ns + "hasFreshness"), String.valueOf(value.getSource().getFreshness())); // val.addProperty(model.getProperty(ns + "hasQualityScore"), String.valueOf(value.getQualityScore())); val.addProperty(model.getProperty(ns + "hasQualityValue"), value.getQualityValue()); for (int z = 0; z < value.getIsMorePreciseThanStr().size(); z++) { val.addProperty(model.getProperty(ns + "morePreciseThan"), value.getIsMorePreciseThanStr().get(z)); //.getValue()); } Value oldValue = bestValues.get(prop.getName()); // pas reussi a faire plus propre if (oldValue == null) bestValues.put(prop.getName(), value); else { if (value.getQualityScore() > oldValue.getQualityScore()) bestValues.put(prop.getName(), value); } } } } } for (Entry<String, Value> e : bestValues.entrySet()) { // annotations // meilleurs // valeurs model.getResource(e.getValue().getUri()).addProperty(model.getProperty(ns + "hasMaximumScore"), "true"); // vrai si la valeur a le meilleur score // pour sa propriete et sa classe // d'equivalence (la valeur sera gardee // lors de la fusion) // aSaSet.addProperty(model.getProperty(ns + e.getKey()), // model.getResource(e.getValue().getUri())); // real fusion } } // Ecriture dans le fichier FileOutputStream fout = new FileOutputStream(outputFile); model.write(fout); graph = model; // a voir } catch (Exception e) { System.out.println("Failed: " + e); } } } class MyInputVerifier extends InputVerifier { @Override public boolean verify(JComponent input) { String text = ((JTextField) input).getText(); Float test = null; try { test = Float.parseFloat(text); } catch (NumberFormatException numberFormatException) { } if (test instanceof Float && test <= 1 && test >= 0) { return true; } // JOptionPane.showMessageDialog(Fusion.guiFrame, "Please enter a real // number between 0 and 1!"); return false; } }