List of usage examples for weka.core Instances attribute
public Attribute attribute(String name)
From source file:demo.Demo.java
License:Open Source License
/**
 * Downloads the mushroom CSV dataset into a temporary file, learns a decomposable model
 * from it with a p-value threshold of 0.05, then displays and prints the selected model.
 *
 * @param args command-line arguments (not used)
 * @throws IOException if the dataset cannot be downloaded, written to disk or loaded
 */
public static void main(String[] args) throws IOException {
    CSVLoader loader = new CSVLoader();

    System.out.println("Downloading dataset...");
    URL oracle = new URL("http://repository.seasr.org/Datasets/UCI/csv/mushroom.csv");
    File csvFile = File.createTempFile("data-", ".csv");
    // try-with-resources closes both streams even when the download fails part-way
    try (BufferedReader in = new BufferedReader(new InputStreamReader(oracle.openStream()));
            PrintWriter out = new PrintWriter(new BufferedOutputStream(new FileOutputStream(csvFile)))) {
        String inputLine;
        while ((inputLine = in.readLine()) != null) {
            out.println(inputLine);
        }
        // PrintWriter never throws on write failure; surface it instead of learning from a truncated file
        if (out.checkError()) {
            throw new IOException("Could not write dataset to " + csvFile.getAbsolutePath());
        }
    }
    System.out.println("Dataset written to: " + csvFile.getAbsolutePath());

    loader.setFile(csvFile);
    loader.setNominalAttributes("first-last");
    Instances instances = loader.getDataSet();

    String[] variablesNames = new String[instances.numAttributes()];
    for (int i = 0; i < variablesNames.length; i++) {
        variablesNames[i] = instances.attribute(i).name();
    }

    ChordalysisModelling modeller = new ChordalysisModelling(0.05);
    System.out.println("Learning...");
    modeller.buildModel(instances);
    DecomposableModel bestModel = modeller.getModel();

    bestModel.display(variablesNames);
    System.out.println("The model selected is:");
    System.out.println(bestModel.toString(variablesNames));
    bestModel.display(variablesNames);
}
From source file:demo.DemoInference.java
License:Open Source License
/** * @param args/*from www.j a va 2 s . c o m*/ * @throws IOException */ public static void main(String[] args) throws IOException { CSVLoader loader = new CSVLoader(); System.out.println("Downloading dataset..."); URL oracle = new URL("http://repository.seasr.org/Datasets/UCI/csv/mushroom.csv"); File csvFile = File.createTempFile("data-", ".csv"); BufferedReader in = new BufferedReader(new InputStreamReader(oracle.openStream())); PrintWriter out = new PrintWriter(new BufferedOutputStream(new FileOutputStream(csvFile))); String inputLine; while ((inputLine = in.readLine()) != null) { out.println(inputLine); } in.close(); out.close(); System.out.println("Dataset written to: " + csvFile.getAbsolutePath()); loader.setFile(csvFile); loader.setNominalAttributes("first-last"); Instances instances = loader.getDataSet(); String[] variablesNames = new String[instances.numAttributes()]; String[][] outcomes = new String[instances.numAttributes()][]; for (int i = 0; i < variablesNames.length; i++) { variablesNames[i] = instances.attribute(i).name(); outcomes[i] = new String[instances.attribute(i).numValues() + 1];//+1 for missing for (int j = 0; j < outcomes[i].length - 1; j++) { outcomes[i][j] = instances.attribute(i).value(j); } outcomes[i][outcomes[i].length - 1] = "missing"; System.out.println("Dom(" + variablesNames[i] + ") = " + Arrays.toString(outcomes[i])); } ChordalysisModelling modeller = new ChordalysisModelling(0.05); System.out.println("Learning..."); modeller.buildModel(instances); DecomposableModel bestModel = modeller.getModel(); // bestModel.display(variablesNames); System.out.println("The model selected is:"); System.out.println(bestModel.toString(variablesNames)); Inference inference = new Inference(bestModel, variablesNames, outcomes); inference.setProbabilities(modeller.getLattice()); String targetVariable = "population"; System.out.println("initial beliefs on " + targetVariable + " " + Arrays.toString(inference.getBelief(targetVariable))); 
System.out.println("adding evidence poisonous and convex shape"); inference.addEvidence("class", "e"); inference.addEvidence("cap-shape", "x"); inference.recordEvidence(); System.out.println( "beliefs on " + targetVariable + " " + Arrays.toString(inference.getBelief(targetVariable))); inference.clearEvidences(); System.out.println("reset beliefs"); System.out.println( "reset beliefs on " + targetVariable + " " + Arrays.toString(inference.getBelief(targetVariable))); }
From source file:demo.Run.java
License:Open Source License
/** * @param args// ww w . ja va 2s . co m */ public static void main(String[] args) { if (args.length != 4) { System.out.println("Usage:\tjava -Xmx1g -jar Chordalysis.jar dataFile pvalue imageOutputFile useGUI?"); System.out.println("Example:\tjava -Xmx1g -jar Chordalysis.jar dataset.csv 0.05 graph.png false"); System.out.println("\nNote:\t'1g' means that you authorize 1GB of memory. " + "\nNote:\tIt should be adjusted depending upon the size of your data set (mostly required to load the data set)."); return; } System.out.println(); CSVLoader loader = new CSVLoader(); File csvFile = new File(args[0]); if (!csvFile.exists()) { System.out.println("The file doesn't exist"); return; } else { System.out.println("Info:\tUsing the dataset file " + csvFile.getAbsolutePath()); } double pValue = Double.valueOf(args[1]); if (pValue <= 0 || 1 <= pValue) { System.out.println("The p-value should be between 0 and 1 excluded. "); return; } else { System.out.println("Info:\tUsing p=" + pValue); } File outPutFile = new File(args[2]); String[] splitted = outPutFile.getName().split("\\."); if (splitted.length < 2) { System.out.println( "The image output file should declare an extension among \".jpg\", \".png\" or \".gif\""); return; } String extension = splitted[splitted.length - 1]; if (!extension.equals("jpg") && !extension.equals("png") && !extension.equals("gif")) { System.out.println( "The format for the graphical representation of the model should be either jpg, png or gif. 
"); return; } else { System.out.println("Info:\tExporting result as a " + extension + " file"); } boolean gui = Boolean.parseBoolean(args[3]); if (gui) { System.out.println("Info:\tUsing a graphical user interface"); } else { System.out.println("Info:\tNot using a graphical user interface"); } try { loader.setFile(csvFile); loader.setNominalAttributes("first-last"); Instances instances = loader.getDataSet(); String[] variablesNames = new String[instances.numAttributes()]; for (int i = 0; i < variablesNames.length; i++) { variablesNames[i] = instances.attribute(i).name(); } long start = System.currentTimeMillis(); ChordalysisModelling modeller = new ChordalysisModelling(pValue); modeller.buildModel(instances); DecomposableModel bestModel = modeller.getModel(); if (gui) bestModel.display(variablesNames); System.out .println("The model selected is: (selected in " + (System.currentTimeMillis() - start) + "ms)"); System.out.println(bestModel.toString(variablesNames)); ImageIO.write(bestModel.getImage(variablesNames), extension, outPutFile); } catch (IOException e) { System.out.println("I/O error while loading csv file"); e.printStackTrace(); } }
From source file:demo.RunDot.java
License:Open Source License
/** * @param args/*from w w w. j av a 2 s . c om*/ */ public static void main(String[] args) { if (args.length != 3) { System.out.println("Usage:\tjava -Xmx1g -jar Chordalysis.jar dataFile pvalue dotOutputFile"); System.out.println("Example:\tjava -Xmx1g -jar Chordalysis.jar dataset.csv 0.05 graph.dot"); System.out.println("\nNote:\t'1g' means that you authorize 1GB of memory. " + "\nNote:\tIt should be adjusted depending upon the size of your data set (mostly required to load the data set)."); return; } System.out.println(); CSVLoader loader = new CSVLoader(); File csvFile = new File(args[0]); if (!csvFile.exists()) { System.out.println("The file doesn't exist"); return; } else { System.out.println("Info:\tUsing the dataset file " + csvFile.getAbsolutePath()); } double pValue = Double.valueOf(args[1]); if (pValue <= 0 || 1 <= pValue) { System.out.println("The p-value should be between 0 and 1 excluded. "); return; } else { System.out.println("Info:\tUsing p=" + pValue); } File outPutFile = new File(args[2]); String[] splitted = outPutFile.getName().split("\\."); if (splitted.length < 2) { System.out.println("The image output file should declare a \".dot\" extension"); return; } try { loader.setFile(csvFile); loader.setNominalAttributes("first-last"); Instances instances = loader.getDataSet(); String[] variablesNames = new String[instances.numAttributes()]; for (int i = 0; i < variablesNames.length; i++) { variablesNames[i] = instances.attribute(i).name(); } long start = System.currentTimeMillis(); ChordalysisModelling modeller = new ChordalysisModelling(pValue); modeller.buildModel(instances); DecomposableModel bestModel = modeller.getModel(); System.out .println("The model selected is: (selected in " + (System.currentTimeMillis() - start) + "ms)"); System.out.println(bestModel.toString(variablesNames)); bestModel.exportDOT(outPutFile, variablesNames); System.out.println( "DOT file exported - note that the variables with no neighbors won't be included in the 
graph"); } catch (IOException e) { System.out.println("I/O error while loading csv file"); e.printStackTrace(); } }
From source file:demo.RunGUI.java
License:Open Source License
/** * @param args/*www . ja va 2s . co m*/ */ public static void main(String[] args) { JFileChooser chooser = new JFileChooser(); FileNameExtensionFilter filter = new FileNameExtensionFilter("CSV file", "csv"); chooser.setFileFilter(filter); int returnVal = chooser.showOpenDialog(null); if (returnVal == JFileChooser.APPROVE_OPTION) { System.out.println("You chose to open this file: " + chooser.getSelectedFile().getName()); } CSVLoader loader = new CSVLoader(); File csvFile = chooser.getSelectedFile(); if (!csvFile.exists()) { System.out.println("The file doesn't exist"); return; } double pValue = Double.valueOf(JOptionPane.showInputDialog("Desired p-value ]0,1[", 0.05)); if (pValue <= 0 || 1 <= pValue) { System.out.println("The p-value should be between 0 and 1 excluded. "); return; } try { loader.setFile(csvFile); loader.setNominalAttributes("first-last"); Instances instances = loader.getDataSet(); String[] variablesNames = new String[instances.numAttributes()]; for (int i = 0; i < variablesNames.length; i++) { variablesNames[i] = instances.attribute(i).name(); } ChordalysisModelling modeller = new ChordalysisModelling(pValue); modeller.buildModel(instances); DecomposableModel bestModel = modeller.getModel(); System.out.println("The model selected is:"); System.out.println(bestModel.toString(variablesNames)); bestModel.display(variablesNames); } catch (IOException e) { System.out.println("I/O error while loading csv file"); e.printStackTrace(); } }
From source file:demo.RunGUIProof.java
License:Open Source License
/** * @param args/*from w w w .jav a 2 s . c o m*/ */ public static void main(String[] args) { JOptionPane.showMessageDialog(null, introductionMessage, "Chordalysis", JOptionPane.INFORMATION_MESSAGE); int result = JOptionPane.showOptionDialog(null, new JTextArea(agreeCitation), "Reference", JOptionPane.YES_NO_OPTION, JOptionPane.QUESTION_MESSAGE, null, null, null); if (result == JOptionPane.NO_OPTION || result == JOptionPane.CLOSED_OPTION) { JOptionPane.showMessageDialog(null, "Chordalysis will now stop, because you do not want to reference its source. ", "Chordalysis", JOptionPane.WARNING_MESSAGE); System.exit(0); } JFileChooser chooser = new JFileChooser(); FileNameExtensionFilter filter = new FileNameExtensionFilter("CSV file", "csv"); chooser.setFileFilter(filter); int returnVal = chooser.showOpenDialog(null); File csvFile = null; if (returnVal == JFileChooser.APPROVE_OPTION) { csvFile = chooser.getSelectedFile(); System.out.println("You chose to open: " + csvFile); } else { JOptionPane.showMessageDialog(null, noFileSelectedMessage, "Chordalysis", JOptionPane.ERROR_MESSAGE); return; } CSVLoader loader = new CSVLoader(); if (!csvFile.exists()) { JOptionPane.showMessageDialog(null, noFileMessage, "Chordalysis", JOptionPane.INFORMATION_MESSAGE); return; } double pValue = -1; while (pValue <= 0 || 1 <= pValue) { pValue = Double.valueOf(JOptionPane.showInputDialog("Desired p-value (between 0 and 1)", 0.05)); if (pValue <= 0 || 1 <= pValue) { JOptionPane.showMessageDialog(null, incorrectPValueMessage, "Chordalysis", JOptionPane.WARNING_MESSAGE); } } filter = new FileNameExtensionFilter("PNG or DOT or CSV file or DNE file", "png", "dot", "csv", "dne"); chooser = new JFileChooser(); chooser.setFileFilter(filter); chooser.setDialogTitle("Where to save the graph?"); chooser.setSelectedFile(new File(csvFile.getAbsolutePath() + ".png")); returnVal = chooser.showSaveDialog(null); File graphFile = null; if (returnVal == JFileChooser.APPROVE_OPTION) { graphFile = 
chooser.getSelectedFile(); System.out.println("You chose to save the graph to: " + graphFile.getAbsolutePath()); } else { JOptionPane.showMessageDialog(null, noFileSelectedMessage, "Chordalysis", JOptionPane.ERROR_MESSAGE); return; } try { loader.setFile(csvFile); returnVal = JOptionPane.showConfirmDialog(null, "Are all of your attribute nominal?", "Chordalysis", JOptionPane.YES_NO_OPTION); if (returnVal == JOptionPane.YES_OPTION) { loader.setNominalAttributes("first-last"); } Instances instances = loader.getDataSet(); String cols = ""; for (int i = 0; i < instances.numAttributes(); i++) { Attribute att = instances.attribute(i); if (!att.isNominal()) { cols += (i + 1) + ","; } } if (!cols.isEmpty()) { cols = cols.substring(0, cols.length() - 1); String message = "Some atributes are not nominal (number " + cols + "), please wait during discretization. "; JOptionPane.showMessageDialog(null, message, "Chordalysis", JOptionPane.INFORMATION_MESSAGE); Discretize discretizer = new Discretize(cols); discretizer.setUseEqualFrequency(true); discretizer.setBins(3); discretizer.setIgnoreClass(true); discretizer.setInputFormat(instances); instances = Filter.useFilter(instances, discretizer); JOptionPane.showMessageDialog(null, "Discretization is now finished.", "Chordalysis", JOptionPane.INFORMATION_MESSAGE); } String[] variablesNames = new String[instances.numAttributes()]; String[][] outcomes = new String[instances.numAttributes()][]; for (int i = 0; i < variablesNames.length; i++) { variablesNames[i] = instances.attribute(i).name(); outcomes[i] = new String[instances.attribute(i).numValues()]; for (int j = 0; j < outcomes[i].length; j++) { outcomes[i][j] = instances.attribute(i).value(j); } } ChordalysisModelling modeller = new ChordalysisModelling(pValue); modeller.buildModel(instances); DecomposableModel bestModel = modeller.getModel(); JOptionPane.showMessageDialog(null, new JTextArea("Chordalysis has now finished analysing your data. 
" + "\nIf you found something useful, please reference Chordalysis as" + "\n\t- F. Petitjean, G.I. Webb and A. Nicholson, Scaling log-linear analysis to high-dimensional data, ICDM 2013" + "\n\t- F. Petitjean and G.I. Webb, Scaling log-linear analysis to datasets with thousands of variables, SDM 2015" + "\n\nYou can find the output file at: '" + graphFile.getAbsolutePath() + "'"), "Citation", JOptionPane.INFORMATION_MESSAGE); System.out.println("The model selected is:"); System.out.println(bestModel.toString(variablesNames)); if (graphFile.getName().endsWith("dot")) { bestModel.exportDOT(graphFile, variablesNames); } else if (graphFile.getName().endsWith("png")) { ImageIO.write(bestModel.getImage(variablesNames), "png", graphFile); } else if (graphFile.getName().endsWith("dne")) { bestModel.exportBNNetica(graphFile, variablesNames, outcomes); bestModel.exportDOT(new File(graphFile.getAbsolutePath() + ".dot"), variablesNames); ImageIO.write(bestModel.getImage(variablesNames), "png", new File(graphFile.getAbsolutePath() + ".png")); bestModel.saveAssociations(variablesNames, new File(graphFile.getAbsolutePath() + ".csv")); } else { bestModel.saveAssociations(variablesNames, graphFile); } } catch (IOException e) { JOptionPane.showMessageDialog(null, "The file '" + csvFile.getAbsolutePath() + "'\ncannot be read properly.", "Error while reading file", JOptionPane.ERROR_MESSAGE); System.out.println("I/O error while loading csv file"); e.printStackTrace(); } catch (Exception e) { JOptionPane.showMessageDialog(null, "Error:" + e.getMessage(), "Chordalysis", JOptionPane.ERROR_MESSAGE); e.printStackTrace(); } }
From source file:detplagiasi.TextDirectoryToArff.java
License:Open Source License
public Instances createDataset(String directoryPath) throws Exception { FastVector atts = new FastVector(2); atts.addElement(new Attribute("filename", (FastVector) null)); atts.addElement(new Attribute("contents", (FastVector) null)); /*//w ww . java 2s . c o m ArrayList atts = new ArrayList(2); atts.addElement(new Attribute("filename", (ArrayList) null)); atts.addElement(new Attribute("contents", (ArrayList) null)); */ Instances data = new Instances("text_files_in_" + directoryPath, atts, 0); File dir = new File(directoryPath); String[] files = dir.list(); //create file a untuk menampung name file dari instance yang terkait //FileWriter fstream = new FileWriter(directoryPath+"\\cluster detail.txt"); BufferedWriter out = null; out = new BufferedWriter(new FileWriter(directoryPath + "\\cluster detail.txt")); for (int i = 0; i < files.length; i++) { if (files[i].endsWith(".txt")) { out.write("file ke " + (i + 1) + ": " + files[i]); System.out.println("processed files:" + files[i]); fileName[i] = files[i]; out.write("file ke " + (i + 1) + ": " + files[i]); try { double[] newInst = new double[2]; newInst[0] = (double) data.attribute(0).addStringValue(files[i]); File txt = new File(directoryPath + File.separator + files[i]); System.out.println("TDTARFF: " + txt.getCanonicalPath()); InputStreamReader is; is = new InputStreamReader(new FileInputStream(txt)); StringBuffer txtStr = new StringBuffer(); int c; while ((c = is.read()) != -1) { txtStr.append((char) c); } newInst[1] = (double) data.attribute(1).addStringValue(txtStr.toString()); try { out.write("file ke " + (i + 1) + ": " + files[i]); System.out.println("success"); } catch (Exception d) { System.err.println(d.getLocalizedMessage()); } //input pada file a nama file dari instance //data.add(new Instance(1.0, newInst)); data.add(new Instance(1.0, newInst)); //data.renameAttributeValue(data.attribute("att_name_in_data2"),"att_value_in_data2","att_value_in_data1"); } catch (Exception e) { System.err.println("failed to 
convert file: " + directoryPath + File.separator + files[i]); } } } return data; }
From source file:dewaweebtreeclassifier.veranda.VerandaTree.java
/** * // w ww. j a va 2 s .c o m * @param data */ public void buildTree(Instances data) { // exit if there is no data left in the dataset if (data.numInstances() == 0) { mChild = null; return; } double[] informationGains = new double[data.numAttributes()]; Enumeration enumAttrs = data.enumerateAttributes(); while (enumAttrs.hasMoreElements()) { Attribute attr = (Attribute) enumAttrs.nextElement(); informationGains[attr.index()] = computeGain(data, attr); } int maxIdx = Utils.maxIndex(informationGains); if (Utils.eq(informationGains[maxIdx], 0)) { mClassDistribution = new int[data.numClasses()]; Enumeration enumInst = data.enumerateInstances(); while (enumInst.hasMoreElements()) { Instance instance = (Instance) enumInst.nextElement(); mClassDistribution[(int) instance.classValue()]++; } mClassValue = Utils.maxIndex(mClassDistribution); } else { mSplitAttribute = data.attribute(maxIdx); Instances[] splitInstances = splitInstancesOnAttribute(data, mSplitAttribute); mChild = new VerandaTree[mSplitAttribute.numValues()]; for (int i = 0; i < mChild.length; i++) { mChild[i] = new VerandaTree(); mChild[i].buildTree(splitInstances[i]); } } }
From source file:distributed.core.DistributedUtils.java
License:Open Source License
/**
 * Computes summary statistics for one attribute, counting both missing values and zeros as
 * missing.
 *
 * <p>Every other value increments the count, is added to the sum and the sum of squares,
 * and updates the minimum and maximum. Derived statistics are computed before returning.
 *
 * @param denormalized the instances to summarize
 * @param attIndex index of the attribute to summarize
 * @return the statistics, named after the attribute
 */
public static NumericStats getNumericAttributeStatsSparse(Instances denormalized, int attIndex) {
    NumericStats ns = new NumericStats(denormalized.attribute(attIndex).name());

    // slots of the individual metrics inside the statistics array
    final int missingSlot = ArffSummaryNumericMetric.MISSING.ordinal();
    final int countSlot = ArffSummaryNumericMetric.COUNT.ordinal();
    final int sumSlot = ArffSummaryNumericMetric.SUM.ordinal();
    final int sumSqSlot = ArffSummaryNumericMetric.SUMSQ.ordinal();
    final int minSlot = ArffSummaryNumericMetric.MIN.ordinal();
    final int maxSlot = ArffSummaryNumericMetric.MAX.ordinal();

    for (int row = 0; row < denormalized.numInstances(); row++) {
        double current = denormalized.instance(row).value(attIndex);
        double[] stats = ns.getStats();

        if (Utils.isMissingValue(current) || current == 0) {
            stats[missingSlot]++;
            continue;
        }

        stats[countSlot]++;
        stats[sumSlot] += current;
        stats[sumSqSlot] += current * current;

        if (Double.isNaN(stats[minSlot])) {
            // minimum not set yet: this value becomes both minimum and maximum
            stats[maxSlot] = current;
            stats[minSlot] = current;
        } else if (current < stats[minSlot]) {
            stats[minSlot] = current;
        } else if (current > stats[maxSlot]) {
            stats[maxSlot] = current;
        }
    }

    ns.computeDerived();
    return ns;
}
From source file:distributed.core.DistributedUtils.java
License:Open Source License
/**
 * Creates an empty copy of the given dataset's header and appends one summary attribute for
 * every numeric or nominal attribute of the dataset.
 *
 * <p>Numeric attributes are summarized either from the dataset's own attribute statistics
 * or, when zeros are to be treated as missing, by
 * {@code getNumericAttributeStatsSparse}. Nominal attributes are summarized by their missing
 * count and the weight of each label. Attributes of any other type get no summary.
 *
 * @param denormalized the instances to summarize
 * @param treatZerosAsMissing whether zeros in numeric attributes count as missing values
 * @return the header followed by the summary attributes
 */
public static Instances makeHeaderWithSummaryAtts(Instances denormalized, boolean treatZerosAsMissing) {
    Instances header = new Instances(denormalized, 0);

    for (int idx = 0; idx < denormalized.numAttributes(); idx++) {
        AttributeStats stats = denormalized.attributeStats(idx);
        Attribute source = denormalized.attribute(idx);
        Attribute summary;

        if (source.isNumeric()) {
            NumericStats ns = new NumericStats(source.name());
            if (treatZerosAsMissing) {
                ns = getNumericAttributeStatsSparse(denormalized, idx);
            } else {
                // copy the statistics already computed for this attribute
                double[] target = ns.getStats();
                target[ArffSummaryNumericMetric.MIN.ordinal()] = stats.numericStats.min;
                target[ArffSummaryNumericMetric.MAX.ordinal()] = stats.numericStats.max;
                target[ArffSummaryNumericMetric.COUNT.ordinal()] = stats.numericStats.count;
                target[ArffSummaryNumericMetric.SUM.ordinal()] = stats.numericStats.sum;
                target[ArffSummaryNumericMetric.SUMSQ.ordinal()] = stats.numericStats.sumSq;
                target[ArffSummaryNumericMetric.MISSING.ordinal()] = stats.missingCount;
                ns.computeDerived();
            }
            summary = ns.makeAttribute();
        } else if (source.isNominal()) {
            NominalStats nom = new NominalStats(source.name());
            nom.setNumMissing(stats.missingCount);
            double[] labelFreqs = stats.nominalWeights;
            for (int label = 0; label < source.numValues(); label++) {
                nom.add(source.value(label), labelFreqs[label]);
            }
            summary = nom.makeAttribute();
        } else {
            // neither numeric nor nominal: nothing to append
            continue;
        }

        header.insertAttributeAt(summary, header.numAttributes());
    }
    return header;
}