Usage examples for the weka.core.Instances constructor
public Instances(String name, ArrayList<Attribute> attInfo, int capacity)
From source file:de.upb.timok.utils.DatasetTransformationUtils.java
License:Open Source License
public static Instances testSetToInstances(List<double[]> testSet) { if (testSet.size() == 0) { logger.warn("TestSet has size 0"); }//from ww w . j a va2 s .c o m final double[] sample = testSet.get(0); final ArrayList<Attribute> fvWekaAttributes = new ArrayList<>(sample.length); for (int i = 0; i < sample.length; i++) { fvWekaAttributes.add(new Attribute(Integer.toString(i))); } final ArrayList<String> classStrings = new ArrayList<>(); classStrings.add("normal"); final Attribute ClassAttribute = new Attribute("class", classStrings); fvWekaAttributes.add(ClassAttribute); // Declare the feature vector final Instances result = new Instances("testSet", fvWekaAttributes, testSet.size()); result.setClassIndex(fvWekaAttributes.size() - 1); for (final double[] instance : testSet) { final Instance wekaInstance = new DenseInstance(1, instance); wekaInstance.setDataset(result); result.add(wekaInstance); } return result; }
From source file:detplagiasi.TextDirectoryToArff.java
License:Open Source License
public Instances createDataset(String directoryPath) throws Exception { FastVector atts = new FastVector(2); atts.addElement(new Attribute("filename", (FastVector) null)); atts.addElement(new Attribute("contents", (FastVector) null)); /*/* w w w. j av a 2 s .c o m*/ ArrayList atts = new ArrayList(2); atts.addElement(new Attribute("filename", (ArrayList) null)); atts.addElement(new Attribute("contents", (ArrayList) null)); */ Instances data = new Instances("text_files_in_" + directoryPath, atts, 0); File dir = new File(directoryPath); String[] files = dir.list(); //create file a untuk menampung name file dari instance yang terkait //FileWriter fstream = new FileWriter(directoryPath+"\\cluster detail.txt"); BufferedWriter out = null; out = new BufferedWriter(new FileWriter(directoryPath + "\\cluster detail.txt")); for (int i = 0; i < files.length; i++) { if (files[i].endsWith(".txt")) { out.write("file ke " + (i + 1) + ": " + files[i]); System.out.println("processed files:" + files[i]); fileName[i] = files[i]; out.write("file ke " + (i + 1) + ": " + files[i]); try { double[] newInst = new double[2]; newInst[0] = (double) data.attribute(0).addStringValue(files[i]); File txt = new File(directoryPath + File.separator + files[i]); System.out.println("TDTARFF: " + txt.getCanonicalPath()); InputStreamReader is; is = new InputStreamReader(new FileInputStream(txt)); StringBuffer txtStr = new StringBuffer(); int c; while ((c = is.read()) != -1) { txtStr.append((char) c); } newInst[1] = (double) data.attribute(1).addStringValue(txtStr.toString()); try { out.write("file ke " + (i + 1) + ": " + files[i]); System.out.println("success"); } catch (Exception d) { System.err.println(d.getLocalizedMessage()); } //input pada file a nama file dari instance //data.add(new Instance(1.0, newInst)); data.add(new Instance(1.0, newInst)); //data.renameAttributeValue(data.attribute("att_name_in_data2"),"att_value_in_data2","att_value_in_data1"); } catch (Exception e) { 
System.err.println("failed to convert file: " + directoryPath + File.separator + files[i]); } } } return data; }
From source file:DiversifyQuery.DivTopK.java
/** * Sets the format of the filtered instances that are output. I.e. will * include k attributes each shapelet distance and a class value * * @param inputFormat the format of the input data * @return a new Instances object in the desired output format * @throws Exception if all required parameters of the filter are not * initialised correctly/*ww w .j a va 2 s. c o m*/ */ protected Instances determineOutputFormat(Instances inputFormat, ArrayList<LegacyShapelet> shapelets) throws Exception { //Set up instances size and format. //int length = this.numShapelets; int length = shapelets.size(); FastVector atts = new FastVector(); String name; for (int i = 0; i < length; i++) { name = "Shapelet_" + i; atts.addElement(new Attribute(name)); } if (inputFormat.classIndex() >= 0) { //Classification set, set class //Get the class values as a fast vector Attribute target = inputFormat.attribute(inputFormat.classIndex()); FastVector vals = new FastVector(target.numValues()); for (int i = 0; i < target.numValues(); i++) { vals.addElement(target.value(i)); } atts.addElement(new Attribute(inputFormat.attribute(inputFormat.classIndex()).name(), vals)); } Instances result = new Instances("Shapelets" + inputFormat.relationName(), atts, inputFormat.numInstances()); if (inputFormat.classIndex() >= 0) { result.setClassIndex(result.numAttributes() - 1); } return result; }
From source file:DiversifyTopKShaepelet.DiversifyTopKShaepelet.java
/** * Sets the format of the filtered instances that are output. I.e. will * include k attributes each shapelet distance and a class value * * @param inputFormat the format of the input data * @return a new Instances object in the desired output format * @throws Exception if all required parameters of the filter are not * initialised correctly// ww w.j a va2 s. c o m */ @Override protected Instances determineOutputFormat(Instances inputFormat) throws Exception { if (this.numShapelets < 1) { throw new Exception( "ShapeletFilter not initialised correctly - please specify a value of k that is greater than or equal to 1"); } //Set up instances size and format. //int length = this.numShapelets; int length = this.shapelets.size(); FastVector atts = new FastVector(); String name; for (int i = 0; i < length; i++) { name = "Shapelet_" + i; atts.addElement(new Attribute(name)); } if (inputFormat.classIndex() >= 0) { //Classification set, set class //Get the class values as a fast vector Attribute target = inputFormat.attribute(inputFormat.classIndex()); FastVector vals = new FastVector(target.numValues()); for (int i = 0; i < target.numValues(); i++) { vals.addElement(target.value(i)); } atts.addElement(new Attribute(inputFormat.attribute(inputFormat.classIndex()).name(), vals)); } Instances result = new Instances("Shapelets" + inputFormat.relationName(), atts, inputFormat.numInstances()); if (inputFormat.classIndex() >= 0) { result.setClassIndex(result.numAttributes() - 1); } return result; }
From source file:edu.cmu.cs.in.hoop.hoops.analyze.HoopWekaML.java
License:Open Source License
/**
 * Constructs the hoop: configures its metadata, builds an unpruned J48
 * decision tree, and trains it on an empty three-feature training set
 * (two numerics plus one nominal color) with a binary class attribute.
 */
public HoopWekaML() {
    setClassName("HoopWekaML");
    debug("HoopWekaML ()");
    removeOutPort("KV");
    setHoopDescription("Run Weka Machine Learning");

    // Unpruned J48 tree ("-U").
    J48 tree = new J48();
    try {
        tree.setOptions(new String[] { "-U" });
    } catch (Exception optionError) {
        optionError.printStackTrace();
    }

    // Nominal color attribute values.
    FastVector colorValues = new FastVector(3);
    colorValues.addElement("blue");
    colorValues.addElement("gray");
    colorValues.addElement("black");

    // Binary class attribute values.
    FastVector classValues = new FastVector(2);
    classValues.addElement("positive");
    classValues.addElement("negative");

    // Feature vector: two numerics, the nominal color, then the class.
    FastVector schema = new FastVector(4);
    schema.addElement(new Attribute("firstNumeric"));
    schema.addElement(new Attribute("secondNumeric"));
    schema.addElement(new Attribute("aNominal", colorValues));
    schema.addElement(new Attribute("theClass", classValues));

    // Empty training set with capacity 10; the class is the last attribute.
    Instances isTrainingSet = new Instances("Rel", schema, 10);
    isTrainingSet.setClassIndex(3);
    try {
        tree.buildClassifier(isTrainingSet);
    } catch (Exception trainError) {
        trainError.printStackTrace();
    }
}
From source file:edu.cmu.lti.oaqa.baseqa.providers.ml.classifiers.MekaProvider.java
License:Apache License
/**
 * Trains the multi-label Meka classifier on the given feature maps and
 * labels, persists both the model and the dataset schema, and optionally
 * reports 10-fold cross-validation results.
 *
 * @param X               feature maps, one per example (feature name -> value)
 * @param Y               labels, parallel to X; an example may appear several
 *                        times in X with different labels
 * @param crossValidation if true, also run and log 10-fold cross-validation
 * @throws AnalysisEngineProcessException if training or serialization fails
 */
@Override
public void train(List<Map<String, Double>> X, List<String> Y, boolean crossValidation)
    throws AnalysisEngineProcessException {
  // create attribute (including label) info: one binary ("y"/"n") attribute
  // per label first, then one numeric attribute per feature.
  ArrayList<Attribute> attributes = new ArrayList<>();
  List<String> labelNames = ClassifierProvider.labelNames(Y);
  labelNames.stream().map(attr -> new Attribute(attr, Arrays.asList("y", "n")))
      .forEachOrdered(attributes::add);
  List<String> featureNames = ClassifierProvider.featureNames(X);
  featureNames.stream().map(Attribute::new).forEachOrdered(attributes::add);
  String name = Files.getNameWithoutExtension(modelFile.getName());
  datasetSchema = new Instances(name, attributes, 0);
  // NOTE(review): class index = number of labels, which looks like the Meka
  // "labels come first" convention — confirm against the Meka docs.
  datasetSchema.setClassIndex(labelNames.size());
  // add instances
  // due to the limitation of the interface definition, X, Y should be reorganized:
  // group all labels that share the same feature map into one multi-label example.
  SetMultimap<Map<String, Double>, String> XY = HashMultimap.create();
  IntStream.range(0, X.size()).forEach(i -> XY.put(X.get(i), Y.get(i)));
  Instances trainingInstances = new Instances(datasetSchema, XY.size());
  for (Map.Entry<Map<String, Double>, Collection<String>> entry : XY.asMap().entrySet()) {
    Set<String> y = ImmutableSet.copyOf(entry.getValue());
    Map<String, Double> x = entry.getKey();
    // Sparse instance sized for the labels plus this example's features.
    SparseInstance instance = new SparseInstance(labelNames.size() + x.size());
    for (String labelName : labelNames) {
      // "y" if this example carries the label, "n" otherwise.
      instance.setValue(datasetSchema.attribute(labelName), y.contains(labelName) ? "y" : "n");
    }
    for (Map.Entry<String, Double> e : x.entrySet()) {
      instance.setValue(datasetSchema.attribute(e.getKey()), e.getValue());
    }
    trainingInstances.add(instance);
  }
  // training
  try {
    classifier = (MultiLabelClassifier) AbstractClassifier.forName(classifierName, options);
    classifier.buildClassifier(trainingInstances);
  } catch (Exception e) {
    throw new AnalysisEngineProcessException(e);
  }
  // Persist both the trained model and the schema needed to score later.
  try {
    SerializationHelper.write(modelFile.getAbsolutePath(), classifier);
    SerializationHelper.write(datasetSchemaFile.getAbsolutePath(), datasetSchema);
  } catch (Exception e) {
    throw new AnalysisEngineProcessException(e);
  }
  if (crossValidation) {
    try {
      // Unseeded Random: cross-validation folds differ between runs.
      Evaluation eval = new Evaluation(trainingInstances);
      Random rand = new Random();
      eval.crossValidateModel(classifier, trainingInstances, 10, rand);
      LOG.debug(eval.toSummaryString());
    } catch (Exception e) {
      throw new AnalysisEngineProcessException(e);
    }
  }
}
From source file:edu.cmu.lti.oaqa.baseqa.providers.ml.classifiers.WekaProvider.java
License:Apache License
/**
 * Trains the Weka classifier on the given feature maps and labels, persists
 * the model and dataset schema, optionally exports the training data as an
 * ARFF file, and optionally reports 10-fold cross-validation results.
 *
 * @param X               feature maps, one per example (feature name -> value)
 * @param Y               class labels, parallel to X
 * @param crossValidation if true, also run and log 10-fold cross-validation
 * @throws AnalysisEngineProcessException if training, serialization, or the
 *                                        ARFF export fails
 */
@Override
public void train(List<Map<String, Double>> X, List<String> Y, boolean crossValidation)
    throws AnalysisEngineProcessException {
  // create attribute (including label) info: one numeric attribute per
  // feature, then a single nominal "__label__" class attribute.
  ArrayList<Attribute> attributes = new ArrayList<>();
  ClassifierProvider.featureNames(X).stream().map(Attribute::new).forEachOrdered(attributes::add);
  Attribute label = new Attribute("__label__", ClassifierProvider.labelNames(Y));
  attributes.add(label);
  String name = Files.getNameWithoutExtension(modelFile.getName());
  datasetSchema = new Instances(name, attributes, X.size());
  datasetSchema.setClass(label);
  // add instances
  Instances trainingInstances = new Instances(datasetSchema, X.size());
  if (balanceWeight) {
    // Weight each example by maxCount/count(label) so every class
    // contributes equal total weight.
    Multiset<String> labelCounts = HashMultiset.create(Y);
    double maxCount = labelCounts.entrySet().stream().mapToInt(Multiset.Entry::getCount).max()
        .orElseThrow(AnalysisEngineProcessException::new);
    for (int i = 0; i < X.size(); i++) {
      String y = Y.get(i);
      double weight = maxCount / labelCounts.count(y);
      trainingInstances.add(newInstance(X.get(i), y, weight, trainingInstances));
    }
  } else {
    for (int i = 0; i < X.size(); i++) {
      trainingInstances.add(newInstance(X.get(i), Y.get(i), 1.0, trainingInstances));
    }
  }
  // training
  try {
    classifier = AbstractClassifier.forName(classifierName, options);
    classifier.buildClassifier(trainingInstances);
  } catch (Exception e) {
    throw new AnalysisEngineProcessException(e);
  }
  // write model and dataset schema
  try {
    SerializationHelper.write(modelFile.getAbsolutePath(), classifier);
    SerializationHelper.write(datasetSchemaFile.getAbsolutePath(), datasetSchema);
  } catch (Exception e) {
    throw new AnalysisEngineProcessException(e);
  }
  // backup training dataset as arff file
  if (datasetExportFile != null) {
    try {
      ArffSaver saver = new ArffSaver();
      saver.setInstances(trainingInstances);
      saver.setFile(datasetExportFile);
      saver.writeBatch();
    } catch (IOException e) {
      throw new AnalysisEngineProcessException(e);
    }
  }
  if (crossValidation) {
    try {
      // Unseeded Random: cross-validation folds differ between runs.
      Evaluation eval = new Evaluation(trainingInstances);
      Random rand = new Random();
      eval.crossValidateModel(classifier, trainingInstances, 10, rand);
      LOG.debug(eval.toSummaryString());
    } catch (Exception e) {
      throw new AnalysisEngineProcessException(e);
    }
  }
}
From source file:edu.cuny.qc.speech.AuToBI.util.ClassifierUtils.java
License:Open Source License
/** * Converts a feature set object to a weka Instances object * <p/>/*from w w w. ja v a 2s. c o m*/ * The class is set to the last attribute. * * @param feature_set the feature set to convert * @return a weka instances object * @throws Exception If the arff file can't be written or read. */ public static Instances convertFeatureSetToWekaInstances(FeatureSet feature_set) throws Exception { ArrayList<Attribute> attributes = generateWekaAttributes(feature_set.getFeatures()); Instances instances = new Instances("AuToBI_feature_set", attributes, feature_set.getDataPoints().size()); for (Word w : feature_set.getDataPoints()) { Instance inst = ClassifierUtils.assignWekaAttributes(instances, w); instances.add(inst); } ClassifierUtils.setWekaClassAttribute(instances, feature_set.getClassAttribute()); return instances; }
From source file:edu.cuny.qc.speech.AuToBI.util.ClassifierUtils.java
License:Open Source License
/** * Converts a feature set object to a weka Instances object. * <p/>/*from w w w .j a v a 2 s.c o m*/ * Use wekas instance weighting capability to assign weights for each data point. * * @param feature_set the feature set to convert * @param fn a weight function * @return a weka instances object */ public static Instances convertFeatureSetToWeightedWekaInstances(FeatureSet feature_set, WeightFunction fn) { ArrayList<Attribute> attributes = generateWekaAttributes(feature_set.getFeatures()); Instances instances = new Instances("AuToBI_feature_set", attributes, feature_set.getDataPoints().size()); for (Word w : feature_set.getDataPoints()) { Instance inst = ClassifierUtils.assignWekaAttributes(instances, w); inst.setWeight(fn.weight(w)); instances.add(inst); } ClassifierUtils.setWekaClassAttribute(instances, feature_set.getClassAttribute()); return instances; }
From source file:edu.cuny.qc.speech.AuToBI.util.ClassifierUtils.java
License:Open Source License
/** * Constructs a data point to a weka instance given a FastVector of weka attribute and a class attribute. * * @param attributes a FastVector of weka attributes * @param data_point the data point to convert * @param class_attribute the class attribute * @return a weka instance./*from www.ja v a2 s. c om*/ */ protected static Instance constructWekaInstance(ArrayList<Attribute> attributes, Word data_point, String class_attribute) { Instances instances = new Instances("single_instance_set", attributes, 0); setWekaClassAttribute(instances, class_attribute); return assignWekaAttributes(instances, data_point); }