List of usage examples for the weka.core.Instance method attribute(int index)
/**
 * Returns the attribute stored at the given position.
 *
 * @param index position of the requested attribute; the callers in this
 *              listing pass values from 0 up to {@code numAttributes() - 1}
 * @return the attribute at that position
 */
public Attribute attribute(int index);
From source file:cn.ict.zyq.bestConf.bestConf.BestConf.java
License:Open Source License
public static void getBestPerfFrom(String path) { try {/*from w w w . j ava2 s .c o m*/ BestConf bestconf = new BestConf(); Instances trainingSet = DataIOFile.loadDataFromArffFile(path); Instance best = trainingSet.firstInstance(); //set the best configuration to the cluster Map<Attribute, Double> attsmap = new HashMap<Attribute, Double>(); for (int i = 0; i < best.numAttributes() - 1; i++) { attsmap.put(best.attribute(i), best.value(i)); } double bestPerf = bestconf.setOptimal(attsmap, "getBestPerfFrom"); System.out.println("========================================="); System.err.println("The actual performance for the best point is : " + bestPerf); System.out.println("========================================="); } catch (IOException e) { e.printStackTrace(); } }
From source file:cn.ict.zyq.bestConf.cluster.Main.AutoTestAdjust.java
License:Open Source License
public static String getMD5(Instance ins) { StringBuffer name = new StringBuffer(""); for (int i = 0; i < ins.numAttributes() - 2; i++) { name.append(Math.round(ins.value(ins.attribute(i))) + ","); }// w w w.java2 s . c o m return getMD5(name.toString()); }
From source file:cn.ict.zyq.bestConf.cluster.Main.AutoTestAdjust.java
License:Open Source License
/**
 * Writes the value of the instance's last attribute, followed by a newline,
 * to a file inside {@code perfsfilepath} whose name is {@code getMD5(ins)}.
 *
 * <p>The target folder is created when it does not exist. An
 * {@link IOException} is not propagated; its stack trace is printed instead.
 *
 * @param ins instance whose last attribute value is persisted
 */
private void writePerfstoFile(Instance ins) {
    File perfFolder = new File(perfsfilepath);
    if (!perfFolder.exists()) {
        perfFolder.mkdirs();
    }

    File file = new File(perfsfilepath + "/" + getMD5(ins));
    // try-with-resources closes the writer even when write() fails; the
    // previous version leaked the file handle on that path.
    try (BufferedWriter writer = new BufferedWriter(new FileWriter(file))) {
        writer.write(ins.value(ins.attribute(ins.numAttributes() - 1)) + "\n");
    } catch (IOException e) {
        e.printStackTrace();
    }
}
From source file:cn.ict.zyq.bestConf.cluster.Main.AutoTestAdjust.java
License:Open Source License
public Instances runExp(Instances samplePoints, String perfAttName) { Instances retVal = null;/*from ww w .j ava 2s .c o m*/ if (samplePoints.attribute(perfAttName) == null) { Attribute performance = new Attribute(perfAttName); samplePoints.insertAttributeAt(performance, samplePoints.numAttributes()); } int pos = samplePoints.numInstances(); int count = 0; for (int i = 0; i < pos; i++) { Instance ins = samplePoints.get(i); HashMap hm = new HashMap(); int tot = 0; for (int j = 0; j < ins.numAttributes(); j++) { hm.put(ins.attribute(j).name(), ins.value(ins.attribute(j))); } boolean testRet; if (Double.isNaN(ins.value(ins.attribute(ins.numAttributes() - 1)))) { testRet = this.startTest(hm, i, isInterrupt); double y = 0; if (!testRet) {// the setting does not work, we skip it y = -1; count++; if (count >= targetTestErrorNum) { System.out.println( "There must be somthing wrong with the system. Please check and restart....."); System.exit(1); } } else { y = getPerformanceByType(performanceType); count = 0; } ins.setValue(samplePoints.numAttributes() - 1, y); writePerfstoFile(ins); } else { continue; } } retVal = samplePoints; retVal.setClassIndex(retVal.numAttributes() - 1); return retVal; }
From source file:com.hack23.cia.service.impl.action.user.wordcount.WordCounterImpl.java
License:Apache License
/**
 * Runs the document content through a {@code StringToWordVector} filter and
 * returns the resulting attribute names mapped to their parsed integer values.
 *
 * <p>Tokens are lower-cased unigrams; words shorter than five characters are
 * treated as stop words. Any exception raised while filtering or parsing is
 * logged as a warning and the entries collected so far are returned.
 *
 * @param documentContentData document whose content is analysed
 * @param maxResult           value passed to {@code setWordsToKeep}
 * @return attribute name to value map taken from the last filtered instance
 */
@Override
public Map<String, Integer> calculateWordCount(final DocumentContentData documentContentData,
        final int maxResult) {
    final String html = documentContentData.getContent();

    // Single string attribute holding the whole document.
    final Attribute htmlAttribute = new Attribute("html", (ArrayList<String>) null);
    final ArrayList<Attribute> attributes = new ArrayList<>();
    attributes.add(htmlAttribute);

    final Instances dataset = new Instances("html", attributes, 1);
    dataset.add(new DenseInstance(1));
    dataset.instance(0).setValue(0, html);

    final StopwordsHandler shortWordHandler = new StopwordsHandler() {
        @Override
        public boolean isStopword(final String word) {
            return word.length() < 5;
        }
    };

    final NGramTokenizer unigramTokenizer = new NGramTokenizer();
    unigramTokenizer.setNGramMinSize(1);
    unigramTokenizer.setNGramMaxSize(1);
    unigramTokenizer.setDelimiters(" \r\n\t.,;:'\"()?!'");

    final StringToWordVector wordVector = new StringToWordVector();
    wordVector.setTokenizer(unigramTokenizer);
    wordVector.setStopwordsHandler(shortWordHandler);
    wordVector.setLowerCaseTokens(true);
    wordVector.setOutputWordCounts(true);
    wordVector.setWordsToKeep(maxResult);

    final Map<String, Integer> counts = new HashMap<>();
    try {
        wordVector.setInputFormat(dataset);
        final Instance row = Filter.useFilter(dataset, wordVector).lastInstance();

        final int columns = row.numAttributes();
        int col = 0;
        while (col < columns) {
            final String word = row.attribute(col).name();
            counts.put(word, Integer.valueOf(row.toString(col)));
            col++;
        }
    } catch (final Exception e) {
        LOGGER.warn("Problem calculating wordcount for : {} , exception:{}", documentContentData.getId(), e);
    }
    return counts;
}
From source file:com.mycompany.id3classifier.kNNClassifier.java
/**
 * Computes a distance between two instances over all non-class attributes.
 *
 * <p>Numeric attributes contribute the absolute difference of their values;
 * all other attributes contribute 0 when their string values are equal and 1
 * otherwise. Each contribution is raised to the power of the attribute count
 * of {@code instance1}, and the result is the matching root of the sum.
 *
 * @param instance1 first instance; supplies the attribute count, class index
 *                  and attribute types
 * @param instance2 second instance
 * @return the distance described above
 */
private static double findDistance(Instance instance1, Instance instance2) {
    final int dimensions = instance1.numAttributes();
    double sum = 0;

    for (int idx = 0; idx < dimensions; idx++) {
        if (idx == instance1.classIndex()) {
            continue; // the class attribute never takes part in the distance
        }

        double gap;
        if (instance1.attribute(idx).isNumeric()) {
            gap = Math.abs(instance1.value(idx) - instance2.value(idx));
        } else {
            boolean same = instance1.stringValue(idx).equals(instance2.stringValue(idx));
            gap = same ? 0 : 1;
        }
        sum += Math.pow(gap, dimensions);
    }

    return Math.pow(sum, 1.0 / dimensions);
}
From source file:com.openkm.kea.filter.KEAPhraseFilter.java
License:Open Source License
/** * Converts an instance by removing all non-alphanumeric characters * from its string attribute values.//from w w w . j av a 2 s . co m */ private void convertInstance(Instance instance) throws Exception { double[] instVals = new double[instance.numAttributes()]; for (int i = 0; i < instance.numAttributes(); i++) { if (!instance.attribute(i).isString() || instance.isMissing(i)) { instVals[i] = instance.value(i); } else { if (!m_SelectCols.isInRange(i)) { int index = getOutputFormat().attribute(i).addStringValue(instance.stringValue(i)); instVals[i] = (double) index; continue; } // aly: str = text of the document String str = instance.stringValue(i); String tokenized = tokenize(str); // aly: resultStr is the clean version of str // log.info(resultStr.toString()); int index = getOutputFormat().attribute(i).addStringValue(tokenized); instVals[i] = (double) index; } } Instance inst = new Instance(instance.weight(), instVals); inst.setDataset(getOutputFormat()); push(inst); }
From source file:com.openkm.kea.filter.NumbersFilter.java
License:Open Source License
/** * Converts an instance. A phrase boundary is inserted where * a number is found./*www . j a va2s . co m*/ */ private void convertInstance(Instance instance) throws Exception { double[] instVals = new double[instance.numAttributes()]; for (int i = 0; i < instance.numAttributes(); i++) { if ((!instance.attribute(i).isString()) || instance.isMissing(i)) { instVals[i] = instance.value(i); } else { String str = instance.stringValue(i); StringBuffer resultStr = new StringBuffer(); StringTokenizer tok = new StringTokenizer(str, " \t\n", true); while (tok.hasMoreTokens()) { String token = tok.nextToken(); // Everything that doesn't contain at least // one letter is considered to be a number boolean isNumber = true; for (int j = 0; j < token.length(); j++) { if (Character.isLetter(token.charAt(j))) { isNumber = false; break; } } if (!isNumber) { resultStr.append(token); } else { if (token.equals(" ") || token.equals("\t") || token.equals("\n")) { resultStr.append(token); } else { resultStr.append(" \n "); } } } int index = getOutputFormat().attribute(i).addStringValue(resultStr.toString()); instVals[i] = (double) index; } } Instance inst = new Instance(instance.weight(), instVals); inst.setDataset(getOutputFormat()); push(inst); }
From source file:com.spread.experiment.tempuntilofficialrelease.ClassificationViaClustering108.java
License:Open Source License
/** * Returns class probability distribution for the given instance. * /*from w w w. ja v a2s . co m*/ * @param instance the instance to be classified * @return the class probabilities * @throws Exception if an error occurred during the prediction */ @Override public double[] distributionForInstance(Instance instance) throws Exception { if (m_ZeroR != null) { return m_ZeroR.distributionForInstance(instance); } else { double[] result = new double[instance.numClasses()]; if (m_ActualClusterer != null) { // build new instance Instances tempData = m_ClusteringHeader.stringFreeStructure(); double[] values = new double[tempData.numAttributes()]; int n = 0; for (int i = 0; i < instance.numAttributes(); i++) { if (i == instance.classIndex()) { continue; } if (instance.attribute(i).isString()) { values[n] = tempData.attribute(n).addStringValue(instance.stringValue(i)); } else if (instance.attribute(i).isRelationValued()) { values[n] = tempData.attribute(n).addRelation(instance.relationalValue(i)); } else { values[n] = instance.value(i); } n++; } Instance newInst = new DenseInstance(instance.weight(), values); newInst.setDataset(tempData); if (!getLabelAllClusters()) { // determine cluster/class double r = m_ClustersToClasses[m_ActualClusterer.clusterInstance(newInst)]; if (r == -1) { return result; // Unclassified } else { result[(int) r] = 1.0; return result; } } else { double[] classProbs = new double[instance.numClasses()]; double[] dist = m_ActualClusterer.distributionForInstance(newInst); for (int i = 0; i < dist.length; i++) { for (int j = 0; j < instance.numClasses(); j++) { classProbs[j] += dist[i] * m_ClusterClassProbs[i][j]; } } Utils.normalize(classProbs); return classProbs; } } else { return result; // Unclassified } } }
From source file:control.CosineDistance.java
License:Open Source License
/** * Calculates the distance between two instances. * /*from www. jav a 2 s . c o m*/ * @param first the first instance * @param second the second instance * @return the distance between the two given instances */ public double distance(Instance first, Instance second) { HashMap<String, Double> fInstance = new HashMap<String, Double>(); HashMap<String, Double> sInstance = new HashMap<String, Double>(); for (int i = 0; i < first.numAttributes(); i++) { fInstance.put(first.attribute(i).name(), first.value(i)); sInstance.put(second.attribute(i).name(), second.value(i)); } return 1 - CosineSimilarity.calculateCosineSimilarity(fInstance, sInstance); }