List of usage examples for the weka.core.Instances constructor
public Instances(String name, ArrayList<Attribute> attInfo, int capacity)
From source file:cyber009.udal.mains.WekaUDAL.java
@SuppressWarnings("unchecked") public void updateLabelDataSet() { int count = 0; Instances temp = new Instances(data.unLabelDataSets, 0, 0); data.infoFWunLabel = (HashMap<Integer, Double>) Utilitys.sortByValue((Map) data.infoFWunLabel); for (Map.Entry<Integer, Double> entrySet : data.infoFWunLabel.entrySet()) { int index = entrySet.getKey(); if (count < data.N_FL) { // System.out.println(index + " : " // +entrySet.getValue() + " : " // + data.unLabelDataSets.instance(index).toString()); func.syntacticLabelFunction(data.unLabelDataSets.get(index)); data.labelDataSets.add(data.unLabelDataSets.get(index)); } else {/* ww w. j ava 2 s . c o m*/ temp.add(data.unLabelDataSets.get(index)); } count++; } data.infoFWunLabel.clear(); data.unLabelDataSets.clear(); data.unLabelDataSets.addAll(temp); //System.out.println("------------------------------------------"); }
From source file:cz.vse.fis.keg.entityclassifier.core.salience.EntitySaliencer.java
License:Open Source License
public void computeSalience(List<Entity> entities) { try {//from w w w . ja va 2s . c om if (!initialized) { initialize(); initialized = true; } ArrayList<SEntity> processedEntities = new ArrayList<SEntity>(); for (Entity e : entities) { SEntity entityMention = new SEntity(); entityMention.setBeginIndex(e.getStartOffset().intValue()); entityMention.setEntityType(e.getEntityType()); ArrayList<Type> types = e.getTypes(); ArrayList<String> loggedURIs = new ArrayList<String>(); if (types != null) { for (Type t : types) { String entityURI = t.getEntityURI(); if (!loggedURIs.contains(entityURI)) { loggedURIs.add(entityURI); entityMention.getUrls().add(entityURI); } } } boolean entityAlreadyLogged = false; for (SEntity sEntity : processedEntities) { boolean isThisEntitySame = false; ArrayList<String> entityURIs1 = sEntity.getUrls(); ArrayList<String> entityURIs2 = entityMention.getUrls(); for (String eURI1 : entityURIs1) { for (String eURI2 : entityURIs2) { if (!entityAlreadyLogged) { if (eURI1.equals(eURI2)) { entityAlreadyLogged = true; isThisEntitySame = true; sEntity.setNumOccurrences(sEntity.getNumOccurrences() + 1); } } } } if (isThisEntitySame) { for (String uri : entityMention.getUrls()) { if (!sEntity.getUrls().contains(uri)) { sEntity.getUrls().add(uri); } } } } // Entity seen for first time in the document. if (!entityAlreadyLogged) { entityMention.setNumOccurrences(1); processedEntities.add(entityMention); } } // Preparing the test data container. 
FastVector attributes = new FastVector(6); attributes.add(new Attribute("beginIndex")); attributes.add(new Attribute("numUniqueEntitiesInDoc")); attributes.add(new Attribute("numOfOccurrencesOfEntityInDoc")); attributes.add(new Attribute("numOfEntityMentionsInDoc")); FastVector entityTypeNominalAttVal = new FastVector(2); entityTypeNominalAttVal.addElement("named_entity"); entityTypeNominalAttVal.addElement("common_entity"); Attribute entityTypeAtt = new Attribute("type", entityTypeNominalAttVal); attributes.add(entityTypeAtt); FastVector classNominalAttVal = new FastVector(3); classNominalAttVal.addElement("not_salient"); classNominalAttVal.addElement("less_salient"); classNominalAttVal.addElement("most_salient"); Attribute classAtt = new Attribute("class", classNominalAttVal); attributes.add(classAtt); Instances evalData = new Instances("MyRelation", attributes, 0); evalData.setClassIndex(evalData.numAttributes() - 1); for (int i = 0; i < processedEntities.size(); i++) { String entityType = ""; if (processedEntities.get(i).getEntityType().equals("named entity")) { entityType = "named_entity"; } else if (processedEntities.get(i).getEntityType().equals("common entity")) { entityType = "common_entity"; } else { } Instance inst = new DenseInstance(6); inst.setValue(evalData.attribute(0), processedEntities.get(i).getBeginIndex()); // begin index inst.setValue(evalData.attribute(1), processedEntities.size()); // num of unique entities in doc inst.setValue(evalData.attribute(2), processedEntities.get(i).getNumOccurrences()); // num of entity occurrences in doc inst.setValue(evalData.attribute(3), entities.size()); // num of entity mentions in doc inst.setValue(evalData.attribute(4), entityType); // type of the entity evalData.add(inst); } for (int i = 0; i < processedEntities.size(); i++) { SEntity sEntity = processedEntities.get(i); int classIndex = (int) classifier.classifyInstance(evalData.get(i)); String classLabel = 
evalData.firstInstance().classAttribute().value(classIndex); double pred[] = classifier.distributionForInstance(evalData.get(i)); double probability = pred[classIndex]; double salienceScore = pred[1] * 0.5 + pred[2]; sEntity.setSalienceScore(salienceScore); sEntity.setSalienceConfidence(probability); sEntity.setSalienceClass(classLabel); for (Entity e : entities) { ArrayList<Type> types = e.getTypes(); if (types != null) { for (Type t : types) { if (sEntity.getUrls().contains(t.getEntityURI())) { Salience s = new Salience(); s.setClassLabel(classLabel); DecimalFormat df = new DecimalFormat("0.000"); double fProbability = df.parse(df.format(probability)).doubleValue(); double fSalience = df.parse(df.format(salienceScore)).doubleValue(); s.setConfidence(fProbability); s.setScore(fSalience); t.setSalience(s); } } } } } } catch (Exception ex) { Logger.getLogger(EntitySaliencer.class.getName()).log(Level.SEVERE, null, ex); } }
From source file:data.generation.target.utils.PrincipalComponents.java
License:Open Source License
/** * Set up the header for the PC->original space dataset * /*from ww w .j a v a2s . co m*/ * @return the output format * @throws Exception if something goes wrong */ private Instances setOutputFormatOriginal() throws Exception { FastVector attributes = new FastVector(); for (int i = 0; i < m_numAttribs; i++) { String att = m_trainInstances.attribute(i).name(); attributes.addElement(new Attribute(att)); } if (m_hasClass) { attributes.addElement(m_trainHeader.classAttribute().copy()); } Instances outputFormat = new Instances(m_trainHeader.relationName() + "->PC->original space", attributes, 0); // set the class to be the last attribute if necessary if (m_hasClass) { outputFormat.setClassIndex(outputFormat.numAttributes() - 1); } return outputFormat; }
From source file:data.generation.target.utils.PrincipalComponents.java
License:Open Source License
/** * Set the format for the transformed data * @return a set of empty Instances (header only) in the new format * @throws Exception if the output format can't be set *//*from ww w . j av a 2 s.c o m*/ private Instances setOutputFormat() throws Exception { if (m_eigenvalues == null) { return null; } double cumulative = 0.0; FastVector attributes = new FastVector(); for (int i = m_numAttribs - 1; i >= 0; i--) { StringBuffer attName = new StringBuffer(); // build array of coefficients double[] coeff_mags = new double[m_numAttribs]; for (int j = 0; j < m_numAttribs; j++) coeff_mags[j] = -Math.abs(m_eigenvectors[j][m_sortedEigens[i]]); int num_attrs = (m_maxAttrsInName > 0) ? Math.min(m_numAttribs, m_maxAttrsInName) : m_numAttribs; // this array contains the sorted indices of the coefficients int[] coeff_inds; if (m_numAttribs > 0) { // if m_maxAttrsInName > 0, sort coefficients by decreasing magnitude coeff_inds = Utils.sort(coeff_mags); } else { // if m_maxAttrsInName <= 0, use all coeffs in original order coeff_inds = new int[m_numAttribs]; for (int j = 0; j < m_numAttribs; j++) coeff_inds[j] = j; } // build final attName string for (int j = 0; j < num_attrs; j++) { double coeff_value = m_eigenvectors[coeff_inds[j]][m_sortedEigens[i]]; if (j > 0 && coeff_value >= 0) attName.append("+"); attName.append( Utils.doubleToString(coeff_value, 5, 3) + m_trainInstances.attribute(coeff_inds[j]).name()); } if (num_attrs < m_numAttribs) attName.append("..."); attributes.addElement(new Attribute(attName.toString())); cumulative += m_eigenvalues[m_sortedEigens[i]]; if ((cumulative / m_sumOfEigenValues) >= m_coverVariance) { break; } } if (m_hasClass) { attributes.addElement(m_trainHeader.classAttribute().copy()); } Instances outputFormat = new Instances(m_trainInstances.relationName() + "_principal components", attributes, 0); // set the class to be the last attribute if necessary if (m_hasClass) { outputFormat.setClassIndex(outputFormat.numAttributes() - 1); } m_outputNumAtts = 
outputFormat.numAttributes(); return outputFormat; }
From source file:de.fub.maps.project.detector.model.inference.processhandler.CrossValidationProcessHandler.java
License:Open Source License
@Override protected void handle() { Collection<Attribute> attributeList = getInferenceModel().getAttributes(); Instances trainingSet = new Instances("Classes", new ArrayList<Attribute>(attributeList), 9); trainingSet.setClassIndex(0);/*from w ww. jav a 2s . co m*/ HashMap<String, HashSet<TrackSegment>> dataset = getInferenceModel().getInput().getTrainingsSet(); for (Entry<String, HashSet<TrackSegment>> entry : dataset.entrySet()) { for (TrackSegment trackSegment : entry.getValue()) { Instance instance = getInstance(entry.getKey(), trackSegment); trainingSet.add(instance); } } assert trainingSet.numInstances() > 0 : "Training set is empty and has no instances"; //NO18N evaluate(trainingSet); }
From source file:de.fub.maps.project.detector.model.inference.processhandler.InferenceDataProcessHandler.java
License:Open Source License
@Override protected void handle() { clearResults();// w w w. j a va 2s . c om Classifier classifier = getInferenceModel().getClassifier(); HashSet<TrackSegment> inferenceDataSet = getInferenceDataSet(); Collection<Attribute> attributeList = getInferenceModel().getAttributes(); if (!attributeList.isEmpty()) { Set<String> keySet = getInferenceModel().getInput().getTrainingsSet().keySet(); setClassesToView(keySet); Instances unlabeledInstances = new Instances("Unlabeld Tracks", new ArrayList<Attribute>(attributeList), 0); //NO18N unlabeledInstances.setClassIndex(0); ArrayList<TrackSegment> segmentList = new ArrayList<TrackSegment>(); for (TrackSegment segment : inferenceDataSet) { Instance instance = getInstance(segment); unlabeledInstances.add(instance); segmentList.add(segment); } // create copy Instances labeledInstances = new Instances(unlabeledInstances); for (int index = 0; index < labeledInstances.numInstances(); index++) { try { Instance instance = labeledInstances.instance(index); // classify instance double classifyed = classifier.classifyInstance(instance); instance.setClassValue(classifyed); // get class label String value = unlabeledInstances.classAttribute().value((int) classifyed); if (index < segmentList.size()) { instanceToTrackSegmentMap.put(instance, segmentList.get(index)); } // put label and instance to result map put(value, instance); } catch (Exception ex) { Exceptions.printStackTrace(ex); } } // update visw updateVisualRepresentation(); // update result set of the inferenceModel for (Entry<String, List<Instance>> entry : resultMap.entrySet()) { HashSet<TrackSegment> trackSegmentList = new HashSet<TrackSegment>(); for (Instance instance : entry.getValue()) { TrackSegment trackSegment = instanceToTrackSegmentMap.get(instance); if (trackSegment != null) { trackSegmentList.add(trackSegment); } } // only those classes are put into the result data set, which are not empty if (!trackSegmentList.isEmpty()) { 
getInferenceModel().getResult().put(entry.getKey(), trackSegmentList); } } } else { throw new InferenceModelClassifyException(MessageFormat .format("No attributes available. Attribute list lengeth == {0}", attributeList.size())); } resultMap.clear(); instanceToTrackSegmentMap.clear(); }
From source file:de.fub.maps.project.detector.model.inference.processhandler.SpecialInferenceDataProcessHandler.java
License:Open Source License
@Override protected void handle() { clearResults();//from www . j a v a2s . c o m Classifier classifier = getInferenceModel().getClassifier(); Collection<Attribute> attributeList = getInferenceModel().getAttributes(); if (!attributeList.isEmpty()) { Set<String> keySet = getInferenceModel().getInput().getTrainingsSet().keySet(); setClassesToView(keySet); Instances unlabeledInstances = new Instances("Unlabeld Tracks", new ArrayList<Attribute>(attributeList), 0); //NO18N unlabeledInstances.setClassIndex(0); ArrayList<TrackSegment> segmentList = new ArrayList<TrackSegment>(); for (Entry<String, HashSet<TrackSegment>> entry : getInferenceModel().getInput().getTrainingsSet() .entrySet()) { for (TrackSegment segment : entry.getValue()) { segment.setLabel(entry.getKey()); Instance instance = getInstance(segment); unlabeledInstances.add(instance); segmentList.add(segment); } } // create copy Instances labeledInstances = new Instances(unlabeledInstances); for (int index = 0; index < labeledInstances.numInstances(); index++) { try { Instance instance = labeledInstances.instance(index); // classify instance double classifyed = classifier.classifyInstance(instance); instance.setClassValue(classifyed); // get class label String value = unlabeledInstances.classAttribute().value((int) classifyed); if (index < segmentList.size()) { instanceToTrackSegmentMap.put(instance, segmentList.get(index)); } // put label and instance to result map put(value, instance); } catch (Exception ex) { Exceptions.printStackTrace(ex); } } // update visw updateVisualRepresentation(); // update result set of the inferenceModel for (Map.Entry<String, List<Instance>> entry : resultMap.entrySet()) { HashSet<TrackSegment> trackSegmentList = new HashSet<TrackSegment>(); for (Instance instance : entry.getValue()) { TrackSegment trackSegment = instanceToTrackSegmentMap.get(instance); if (trackSegment != null) { trackSegmentList.add(trackSegment); } } // only those classes are put into the result data set, which 
are not empty if (!trackSegmentList.isEmpty()) { getInferenceModel().getResult().put(entry.getKey(), trackSegmentList); } } } else { throw new InferenceModelClassifyException(MessageFormat .format("No attributes available. Attribute list lengeth == {0}", attributeList.size())); } resultMap.clear(); instanceToTrackSegmentMap.clear(); }
From source file:de.fub.maps.project.detector.model.inference.processhandler.TrainingsDataProcessHandler.java
License:Open Source License
@Override protected void handle() { final ProgressHandle handle = ProgressHandleFactory.createHandle("Trainings"); try {/*w ww . jav a2 s. c o m*/ handle.start(); Collection<Attribute> attributeCollection = getInferenceModel().getAttributes(); ArrayList<Attribute> arrayList = new ArrayList<Attribute>(attributeCollection); Instances trainingSet = new Instances("Classes", arrayList, 0); trainingSet.setClassIndex(0); Instances testingSet = new Instances("Classes", arrayList, 0); testingSet.setClassIndex(0); HashMap<String, HashSet<TrackSegment>> dataset = getInferenceModel().getInput().getTrainingsSet(); int datasetCount = 0; for (HashSet<TrackSegment> list : dataset.values()) { for (TrackSegment trackSegment : list) { datasetCount += trackSegment.getWayPointList().size(); } } handle.switchToDeterminate(datasetCount); int trackCount = 0; for (Entry<String, HashSet<TrackSegment>> entry : dataset.entrySet()) { int trainingsSetSize = (int) Math.ceil(entry.getValue().size() * getTrainingsSetRatioParameter()); int index = 0; for (TrackSegment trackSegment : entry.getValue()) { Instance instance = getInstance(entry.getKey(), trackSegment); if (index < trainingsSetSize) { trainingSet.add(instance); } else { testingSet.add(instance); } handle.progress(trackCount++); index++; } } assert trainingSet.numInstances() > 0 : "Training set is empty and has no instances"; //NO18N assert testingSet.numInstances() > 0 : "Testing set is empty and has no instances"; //NO18N handle.switchToIndeterminate(); evaluate(trainingSet, testingSet); } finally { handle.finish(); } }
From source file:de.ugoe.cs.cpdp.dataselection.DecisionTreeSelection.java
License:Apache License
@Override public void apply(Instances testdata, SetUniqueList<Instances> traindataSet) { final Instances data = characteristicInstances(testdata, traindataSet); final ArrayList<String> attVals = new ArrayList<String>(); attVals.add("same"); attVals.add("more"); attVals.add("less"); final ArrayList<Attribute> atts = new ArrayList<Attribute>(); for (int j = 0; j < data.numAttributes(); j++) { atts.add(new Attribute(data.attribute(j).name(), attVals)); }//from w w w . jav a 2 s. c o m atts.add(new Attribute("score")); Instances similarityData = new Instances("similarity", atts, 0); similarityData.setClassIndex(similarityData.numAttributes() - 1); try { Classifier classifier = new J48(); for (int i = 0; i < traindataSet.size(); i++) { classifier.buildClassifier(traindataSet.get(i)); for (int j = 0; j < traindataSet.size(); j++) { if (i != j) { double[] similarity = new double[data.numAttributes() + 1]; for (int k = 0; k < data.numAttributes(); k++) { if (0.9 * data.get(i + 1).value(k) > data.get(j + 1).value(k)) { similarity[k] = 2.0; } else if (1.1 * data.get(i + 1).value(k) < data.get(j + 1).value(k)) { similarity[k] = 1.0; } else { similarity[k] = 0.0; } } Evaluation eval = new Evaluation(traindataSet.get(j)); eval.evaluateModel(classifier, traindataSet.get(j)); similarity[data.numAttributes()] = eval.fMeasure(1); similarityData.add(new DenseInstance(1.0, similarity)); } } } REPTree repTree = new REPTree(); if (repTree.getNumFolds() > similarityData.size()) { repTree.setNumFolds(similarityData.size()); } repTree.setNumFolds(2); repTree.buildClassifier(similarityData); Instances testTrainSimilarity = new Instances(similarityData); testTrainSimilarity.clear(); for (int i = 0; i < traindataSet.size(); i++) { double[] similarity = new double[data.numAttributes() + 1]; for (int k = 0; k < data.numAttributes(); k++) { if (0.9 * data.get(0).value(k) > data.get(i + 1).value(k)) { similarity[k] = 2.0; } else if (1.1 * data.get(0).value(k) < data.get(i + 1).value(k)) { 
similarity[k] = 1.0; } else { similarity[k] = 0.0; } } testTrainSimilarity.add(new DenseInstance(1.0, similarity)); } int bestScoringProductIndex = -1; double maxScore = Double.MIN_VALUE; for (int i = 0; i < traindataSet.size(); i++) { double score = repTree.classifyInstance(testTrainSimilarity.get(i)); if (score > maxScore) { maxScore = score; bestScoringProductIndex = i; } } Instances bestScoringProduct = traindataSet.get(bestScoringProductIndex); traindataSet.clear(); traindataSet.add(bestScoringProduct); } catch (Exception e) { Console.printerr("failure during DecisionTreeSelection: " + e.getMessage()); throw new RuntimeException(e); } }
From source file:de.ugoe.cs.cpdp.dataselection.SetWiseEMContextSelection.java
License:Apache License
/** * Returns test- and training data with only the project context factors which were chosen in * the configuration. This is later used for clustering. * //from www.j a v a 2s. c o m * @param testdata * @param traindataSet * @return */ protected Instances getContextFactors(Instances testdata, SetUniqueList<Instances> traindataSet) { // setup weka Instances for clustering final ArrayList<Attribute> atts = new ArrayList<Attribute>(); // we only want the project context factors for (String pcf : this.project_context_factors) { atts.add(new Attribute(pcf)); } // set up the data final Instances data = new Instances("project_context_factors", atts, 0); double[] instanceValues = new double[atts.size()]; // only project context factors + only one instance per project needed int i = 0; for (String pcf : this.project_context_factors) { instanceValues[i] = testdata.instance(0).value(testdata.attribute(pcf)); // Console.traceln(Level.INFO, "adding attribute: " + pcf + " value: " + // instanceValues[i]); i++; } data.add(new DenseInstance(1.0, instanceValues)); // now for the projects of the training stet for (Instances traindata : traindataSet) { instanceValues = new double[atts.size()]; // ohne das hier immer dieselben werte?! i = 0; for (String pcf : this.project_context_factors) { instanceValues[i] = traindata.instance(0).value(traindata.attribute(pcf)); // Console.traceln(Level.INFO, "adding attribute: " + pcf + " value: " + // instanceValues[i]); i++; } data.add(new DenseInstance(1.0, instanceValues)); } return data; }