List of usage examples for weka.core Instances Instances
public Instances(String name, ArrayList<Attribute> attInfo, int capacity)
From source file:cn.ict.zyq.bestConf.bestConf.sampler.DDSSampler.java
License:Open Source License
/** * At current version, we assume all attributes are numeric attributes with bounds * //from w w w . ja va 2s .c om * @param useMid true if to use the middle point of a subdomain, false if to use a random point within a subdomain */ public Instances sampleMultiDimContinuous(ArrayList<Attribute> atts, int sampleSetSize, boolean useMid) { ArrayList<Integer>[] crntSetPerm; //only initialize once if (sets == null) { //possible number of sample sets will not exceed $sampleSetSize to the power of 2 int L = (int) Math.min(rounds, atts.size() > 2 ? Math.pow(sampleSetSize, atts.size() - 1) : (atts.size() > 1 ? sampleSetSize : 1)); //initialization dists = new long[L]; sets = new ArrayList[L][]; for (int i = 0; i < L; i++) { dists[i] = -1; sets[i] = null; } long maxMinDist = -1; int posWithMaxMinDist = -1; //generate L sets of sampleSetSize points for (int i = 0; i < L; i++) { ArrayList<Integer>[] setPerm = generateOneSampleSet(sampleSetSize, atts.size()); while (inAlready(sets, setPerm))//continue the samples set generation till different samples are obtained setPerm = generateOneSampleSet(sampleSetSize, atts.size()); sets[i] = setPerm; //compute the minimum distance minDist between any sample pair for each set dists[i] = minDistForSet(setPerm); //select the set with the maximum minDist if (dists[i] > maxMinDist) { posWithMaxMinDist = i; maxMinDist = dists[i]; } } //now let the first sample set be the one with the max mindist positionSwitch(sets, dists, 0, posWithMaxMinDist); } crntSetPerm = sets[sampleSetToGet]; //generate and output the set with the maximum minDist as the result //first, divide the domain of each attribute into sampleSetSize equal subdomain double[][] bounds = new double[atts.size()][sampleSetSize + 1];//sampleSetSize+1 to include the lower and upper bounds Iterator<Attribute> itr = atts.iterator(); Attribute crntAttr; boolean[] roundToInt = new boolean[atts.size()]; for (int i = 0; i < bounds.length; i++) { crntAttr = itr.next(); uniBoundsGeneration(bounds[i], crntAttr, sampleSetSize); //flexibleBoundsGeneration(bounds[i], crntAttr, sampleSetSize); if (bounds[i][sampleSetSize] - bounds[i][0] > sampleSetSize) roundToInt[i] = true; } //second, generate the set according to setWithMaxMinDist Instances data = new Instances("SamplesByLHS", atts, sampleSetSize); for (int i = 0; i < sampleSetSize; i++) { double[] vals = new double[atts.size()]; for (int j = 0; j < vals.length; j++) { vals[j] = useMid ? (bounds[j][crntSetPerm[j].get(i)] + bounds[j][crntSetPerm[j].get(i) + 1]) / 2 : bounds[j][crntSetPerm[j].get(i)] + ((bounds[j][crntSetPerm[j].get(i) + 1] - bounds[j][crntSetPerm[j].get(i)]) * uniRand.nextDouble()); if (roundToInt[j]) vals[j] = (int) vals[j]; } data.add(new DenseInstance(1.0, vals)); } //third, return the generated points return data; }
From source file:cn.ict.zyq.bestConf.bestConf.sampler.LHSSampler.java
License:Open Source License
/** * Assumptions:(1)Numberic is continuous and has lower/upper bounds; (2) Nominals have domains permutable * // www . j av a2 s.com * @param useMid true if to use the middle point of a subdomain, false if to use a random point within a subdomain */ private static Instances getMultiDim(ArrayList<Attribute> atts, int sampleSetSize, boolean useMid) { int L = Math.min(7, Math.max(sampleSetSize, atts.size()));//7 is chosen for no special reason double maxMinDist = 0, crntMinDist;//work as the threshold to select the sample set ArrayList<Integer>[] setWithMaxMinDist = null; //generate L sets of sampleSetSize points for (int i = 0; i < L; i++) { ArrayList<Integer>[] setPerm = generateOneSampleSet(sampleSetSize, atts.size()); //compute the minimum distance minDist between any sample pair for each set crntMinDist = minDistForSet(setPerm); //select the set with the maximum minDist if (crntMinDist > maxMinDist) { setWithMaxMinDist = setPerm; maxMinDist = crntMinDist; } } //generate and output the set with the maximum minDist as the result //first, divide the domain of each attribute into sampleSetSize equal subdomain double[][] bounds = new double[atts.size()][sampleSetSize + 1];//sampleSetSize+1 to include the lower and upper bounds Iterator<Attribute> itr = atts.iterator(); Attribute crntAttr; double pace; for (int i = 0; i < bounds.length; i++) { crntAttr = itr.next(); if (crntAttr.isNumeric()) { bounds[i][0] = crntAttr.getLowerNumericBound(); bounds[i][sampleSetSize] = crntAttr.getUpperNumericBound(); pace = (crntAttr.getUpperNumericBound() - crntAttr.getLowerNumericBound()) / sampleSetSize; for (int j = 1; j < sampleSetSize; j++) { bounds[i][j] = bounds[i][j - 1] + pace; } } else {//crntAttr.isNominal() if (crntAttr.numValues() >= sampleSetSize) { //randomly select among the set for (int j = 0; j <= sampleSetSize; j++) bounds[i][j] = uniRand.nextInt(crntAttr.numValues());//the position of one of the nominal values } else { //first round-robin int lastPart = sampleSetSize % crntAttr.numValues(); for (int j = 0; j < sampleSetSize - lastPart; j++) bounds[i][j] = j % crntAttr.numValues(); //then randomly select for (int j = sampleSetSize - lastPart; j <= sampleSetSize; j++) bounds[i][j] = uniRand.nextInt(crntAttr.numValues()); } } //nominal attribute } //get all subdomains //second, generate the set according to setWithMaxMinDist Instances data = new Instances("InitialSetByLHS", atts, sampleSetSize); for (int i = 0; i < sampleSetSize; i++) { double[] vals = new double[atts.size()]; for (int j = 0; j < vals.length; j++) { if (atts.get(j).isNumeric()) { vals[j] = useMid ? (bounds[j][setWithMaxMinDist[j].get(i)] + bounds[j][setWithMaxMinDist[j].get(i) + 1]) / 2 : bounds[j][setWithMaxMinDist[j].get(i)] + ((bounds[j][setWithMaxMinDist[j].get(i) + 1] - bounds[j][setWithMaxMinDist[j].get(i)]) * uniRand.nextDouble()); } else {//isNominal() vals[j] = bounds[j][setWithMaxMinDist[j].get(i)]; } } data.add(new DenseInstance(1.0, vals)); } //third, return the generated points return data; }
From source file:cn.ict.zyq.bestConf.bestConf.sampler.LHSSampler.java
License:Open Source License
/** * At current version, we assume all attributes are numeric attributes with bounds * //from ww w . j av a 2 s .c o m * @param useMid true if to use the middle point of a subdomain, false if to use a random point within a subdomain */ public Instances sampleMultiDimContinuous(ArrayList<Attribute> atts, int sampleSetSize, boolean useMid) { int L = Math.min(7, Math.max(sampleSetSize, atts.size()));//7 is chosen for no special reason double maxMinDist = 0, crntMinDist;//work as the threshold to select the sample set ArrayList<Integer>[] setWithMaxMinDist = null; //generate L sets of sampleSetSize points for (int i = 0; i < L; i++) { ArrayList<Integer>[] setPerm = generateOneSampleSet(sampleSetSize, atts.size()); //compute the minimum distance minDist between any sample pair for each set crntMinDist = minDistForSet(setPerm); //select the set with the maximum minDist if (crntMinDist > maxMinDist) { setWithMaxMinDist = setPerm; maxMinDist = crntMinDist; } } //generate and output the set with the maximum minDist as the result //first, divide the domain of each attribute into sampleSetSize equal subdomain double[][] bounds = new double[atts.size()][sampleSetSize + 1];//sampleSetSize+1 to include the lower and upper bounds Iterator<Attribute> itr = atts.iterator(); Attribute crntAttr; boolean[] roundToInt = new boolean[atts.size()]; for (int i = 0; i < bounds.length; i++) { crntAttr = itr.next(); uniBoundsGeneration(bounds[i], crntAttr, sampleSetSize); //flexibleBoundsGeneration(bounds[i], crntAttr, sampleSetSize); if (bounds[i][sampleSetSize] - bounds[i][0] > sampleSetSize) roundToInt[i] = true; } //second, generate the set according to setWithMaxMinDist Instances data = new Instances("SamplesByLHS", atts, sampleSetSize); for (int i = 0; i < sampleSetSize; i++) { double[] vals = new double[atts.size()]; for (int j = 0; j < vals.length; j++) { vals[j] = useMid ? (bounds[j][setWithMaxMinDist[j].get(i)] + bounds[j][setWithMaxMinDist[j].get(i) + 1]) / 2 : bounds[j][setWithMaxMinDist[j].get(i)] + ((bounds[j][setWithMaxMinDist[j].get(i) + 1] - bounds[j][setWithMaxMinDist[j].get(i)]) * uniRand.nextDouble()); if (roundToInt[j]) vals[j] = (int) vals[j]; } data.add(new DenseInstance(1.0, vals)); } //third, return the generated points return data; }
From source file:cn.ict.zyq.bestConf.bestConf.sampler.LHSSampler.java
License:Open Source License
/** * At current version, we assume all attributes are numeric attributes with bounds * // w w w . j a va2s . c o m * Let PACE be upper-lower DIVided by the sampleSetSize * * @param useMid true if to use the middle point of a subdomain, false if to use a random point within a subdomain */ private static Instances getMultiDimContinuousDiv(ArrayList<Attribute> atts, int sampleSetSize, boolean useMid) { int L = Math.min(7, Math.max(sampleSetSize, atts.size()));//7 is chosen for no special reason double maxMinDist = 0, crntMinDist;//work as the threshold to select the sample set ArrayList<Integer>[] setWithMaxMinDist = null; //generate L sets of sampleSetSize points for (int i = 0; i < L; i++) { ArrayList<Integer>[] setPerm = generateOneSampleSet(sampleSetSize, atts.size()); //compute the minimum distance minDist between any sample pair for each set crntMinDist = minDistForSet(setPerm); //select the set with the maximum minDist if (crntMinDist > maxMinDist) { setWithMaxMinDist = setPerm; maxMinDist = crntMinDist; } } //generate and output the set with the maximum minDist as the result //first, divide the domain of each attribute into sampleSetSize equal subdomain double[][] bounds = new double[atts.size()][sampleSetSize + 1];//sampleSetSize+1 to include the lower and upper bounds Iterator<Attribute> itr = atts.iterator(); Attribute crntAttr; double pace; for (int i = 0; i < bounds.length; i++) { crntAttr = itr.next(); bounds[i][0] = crntAttr.getLowerNumericBound(); bounds[i][sampleSetSize] = crntAttr.getUpperNumericBound(); pace = (bounds[i][sampleSetSize] - bounds[i][0]) / sampleSetSize; for (int j = 1; j < sampleSetSize; j++) { bounds[i][j] = bounds[i][j - 1] + pace; } } //second, generate the set according to setWithMaxMinDist Instances data = new Instances("InitialSetByLHS", atts, sampleSetSize); for (int i = 0; i < sampleSetSize; i++) { double[] vals = new double[atts.size()]; for (int j = 0; j < vals.length; j++) { vals[j] = useMid ? (bounds[j][setWithMaxMinDist[j].get(i)] + bounds[j][setWithMaxMinDist[j].get(i) + 1]) / 2 : bounds[j][setWithMaxMinDist[j].get(i)] + ((bounds[j][setWithMaxMinDist[j].get(i) + 1] - bounds[j][setWithMaxMinDist[j].get(i)]) * uniRand.nextDouble()); } data.add(new DenseInstance(1.0, vals)); } //third, return the generated points return data; }
From source file:cn.ict.zyq.bestConf.util.LHSInitializer.java
License:Open Source License
/** * Assumptions:(1)Numberic is continuous and has lower/upper bounds; (2) Nominals have domains permutable * /*from ww w . j a v a2 s. c o m*/ * @param useMid true if to use the middle point of a subdomain, false if to use a random point within a subdomain */ public static Instances getMultiDim(ArrayList<Attribute> atts, int sampleSetSize, boolean useMid) { int L = Math.min(7, Math.max(sampleSetSize, atts.size()));//7 is chosen for no special reason double maxMinDist = 0, crntMinDist;//work as the threshold to select the sample set ArrayList<Integer>[] setWithMaxMinDist = null; //generate L sets of sampleSetSize points for (int i = 0; i < L; i++) { ArrayList<Integer>[] setPerm = generateOneSampleSet(sampleSetSize, atts.size()); //compute the minimum distance minDist between any sample pair for each set crntMinDist = minDistForSet(setPerm); //select the set with the maximum minDist if (crntMinDist > maxMinDist) { setWithMaxMinDist = setPerm; maxMinDist = crntMinDist; } } //generate and output the set with the maximum minDist as the result //first, divide the domain of each attribute into sampleSetSize equal subdomain double[][] bounds = new double[atts.size()][sampleSetSize + 1];//sampleSetSize+1 to include the lower and upper bounds Iterator<Attribute> itr = atts.iterator(); Attribute crntAttr; double pace; for (int i = 0; i < bounds.length; i++) { crntAttr = itr.next(); if (crntAttr.isNumeric()) { bounds[i][0] = crntAttr.getLowerNumericBound(); bounds[i][sampleSetSize] = crntAttr.getUpperNumericBound(); pace = (crntAttr.getUpperNumericBound() - crntAttr.getLowerNumericBound()) / sampleSetSize; for (int j = 1; j < sampleSetSize; j++) { bounds[i][j] = bounds[i][j - 1] + pace; } } else {//crntAttr.isNominal() if (crntAttr.numValues() >= sampleSetSize) { //randomly select among the set for (int j = 0; j <= sampleSetSize; j++) bounds[i][j] = uniRand.nextInt(crntAttr.numValues());//the position of one of the nominal values } else { //first round-robin int lastPart = sampleSetSize % crntAttr.numValues(); for (int j = 0; j < sampleSetSize - lastPart; j++) bounds[i][j] = j % crntAttr.numValues(); //then randomly select for (int j = sampleSetSize - lastPart; j <= sampleSetSize; j++) bounds[i][j] = uniRand.nextInt(crntAttr.numValues()); } } //nominal attribute } //get all subdomains //second, generate the set according to setWithMaxMinDist Instances data = new Instances("InitialSetByLHS", atts, sampleSetSize); for (int i = 0; i < sampleSetSize; i++) { double[] vals = new double[atts.size()]; for (int j = 0; j < vals.length; j++) { if (atts.get(j).isNumeric()) { vals[j] = useMid ? (bounds[j][setWithMaxMinDist[j].get(i)] + bounds[j][setWithMaxMinDist[j].get(i) + 1]) / 2 : bounds[j][setWithMaxMinDist[j].get(i)] + ((bounds[j][setWithMaxMinDist[j].get(i) + 1] - bounds[j][setWithMaxMinDist[j].get(i)]) * uniRand.nextDouble()); } else {//isNominal() vals[j] = bounds[j][setWithMaxMinDist[j].get(i)]; } } data.add(new DenseInstance(1.0, vals)); } //third, return the generated points return data; }
From source file:cn.ict.zyq.bestConf.util.LHSInitializer.java
License:Open Source License
/** * At current version, we assume all attributes are numeric attributes with bounds * /* w w w.j ava 2 s . com*/ * Let PACE be log10(upper/lower) * * @param useMid true if to use the middle point of a subdomain, false if to use a random point within a subdomain */ public static Instances getMultiDimContinuousLog(ArrayList<Attribute> atts, int sampleSetSize, boolean useMid) { int L = Math.min(7, Math.max(sampleSetSize, atts.size()));//7 is chosen for no special reason double maxMinDist = 0, crntMinDist;//work as the threshold to select the sample set ArrayList<Integer>[] setWithMaxMinDist = null; //generate L sets of sampleSetSize points for (int i = 0; i < L; i++) { ArrayList<Integer>[] setPerm = generateOneSampleSet(sampleSetSize, atts.size()); //compute the minimum distance minDist between any sample pair for each set crntMinDist = minDistForSet(setPerm); //select the set with the maximum minDist if (crntMinDist > maxMinDist) { setWithMaxMinDist = setPerm; maxMinDist = crntMinDist; } } //generate and output the set with the maximum minDist as the result //first, divide the domain of each attribute into sampleSetSize equal subdomain double[][] bounds = new double[atts.size()][sampleSetSize + 1];//sampleSetSize+1 to include the lower and upper bounds Iterator<Attribute> itr = atts.iterator(); Attribute crntAttr; int step, crntStep; for (int i = 0; i < bounds.length; i++) { crntAttr = itr.next(); bounds[i][0] = crntAttr.getLowerNumericBound(); bounds[i][sampleSetSize] = crntAttr.getUpperNumericBound(); crntStep = (int) Math.log10(bounds[i][sampleSetSize] - bounds[i][0]); step = sampleSetSize / crntStep;//num of points drawn after the multiplication of 10 int left = sampleSetSize % crntStep; if (bounds[i][0] == 0) bounds[i][0] = uniRand.nextInt(10); crntStep = 1; double theBound = bounds[i][sampleSetSize] / 10; for (int j = 1; j < sampleSetSize; j++) { if (crntStep >= step && bounds[i][j - 1] <= theBound) crntStep = 0; if (crntStep == 0) bounds[i][j] = bounds[i][j - step] * 10; else if (crntStep < step) bounds[i][j] = bounds[i][j - crntStep] * ((double) crntStep * 10. / ((double) step + 1.)); else if (crntStep >= step) bounds[i][j] = bounds[i][j - crntStep] * ((double) crntStep * 10. / (double) (left + step + 1)); if (bounds[i][j] >= bounds[i][sampleSetSize]) System.err.println("be careful!!!!"); crntStep++; } } //second, generate the set according to setWithMaxMinDist Instances data = new Instances("InitialSetByLHS", atts, sampleSetSize); for (int i = 0; i < sampleSetSize; i++) { double[] vals = new double[atts.size()]; for (int j = 0; j < vals.length; j++) { vals[j] = useMid ? (bounds[j][setWithMaxMinDist[j].get(i)] + bounds[j][setWithMaxMinDist[j].get(i) + 1]) / 2 : bounds[j][setWithMaxMinDist[j].get(i)] + ((bounds[j][setWithMaxMinDist[j].get(i) + 1] - bounds[j][setWithMaxMinDist[j].get(i)]) * uniRand.nextDouble()); } data.add(new DenseInstance(1.0, vals)); } //third, return the generated points return data; }
From source file:cn.ict.zyq.bestConf.util.LHSInitializer.java
License:Open Source License
/** * At current version, we assume all attributes are numeric attributes with bounds * /* www . jav a 2 s . co m*/ * Let PACE be log10(upper/lower) * * @param useMid true if to use the middle point of a subdomain, false if to use a random point within a subdomain */ public static Instances getMultiDimContinuous(ArrayList<Attribute> atts, int sampleSetSize, boolean useMid) { int L = Math.min(7, Math.max(sampleSetSize, atts.size()));//7 is chosen for no special reason double maxMinDist = 0, crntMinDist;//work as the threshold to select the sample set ArrayList<Integer>[] setWithMaxMinDist = null; //generate L sets of sampleSetSize points for (int i = 0; i < L; i++) { ArrayList<Integer>[] setPerm = generateOneSampleSet(sampleSetSize, atts.size()); //compute the minimum distance minDist between any sample pair for each set crntMinDist = minDistForSet(setPerm); //select the set with the maximum minDist if (crntMinDist > maxMinDist) { setWithMaxMinDist = setPerm; maxMinDist = crntMinDist; } } //generate and output the set with the maximum minDist as the result //first, divide the domain of each attribute into sampleSetSize equal subdomain double[][] bounds = new double[atts.size()][sampleSetSize + 1];//sampleSetSize+1 to include the lower and upper bounds Iterator<Attribute> itr = atts.iterator(); Attribute crntAttr; boolean[] roundToInt = new boolean[atts.size()]; for (int i = 0; i < bounds.length; i++) { crntAttr = itr.next(); uniBoundsGeneration(bounds[i], crntAttr, sampleSetSize); //flexibleBoundsGeneration(bounds[i], crntAttr, sampleSetSize); if (bounds[i][sampleSetSize] - bounds[i][0] > sampleSetSize) roundToInt[i] = true; } //second, generate the set according to setWithMaxMinDist Instances data = new Instances("InitialSetByLHS", atts, sampleSetSize); for (int i = 0; i < sampleSetSize; i++) { double[] vals = new double[atts.size()]; for (int j = 0; j < vals.length; j++) { vals[j] = useMid ? (bounds[j][setWithMaxMinDist[j].get(i)] + bounds[j][setWithMaxMinDist[j].get(i) + 1]) / 2 : bounds[j][setWithMaxMinDist[j].get(i)] + ((bounds[j][setWithMaxMinDist[j].get(i) + 1] - bounds[j][setWithMaxMinDist[j].get(i)]) * uniRand.nextDouble()); if (roundToInt[j]) vals[j] = (int) vals[j]; } data.add(new DenseInstance(1.0, vals)); } //third, return the generated points return data; }
From source file:cn.ict.zyq.bestConf.util.LHSInitializer.java
License:Open Source License
/** * At current version, we assume all attributes are numeric attributes with bounds * /*from www . j a va 2 s .c om*/ * Let PACE be upper-lower DIVided by the sampleSetSize * * @param useMid true if to use the middle point of a subdomain, false if to use a random point within a subdomain */ public static Instances getMultiDimContinuousDiv(ArrayList<Attribute> atts, int sampleSetSize, boolean useMid) { int L = Math.min(7, Math.max(sampleSetSize, atts.size()));//7 is chosen for no special reason double maxMinDist = 0, crntMinDist;//work as the threshold to select the sample set ArrayList<Integer>[] setWithMaxMinDist = null; //generate L sets of sampleSetSize points for (int i = 0; i < L; i++) { ArrayList<Integer>[] setPerm = generateOneSampleSet(sampleSetSize, atts.size()); //compute the minimum distance minDist between any sample pair for each set crntMinDist = minDistForSet(setPerm); //select the set with the maximum minDist if (crntMinDist > maxMinDist) { setWithMaxMinDist = setPerm; maxMinDist = crntMinDist; } } //generate and output the set with the maximum minDist as the result //first, divide the domain of each attribute into sampleSetSize equal subdomain double[][] bounds = new double[atts.size()][sampleSetSize + 1];//sampleSetSize+1 to include the lower and upper bounds Iterator<Attribute> itr = atts.iterator(); Attribute crntAttr; double pace; for (int i = 0; i < bounds.length; i++) { crntAttr = itr.next(); bounds[i][0] = crntAttr.getLowerNumericBound(); bounds[i][sampleSetSize] = crntAttr.getUpperNumericBound(); pace = (bounds[i][sampleSetSize] - bounds[i][0]) / sampleSetSize; for (int j = 1; j < sampleSetSize; j++) { bounds[i][j] = bounds[i][j - 1] + pace; } } //second, generate the set according to setWithMaxMinDist Instances data = new Instances("InitialSetByLHS", atts, sampleSetSize); for (int i = 0; i < sampleSetSize; i++) { double[] vals = new double[atts.size()]; for (int j = 0; j < vals.length; j++) { vals[j] = useMid ? (bounds[j][setWithMaxMinDist[j].get(i)] + bounds[j][setWithMaxMinDist[j].get(i) + 1]) / 2 : bounds[j][setWithMaxMinDist[j].get(i)] + ((bounds[j][setWithMaxMinDist[j].get(i) + 1] - bounds[j][setWithMaxMinDist[j].get(i)]) * uniRand.nextDouble()); } data.add(new DenseInstance(1.0, vals)); } //third, return the generated points return data; }
From source file:com.actelion.research.orbit.imageAnalysis.imaging.TMAPoints.java
License:Open Source License
/** * returns x/y pairs for each input point * * @param pList/* w w w. j a v a2 s. c om*/ * @return */ private HashMap<Point, Point> clusterLines(List<Point> pList) { ArrayList<Attribute> attrListX = new ArrayList<Attribute>(2); attrListX.add(new Attribute("xvalue")); ArrayList<Attribute> attrListY = new ArrayList<Attribute>(2); attrListY.add(new Attribute("yvalue")); //attrList.add(new Attribute("class")); Instances xInst = new Instances("xlines", attrListX, pList.size()); Instances yInst = new Instances("ylines", attrListY, pList.size()); //instances.setClassIndex(1); for (Point p : pList) { //Instance inst = new DenseInstance(1d, new double[]{p.x,Double.NaN}); Instance instX = new DenseInstance(1d, new double[] { p.x }); instX.setDataset(xInst); //inst.setClassMissing(); xInst.add(instX); Instance instY = new DenseInstance(1d, new double[] { p.y }); instY.setDataset(yInst); yInst.add(instY); } try { EM colClusterer = new EM(); int numCols = guessNumClusters(colClusterer, xInst, 1, 20); colClusterer.setNumClusters(numCols); colClusterer.buildClusterer(xInst); logger.debug("NumCols: " + colClusterer.getNumClusters()); EM rowClusterer = new EM(); int numRows = guessNumClusters(rowClusterer, yInst, 1, 20); rowClusterer.setNumClusters(numRows); rowClusterer.buildClusterer(yInst); logger.debug("NumRows: " + rowClusterer.getNumClusters()); logger.trace("ColClusterer:"); HashMap<Integer, Integer> colHash = sortAndpPrintCluster(colClusterer); logger.trace("RowClusterer:"); HashMap<Integer, Integer> rowHash = sortAndpPrintCluster(rowClusterer); if (logger.isTraceEnabled()) { logger.trace("ColHash:"); for (Integer i : colHash.keySet()) { logger.trace("cluster " + i + ": " + colHash.get(i)); } logger.trace("RowHash:"); for (Integer i : rowHash.keySet()) { logger.trace("cluster " + i + ": " + rowHash.get(i)); } } // classify points HashMap<Point, Point> pMap = new HashMap<Point, Point>(); for (Point p : pList) { Instance instX = new DenseInstance(1d, new double[] { p.x }); instX.setDataset(xInst); Instance instY = new DenseInstance(1d, new double[] { p.y }); instY.setDataset(yInst); int x = colClusterer.clusterInstance(instX); int y = rowClusterer.clusterInstance(instY); x = colHash.get(x); y = rowHash.get(y); logger.trace(p + ": " + x + "/" + y); pMap.put(p, new Point(x, y)); } return pMap; } catch (Exception e) { e.printStackTrace(); logger.error("error while clustering points", e); return null; } }
From source file:com.actelion.research.orbit.imageAnalysis.models.OrbitModel.java
License:Open Source License
/** * convert models from old weka version/*from ww w . j ava 2 s. c om*/ * * @param model */ public static void fixOldModelVersion(final OrbitModel model) { if (model == null) return; // nothing to fix boolean oldWekaVersion = false; try { model.getStructure().classAttribute().numValues(); } catch (NullPointerException ne) { oldWekaVersion = true; } // apply old model fix? if (oldWekaVersion) { logger.info("model from old weka version (< 3.7.11) detected, trying to apply fixes"); int numClasses = model.getClassShapes().size(); TissueFeatures tf = new TissueFeatures(model.getFeatureDescription(), null); int numFeatures = tf.getFeaturesPerSample() * model.getFeatureDescription().getSampleSize() + 1; ArrayList<Attribute> attrInfo = new ArrayList<Attribute>(numFeatures); for (int a = 0; a < numFeatures - 1; a++) { Attribute attr = new Attribute("a" + a); attrInfo.add(attr); } List<String> classValues = new ArrayList<String>(numClasses); for (int i = 0; i < numClasses; i++) { classValues.add((i + 1) + ".0"); // "1.0", "2.0", ... } Attribute classAttr = new Attribute("class", classValues); attrInfo.add(classAttr); Instances structure = new Instances("trainSet pattern classes", attrInfo, 0); structure.setClassIndex(numFeatures - 1); model.setStructure(structure); try { if (model.getClassifier() != null && model.getClassifier().getClassifier() != null && model.getClassifier().getClassifier() instanceof SMO) { SMO smo = ((SMO) model.getClassifier().getClassifier()); Field field = smo.getClass().getDeclaredField("m_classAttribute"); field.setAccessible(true); field.set(smo, classAttr); // missing values ReplaceMissingValues rmv = new ReplaceMissingValues(); rmv.setInputFormat(structure); Field missing = smo.getClass().getDeclaredField("m_Missing"); missing.setAccessible(true); missing.set(smo, rmv); // filter Field filter = smo.getClass().getDeclaredField("m_Filter"); filter.setAccessible(true); Filter normalize = (Filter) filter.get(smo); RelationalLocator relLoc = new RelationalLocator(structure); StringLocator strLoc = new StringLocator(structure); Field outputRelAtts = normalize.getClass().getSuperclass().getSuperclass() .getDeclaredField("m_OutputRelAtts"); outputRelAtts.setAccessible(true); outputRelAtts.set(normalize, relLoc); Field inputRelAtts = normalize.getClass().getSuperclass().getSuperclass() .getDeclaredField("m_InputRelAtts"); inputRelAtts.setAccessible(true); inputRelAtts.set(normalize, relLoc); Field outputStrAtts = normalize.getClass().getSuperclass().getSuperclass() .getDeclaredField("m_OutputStringAtts"); outputStrAtts.setAccessible(true); outputStrAtts.set(normalize, strLoc); Field inputStrAtts = normalize.getClass().getSuperclass().getSuperclass() .getDeclaredField("m_InputStringAtts"); inputStrAtts.setAccessible(true); inputStrAtts.set(normalize, strLoc); Field outputFormat = normalize.getClass().getSuperclass().getSuperclass() .getDeclaredField("m_OutputFormat"); outputFormat.setAccessible(true); outputFormat.set(normalize, structure); logger.info("fixes applied, the model should work with a weka version >= 3.7.11 now"); } // else: good luck... } catch (Exception e) { e.printStackTrace(); logger.error("new weka version fixes could not be applied: " + e.getMessage()); } } // old weka version fixOldModelVersion(model.getSegmentationModel()); // fixOldModelVersion can handle null fixOldModelVersion(model.getSecondarySegmentationModel()); // fixOldModelVersion can handle null fixOldModelVersion(model.getExclusionModel()); // fixOldModelVersion can handle null }