List of usage examples for weka.core.Instances.add
@Override public boolean add(Instance instance)
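Instances.add appends a shallow copy of the given Instance to the end of the dataset (the copy gets its dataset reference set) and returns true. For orientation before the longer examples below, here is a minimal, self-contained sketch of the usual call pattern; the attribute names and values are illustrative only:

import java.util.ArrayList;

import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instances;

public class AddExample {
  public static void main(String[] args) {
    // two numeric attributes (names are illustrative)
    ArrayList<Attribute> atts = new ArrayList<Attribute>();
    atts.add(new Attribute("x1"));
    atts.add(new Attribute("x2"));

    // empty dataset; the last constructor argument is only an initial capacity
    Instances data = new Instances("demo", atts, 10);

    // add(Instance) copies the instance, sets its dataset reference and appends it;
    // the first DenseInstance argument (1.0) is the instance weight
    data.add(new DenseInstance(1.0, new double[] { 3.5, 7.0 }));

    System.out.println(data);
  }
}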
From source file: classifier.SentenceBasedTextDirectoryLoader.java
License: Open Source License

/**
 * Return the full data set. If the structure hasn't yet been determined by
 * a call to getStructure then method should do so before processing the
 * rest of the data set.
 *
 * @return the structure of the data set as an empty set of Instances
 * @throws IOException if there is no source or parsing fails
 */
@Override
public Instances getDataSet() throws IOException {
  if (getDirectory() == null)
    throw new IOException("No directory/source has been specified");

  String directoryPath = getDirectory().getAbsolutePath();
  ArrayList<String> classes = new ArrayList<String>();
  ArrayList<String> filenames = new ArrayList<String>();
  Enumeration enm = getStructure().classAttribute().enumerateValues();
  while (enm.hasMoreElements())
    classes.add((String) enm.nextElement());

  Instances data = getStructure();
  int fileCount = 0;
  // each class is actually the filename - this is preserved around weka,
  // so its useful for tracking associations later and using as an "index"
  //
  for (int k = 0; k < classes.size(); k++) {
    String subdirPath = (String) classes.get(k);
    File subdir = new File(directoryPath + File.separator + subdirPath);
    String[] files = subdir.list();
    for (int j = 0; j < files.length; j++) {
      try {
        fileCount++;
        if (getDebug())
          System.err.println("processing " + fileCount + " : " + files[j]);

        File txt = new File(directoryPath + File.separator + subdirPath + File.separator + files[j]);
        filenames.add(files[j]);
        BufferedInputStream is;
        is = new BufferedInputStream(new FileInputStream(txt));
        StringBuffer txtStr = new StringBuffer();
        int c;
        while ((c = is.read()) != -1) {
          txtStr.append((char) c);
        }

        // Here is my extension to Text Directory Loader.
        String regexSentenceSplit = "(\\n)";
        String rawtext = txtStr.toString();
        rawtext = rawtext.toLowerCase();
        rawtext.trim();
        // split the sentences
        String[] sentences = rawtext.split(regexSentenceSplit);
        for (String sentence : sentences) {
          double[] newInst = null;
          if (m_OutputFilename)
            newInst = new double[3];
          else
            newInst = new double[2];
          newInst[0] = (double) data.attribute(0).addStringValue(sentence + "\n");
          if (m_OutputFilename)
            newInst[1] = (double) data.attribute(1).addStringValue(subdirPath + File.separator + files[j]);
          newInst[data.classIndex()] = (double) k;
          data.add(new DenseInstance(1.0, newInst));
          // }
        }

        writeFilenames(directoryPath, filenames);
      } catch (Exception e) {
        System.err.println("failed to convert file: " + directoryPath + File.separator + files[j]);
      }
    }
  }
  // this.m_structure.setClassIndex(-1);
  return data;
}
From source file: cn.edu.xjtu.dbmine.TextDirectoryLoader.java
License: Open Source License

/**
 * Return the full data set. If the structure hasn't yet been determined
 * by a call to getStructure then method should do so before processing
 * the rest of the data set.
 *
 * @return the structure of the data set as an empty set of Instances
 * @throws IOException if there is no source or parsing fails
 */
public Instances getDataSet() throws IOException {
  if (getDirectory() == null)
    throw new IOException("No directory/source has been specified");

  String directoryPath = getDirectory().getAbsolutePath();
  FastVector classes = new FastVector();
  Enumeration enm = getStructure().classAttribute().enumerateValues();
  while (enm.hasMoreElements())
    classes.addElement(enm.nextElement());

  Instances data = getStructure();
  int fileCount = 0;
  for (int k = 0; k < classes.size(); k++) {
    String subdirPath = (String) classes.elementAt(k);
    File subdir = new File(directoryPath + File.separator + subdirPath);
    String[] files = subdir.list();
    for (int j = 0; j < files.length; j++) {
      try {
        fileCount++;
        if (getDebug())
          System.err.println("processing " + fileCount + " : " + subdirPath + " : " + files[j]);

        double[] newInst = null;
        if (m_OutputFilename)
          newInst = new double[3];
        else
          newInst = new double[2];
        File txt = new File(directoryPath + File.separator + subdirPath + File.separator + files[j]);
        BufferedInputStream is;
        is = new BufferedInputStream(new FileInputStream(txt));
        StringBuffer txtStr = new StringBuffer();
        int c;
        /*while ((c = is.read()) != -1) {
          txtStr.append((char) c);
        }*/
        FileReader fr = new FileReader(txt);
        BufferedReader br = new BufferedReader(fr);
        String line;
        while ((line = br.readLine()) != null) {
          txtStr.append(line + "\n");
        }

        newInst[0] = (double) data.attribute(0).addStringValue(txtStr.toString());
        if (m_OutputFilename)
          newInst[1] = (double) data.attribute(1).addStringValue(subdirPath + File.separator + files[j]);
        newInst[data.classIndex()] = (double) k;
        data.add(new Instance(1.0, newInst));
        is.close();
      } catch (Exception e) {
        System.err.println("failed to convert file: " + directoryPath + File.separator + subdirPath
            + File.separator + files[j]);
      }
    }
  }
  return data;
}
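Both text-directory loaders above follow the same pattern around Instances.add: each document (or sentence) becomes a double[] whose first slot holds the index returned by Attribute.addStringValue and whose class slot holds the position of the class label, and the array is then wrapped in an instance with weight 1.0. A condensed, self-contained sketch of that pattern, assuming a string attribute at index 0 and a nominal class at index 1 (the labels and text are made up):

import java.util.ArrayList;
import java.util.List;

import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instances;

public class StringAttributeAddExample {
  public static void main(String[] args) {
    ArrayList<Attribute> atts = new ArrayList<Attribute>();
    atts.add(new Attribute("text", (List<String>) null));   // null value list -> string attribute

    List<String> labels = new ArrayList<String>();
    labels.add("pos");
    labels.add("neg");
    atts.add(new Attribute("class", labels));                // nominal class attribute

    Instances data = new Instances("docs", atts, 0);
    data.setClassIndex(1);

    double[] vals = new double[2];
    vals[0] = data.attribute(0).addStringValue("some document text"); // index into the string table
    vals[1] = 0;                                                      // position of the class label
    data.add(new DenseInstance(1.0, vals));

    System.out.println(data);
  }
}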
From source file: cn.ict.zyq.bestConf.bestConf.BestConf.java
License: Open Source License

public Instances loadPropertiesAsInstancesPre(String Path) {
  HashMap<String, String> pmap = null;
  try {
    pmap = Yaml.loadType(new FileInputStream(yamlPath), HashMap.class);
  } catch (FileNotFoundException e) {
    e.printStackTrace();
  }

  atts = new ArrayList<Attribute>();
  Instance dfIns = new DenseInstance(pmap.size());
  int pos = 0;
  double[] vals = new double[pmap.size()];
  for (Map.Entry<String, String> ent : pmap.entrySet()) {
    try {
      double val = Double.valueOf(String.valueOf(ent.getValue()));
      vals[pos] = val;

      Properties p1 = new Properties();
      double upper, lower;
      if (val != 0) {
        upper = val * (1. + 0.5);
        lower = val * (1. - 0.5);
      } else {
        lower = val;
        upper = 1;
      }
      p1.setProperty("range", "[" + String.valueOf(lower) + "," + String.valueOf(upper) + "]");
      ProtectedProperties prop1 = new ProtectedProperties(p1);

      atts.add(new Attribute(String.valueOf(ent.getKey()), prop1));
      pos++;
    } catch (Exception e) {
    }
  }

  Instances dfProp = new Instances("DefaultConfig", atts, 1);
  dfProp.add(dfIns);
  dfIns.setDataset(dfProp);
  for (int i = 0; i < pos; i++) {
    dfIns.setValue(atts.get(i), vals[i]);
    //System.err.println(atts.get(i)+":"+vals[i]);
  }

  return dfProp;
}
From source file: cn.ict.zyq.bestConf.bestConf.BestConf.java
License: Open Source License

public Instances loadPropertiesAsInstances(String Path) {
  HashMap<String, String> pmap = null;
  HashMap rangeMap = null;
  try {
    pmap = Yaml.loadType(new FileInputStream(yamlPath), HashMap.class);
    rangeMap = Yaml.loadType(new FileInputStream(yamlPath + "_range"), HashMap.class);
  } catch (FileNotFoundException e) {
    e.printStackTrace();
  }

  atts = new ArrayList<Attribute>();
  int pos = 0;
  double[] vals = new double[pmap.size()];
  Object range = null;
  for (Map.Entry<String, String> ent : pmap.entrySet()) {
    try {
      double val = Double.valueOf(String.valueOf(ent.getValue()));
      vals[pos] = val;

      Properties p1 = new Properties();
      range = rangeMap.get(ent.getKey());
      if (range != null) {
        String list = (String) range;
        if (list.indexOf('[') == -1 && list.indexOf('(') == -1)
          throw new Exception("No Range for You" + ent.getKey());
        p1.setProperty("range", list.trim());
      } else {
        double upper, lower;
        if (val != 0) {
          upper = val * (1. + 0.5);
          lower = val * (1. - 0.5);
        } else {
          lower = val;
          upper = 1;
        }
        p1.setProperty("range", "[" + String.valueOf(lower) + "," + String.valueOf(upper) + "]");
      }
      ProtectedProperties prop1 = new ProtectedProperties(p1);

      atts.add(new Attribute(String.valueOf(ent.getKey()), prop1));
      pos++;
    } catch (Exception e) {
    }
  }

  Instances dfProp = new Instances("DefaultConfig", atts, 1);
  Instance dfIns = new DenseInstance(atts.size());
  for (int i = 0; i < pos; i++) {
    dfIns.setValue(atts.get(i), vals[i]);
    //System.err.println(atts.get(i)+":"+vals[i]);
  }
  dfProp.add(dfIns);
  dfIns.setDataset(dfProp);

  return dfProp;
}
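One detail worth noting about the two BestConf methods above: Instances.add stores a copy of the instance, so in loadPropertiesAsInstancesPre the values written to dfIns after dfProp.add(dfIns) only reach the local object, not the row already stored in dfProp (the stored row keeps whatever dfIns held at add time). The second version sets the values before adding. A small hypothetical fragment illustrating the behaviour (imports as in the sketch near the top of this page):

// hypothetical, self-contained fragment illustrating the copy semantics of Instances.add
ArrayList<Attribute> atts = new ArrayList<Attribute>();
atts.add(new Attribute("someConfigKey"));                 // illustrative numeric attribute

Instances dfProp = new Instances("DefaultConfig", atts, 1);
DenseInstance dfIns = new DenseInstance(atts.size());

dfProp.add(dfIns);                                        // a copy of dfIns is appended
dfIns.setValue(0, 42.0);                                  // updates only the local object
System.out.println(dfProp.firstInstance().value(0));      // not 42.0; the stored copy was unaffected

dfProp.firstInstance().setValue(0, 42.0);                 // this edits the stored row itself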
From source file: cn.ict.zyq.bestConf.bestConf.BestConf.java
License: Open Source License

public static void testCOMT2() throws Exception {
  BestConf bestconf = new BestConf();
  Instances trainingSet = DataIOFile.loadDataFromArffFile("data/trainingBestConf0.arff");
  trainingSet.setClassIndex(trainingSet.numAttributes() - 1);

  Instances samplePoints = LHSInitializer.getMultiDimContinuous(bestconf.getAttributes(),
      InitialSampleSetSize, false);
  samplePoints.insertAttributeAt(trainingSet.classAttribute(), samplePoints.numAttributes());
  samplePoints.setClassIndex(samplePoints.numAttributes() - 1);

  COMT2 comt = new COMT2(samplePoints, COMT2Iteration);

  comt.buildClassifier(trainingSet);

  Evaluation eval = new Evaluation(trainingSet);
  eval.evaluateModel(comt, trainingSet);
  System.err.println(eval.toSummaryString());

  Instance best = comt.getInstanceWithPossibleMaxY(samplePoints.firstInstance());
  Instances bestInstances = new Instances(trainingSet, 2);
  bestInstances.add(best);
  DataIOFile.saveDataToXrffFile("data/trainingBestConf_COMT2.arff", bestInstances);

  //now we output the training set with the class value updated as the predicted value
  Instances output = new Instances(trainingSet, trainingSet.numInstances());
  Enumeration<Instance> enu = trainingSet.enumerateInstances();
  while (enu.hasMoreElements()) {
    Instance ins = enu.nextElement();
    double[] values = ins.toDoubleArray();
    values[values.length - 1] = comt.classifyInstance(ins);
    output.add(ins.copy(values));
  }
  DataIOFile.saveDataToXrffFile("data/trainingBestConf0_predict.xrff", output);
}
From source file: cn.ict.zyq.bestConf.bestConf.RBSoDDSOptimization.java
License: Open Source License

@Override
public void optimize(String preLoadDatasetPath) {
  ResumeParams rParams = resumePrepareTry();
  boolean justAfterResume = rParams.isResuming; //detect whether we need to resume
  if (rParams.isResuming)
    preLoadDatasetPath = null;

  double tempBest;
  while (opParams.currentround < RRSMaxRounds) {
    //is it a global search
    if (samplePoints == null || rParams.propsRound < opParams.currentround) {
      props = bestconf.getAttributes();
      saveProps(props, opParams.currentround, opParams.subround);//for resumability
      opParams.saveToFile();
    }

    if (opParams.currentround != 0 || opParams.subround != 0) {
      if (!justAfterResume || (justAfterResume && (rParams.samplePointRound < opParams.currentround
          || rParams.samplePointSubRound < opParams.subround))) {
        //let's do the sampling
        ((DDSSampler) sampler).setCurrentRound(opParams.currentround);
        samplePoints = sampler.getMultiDimContinuous(props, InitialSampleSetSize, false, bestconf);
        saveSamplePoints(samplePoints, opParams.currentround, opParams.subround);
      }

      if (!justAfterResume || (justAfterResume && rParams.trainingRound < opParams.currentround
          || rParams.trainingSubRound < opParams.subround)) {
        //traverse the set and initiate the experiments
        trainingSet = bestconf.runExp(samplePoints, opParams.currentround,
            "RRS" + String.valueOf(opParams.subround), justAfterResume);
        saveTrainingSet(trainingSet, opParams.currentround, opParams.subround);
      }
    } else {//(currentround==0 && subround==0)
      if (preLoadDatasetPath == null) {
        if (samplePoints == null) {
          //let's do the sampling
          ((DDSSampler) sampler).setCurrentRound(opParams.currentround);
          samplePoints = sampler.getMultiDimContinuous(props, InitialSampleSetSize, false, bestconf);
          samplePoints.add(0, bestconf.defltSettings.firstInstance());
          saveSamplePoints(samplePoints, opParams.currentround, opParams.subround);
        }

        if (trainingSet == null) {
          //traverse the set and initiate the experiments
          trainingSet = bestconf.runExp(samplePoints, opParams.currentround,
              "RRS" + String.valueOf(opParams.subround), justAfterResume);
          saveTrainingSet(trainingSet, opParams.currentround, opParams.subround);
        }
      } else {
        try {
          bestconf.allInstances = DataIOFile.loadDataFromArffFile(preLoadDatasetPath);
          bestconf.allInstances.setClassIndex(bestconf.allInstances.numAttributes() - 1);
          samplePoints = trainingSet = new Instances(bestconf.allInstances);
          saveSamplePoints(samplePoints, opParams.currentround, opParams.subround);
          saveTrainingSet(trainingSet, opParams.currentround, opParams.subround);
        } catch (IOException e) {
          e.printStackTrace();
        }
      }
    }

    //get the point with the best performance
    Instance tempIns = BestConf.findBestPerf(trainingSet);
    tempBest = tempIns.value(trainingSet.numAttributes() - 1);

    if (tempBest > opParams.currentBest || (justAfterResume && tempBest == opParams.currentBest
        && (rParams.propsRound < opParams.currentround || rParams.propsSubRound < opParams.subround))) {
      System.err.println("Previous best is " + opParams.currentBest + "; Current best is " + tempBest);

      opParams.currentBest = tempBest;
      opParams.currentIns = tempIns;
      opParams.saveToFile();

      try {
        //output the best instance of this round
        Instances bestInstances = new Instances(samplePoints, 1);
        bestInstances.add(opParams.currentIns);
        DataIOFile.saveDataToArffFile("data/trainingBestConf_RRS_" + opParams.currentround + "_"
            + opParams.subround + "_" + opParams.currentBest + ".arff", bestInstances);
      } catch (IOException e) {
        e.printStackTrace();
      }

      //let's search locally
      if (!justAfterResume || (justAfterResume && rParams.propsRound < opParams.currentround
          || rParams.propsSubRound < opParams.subround)) {
        props = ConfigSampler.scaleDownDetour(trainingSet, tempIns);
        saveProps(props, opParams.currentround, opParams.subround);//for resumability
      }

      opParams.subround++;
      opParams.saveToFile();
    } else {//let's do the restart
      samplePoints = null;
      opParams.currentround++;
      opParams.subround = 0;
      opParams.saveToFile();
      System.err.println("Entering into round " + opParams.currentround);
      /*if(opParams.currentround>=RRSMaxRounds)
        break;*/
    }

    justAfterResume = false;
  } //RRS search

  System.err.println(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>");
  System.err.println("We are ending the optimization experiments!");
  System.err.println("Please wait and don't shutdown!");
  System.err.println(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>");
  System.err.println(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>");

  //output the best
  Map<Attribute, Double> attsmap = BestConf.instanceToMap(opParams.currentIns);
  System.out.println(attsmap.toString());

  //set the best configuration to the cluster
  System.err.println("The best performance is : " + opParams.currentBest);
  System.out.println("=========================================");
  TxtFileOperation.writeToFile("bestConfOutput_RRS", attsmap.toString() + "\n");
  System.out.println("=========================================");

  //output the whole trainings dataset
  try {
    DataIOFile.saveDataToArffFile("data/trainingAllRSS.arff", bestconf.allInstances);
  } catch (IOException e) {
    e.printStackTrace();
  }
}
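Note the call samplePoints.add(0, bestconf.defltSettings.firstInstance()) in the first-round branch above: besides the boolean add(Instance) used everywhere else on this page, Instances also exposes the List-style add(int index, Instance instance), which likewise stores a copy of the instance but inserts it at the given position, here placing the default configuration at the front of the sample set.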
From source file: cn.ict.zyq.bestConf.bestConf.sampler.DDSSampler.java
License: Open Source License

/**
 * At current version, we assume all attributes are numeric attributes with bounds
 *
 * @param useMid true if to use the middle point of a subdomain, false if to use a random point within a subdomain
 */
public Instances sampleMultiDimContinuous(ArrayList<Attribute> atts, int sampleSetSize, boolean useMid) {
  ArrayList<Integer>[] crntSetPerm;
  //only initialize once
  if (sets == null) {
    //possible number of sample sets will not exceed $sampleSetSize to the power of 2
    int L = (int) Math.min(rounds, atts.size() > 2 ? Math.pow(sampleSetSize, atts.size() - 1)
        : (atts.size() > 1 ? sampleSetSize : 1));

    //initialization
    dists = new long[L];
    sets = new ArrayList[L][];
    for (int i = 0; i < L; i++) {
      dists[i] = -1;
      sets[i] = null;
    }

    long maxMinDist = -1;
    int posWithMaxMinDist = -1;
    //generate L sets of sampleSetSize points
    for (int i = 0; i < L; i++) {
      ArrayList<Integer>[] setPerm = generateOneSampleSet(sampleSetSize, atts.size());
      while (inAlready(sets, setPerm))//continue the samples set generation till different samples are obtained
        setPerm = generateOneSampleSet(sampleSetSize, atts.size());
      sets[i] = setPerm;

      //compute the minimum distance minDist between any sample pair for each set
      dists[i] = minDistForSet(setPerm);
      //select the set with the maximum minDist
      if (dists[i] > maxMinDist) {
        posWithMaxMinDist = i;
        maxMinDist = dists[i];
      }
    }

    //now let the first sample set be the one with the max mindist
    positionSwitch(sets, dists, 0, posWithMaxMinDist);
  }
  crntSetPerm = sets[sampleSetToGet];

  //generate and output the set with the maximum minDist as the result

  //first, divide the domain of each attribute into sampleSetSize equal subdomain
  double[][] bounds = new double[atts.size()][sampleSetSize + 1];//sampleSetSize+1 to include the lower and upper bounds
  Iterator<Attribute> itr = atts.iterator();
  Attribute crntAttr;
  boolean[] roundToInt = new boolean[atts.size()];
  for (int i = 0; i < bounds.length; i++) {
    crntAttr = itr.next();

    uniBoundsGeneration(bounds[i], crntAttr, sampleSetSize);
    //flexibleBoundsGeneration(bounds[i], crntAttr, sampleSetSize);

    if (bounds[i][sampleSetSize] - bounds[i][0] > sampleSetSize)
      roundToInt[i] = true;
  }

  //second, generate the set according to setWithMaxMinDist
  Instances data = new Instances("SamplesByLHS", atts, sampleSetSize);
  for (int i = 0; i < sampleSetSize; i++) {
    double[] vals = new double[atts.size()];
    for (int j = 0; j < vals.length; j++) {
      vals[j] = useMid
          ? (bounds[j][crntSetPerm[j].get(i)] + bounds[j][crntSetPerm[j].get(i) + 1]) / 2
          : bounds[j][crntSetPerm[j].get(i)]
              + ((bounds[j][crntSetPerm[j].get(i) + 1] - bounds[j][crntSetPerm[j].get(i)])
                  * uniRand.nextDouble());
      if (roundToInt[j])
        vals[j] = (int) vals[j];
    }
    data.add(new DenseInstance(1.0, vals));
  }

  //third, return the generated points
  return data;
}
From source file: cn.ict.zyq.bestConf.bestConf.sampler.LHSSampler.java
License: Open Source License

/**
 * Assumptions:(1)Numberic is continuous and has lower/upper bounds; (2) Nominals have domains permutable
 *
 * @param useMid true if to use the middle point of a subdomain, false if to use a random point within a subdomain
 */
private static Instances getMultiDim(ArrayList<Attribute> atts, int sampleSetSize, boolean useMid) {
  int L = Math.min(7, Math.max(sampleSetSize, atts.size()));//7 is chosen for no special reason
  double maxMinDist = 0, crntMinDist;//work as the threshold to select the sample set
  ArrayList<Integer>[] setWithMaxMinDist = null;
  //generate L sets of sampleSetSize points
  for (int i = 0; i < L; i++) {
    ArrayList<Integer>[] setPerm = generateOneSampleSet(sampleSetSize, atts.size());
    //compute the minimum distance minDist between any sample pair for each set
    crntMinDist = minDistForSet(setPerm);
    //select the set with the maximum minDist
    if (crntMinDist > maxMinDist) {
      setWithMaxMinDist = setPerm;
      maxMinDist = crntMinDist;
    }
  }

  //generate and output the set with the maximum minDist as the result

  //first, divide the domain of each attribute into sampleSetSize equal subdomain
  double[][] bounds = new double[atts.size()][sampleSetSize + 1];//sampleSetSize+1 to include the lower and upper bounds
  Iterator<Attribute> itr = atts.iterator();
  Attribute crntAttr;
  double pace;
  for (int i = 0; i < bounds.length; i++) {
    crntAttr = itr.next();
    if (crntAttr.isNumeric()) {
      bounds[i][0] = crntAttr.getLowerNumericBound();
      bounds[i][sampleSetSize] = crntAttr.getUpperNumericBound();
      pace = (crntAttr.getUpperNumericBound() - crntAttr.getLowerNumericBound()) / sampleSetSize;
      for (int j = 1; j < sampleSetSize; j++) {
        bounds[i][j] = bounds[i][j - 1] + pace;
      }
    } else {//crntAttr.isNominal()
      if (crntAttr.numValues() >= sampleSetSize) {
        //randomly select among the set
        for (int j = 0; j <= sampleSetSize; j++)
          bounds[i][j] = uniRand.nextInt(crntAttr.numValues());//the position of one of the nominal values
      } else {
        //first round-robin
        int lastPart = sampleSetSize % crntAttr.numValues();
        for (int j = 0; j < sampleSetSize - lastPart; j++)
          bounds[i][j] = j % crntAttr.numValues();
        //then randomly select
        for (int j = sampleSetSize - lastPart; j <= sampleSetSize; j++)
          bounds[i][j] = uniRand.nextInt(crntAttr.numValues());
      }
    } //nominal attribute
  } //get all subdomains

  //second, generate the set according to setWithMaxMinDist
  Instances data = new Instances("InitialSetByLHS", atts, sampleSetSize);
  for (int i = 0; i < sampleSetSize; i++) {
    double[] vals = new double[atts.size()];
    for (int j = 0; j < vals.length; j++) {
      if (atts.get(j).isNumeric()) {
        vals[j] = useMid
            ? (bounds[j][setWithMaxMinDist[j].get(i)] + bounds[j][setWithMaxMinDist[j].get(i) + 1]) / 2
            : bounds[j][setWithMaxMinDist[j].get(i)]
                + ((bounds[j][setWithMaxMinDist[j].get(i) + 1] - bounds[j][setWithMaxMinDist[j].get(i)])
                    * uniRand.nextDouble());
      } else {//isNominal()
        vals[j] = bounds[j][setWithMaxMinDist[j].get(i)];
      }
    }
    data.add(new DenseInstance(1.0, vals));
  }

  //third, return the generated points
  return data;
}
From source file: cn.ict.zyq.bestConf.bestConf.sampler.LHSSampler.java
License: Open Source License

/**
 * At current version, we assume all attributes are numeric attributes with bounds
 *
 * @param useMid true if to use the middle point of a subdomain, false if to use a random point within a subdomain
 */
public Instances sampleMultiDimContinuous(ArrayList<Attribute> atts, int sampleSetSize, boolean useMid) {
  int L = Math.min(7, Math.max(sampleSetSize, atts.size()));//7 is chosen for no special reason
  double maxMinDist = 0, crntMinDist;//work as the threshold to select the sample set
  ArrayList<Integer>[] setWithMaxMinDist = null;
  //generate L sets of sampleSetSize points
  for (int i = 0; i < L; i++) {
    ArrayList<Integer>[] setPerm = generateOneSampleSet(sampleSetSize, atts.size());
    //compute the minimum distance minDist between any sample pair for each set
    crntMinDist = minDistForSet(setPerm);
    //select the set with the maximum minDist
    if (crntMinDist > maxMinDist) {
      setWithMaxMinDist = setPerm;
      maxMinDist = crntMinDist;
    }
  }

  //generate and output the set with the maximum minDist as the result

  //first, divide the domain of each attribute into sampleSetSize equal subdomain
  double[][] bounds = new double[atts.size()][sampleSetSize + 1];//sampleSetSize+1 to include the lower and upper bounds
  Iterator<Attribute> itr = atts.iterator();
  Attribute crntAttr;
  boolean[] roundToInt = new boolean[atts.size()];
  for (int i = 0; i < bounds.length; i++) {
    crntAttr = itr.next();

    uniBoundsGeneration(bounds[i], crntAttr, sampleSetSize);
    //flexibleBoundsGeneration(bounds[i], crntAttr, sampleSetSize);

    if (bounds[i][sampleSetSize] - bounds[i][0] > sampleSetSize)
      roundToInt[i] = true;
  }

  //second, generate the set according to setWithMaxMinDist
  Instances data = new Instances("SamplesByLHS", atts, sampleSetSize);
  for (int i = 0; i < sampleSetSize; i++) {
    double[] vals = new double[atts.size()];
    for (int j = 0; j < vals.length; j++) {
      vals[j] = useMid
          ? (bounds[j][setWithMaxMinDist[j].get(i)] + bounds[j][setWithMaxMinDist[j].get(i) + 1]) / 2
          : bounds[j][setWithMaxMinDist[j].get(i)]
              + ((bounds[j][setWithMaxMinDist[j].get(i) + 1] - bounds[j][setWithMaxMinDist[j].get(i)])
                  * uniRand.nextDouble());
      if (roundToInt[j])
        vals[j] = (int) vals[j];
    }
    data.add(new DenseInstance(1.0, vals));
  }

  //third, return the generated points
  return data;
}
From source file: cn.ict.zyq.bestConf.bestConf.sampler.LHSSampler.java
License: Open Source License

/**
 * At current version, we assume all attributes are numeric attributes with bounds
 *
 * Let PACE be upper-lower DIVided by the sampleSetSize
 *
 * @param useMid true if to use the middle point of a subdomain, false if to use a random point within a subdomain
 */
private static Instances getMultiDimContinuousDiv(ArrayList<Attribute> atts, int sampleSetSize, boolean useMid) {
  int L = Math.min(7, Math.max(sampleSetSize, atts.size()));//7 is chosen for no special reason
  double maxMinDist = 0, crntMinDist;//work as the threshold to select the sample set
  ArrayList<Integer>[] setWithMaxMinDist = null;
  //generate L sets of sampleSetSize points
  for (int i = 0; i < L; i++) {
    ArrayList<Integer>[] setPerm = generateOneSampleSet(sampleSetSize, atts.size());
    //compute the minimum distance minDist between any sample pair for each set
    crntMinDist = minDistForSet(setPerm);
    //select the set with the maximum minDist
    if (crntMinDist > maxMinDist) {
      setWithMaxMinDist = setPerm;
      maxMinDist = crntMinDist;
    }
  }

  //generate and output the set with the maximum minDist as the result

  //first, divide the domain of each attribute into sampleSetSize equal subdomain
  double[][] bounds = new double[atts.size()][sampleSetSize + 1];//sampleSetSize+1 to include the lower and upper bounds
  Iterator<Attribute> itr = atts.iterator();
  Attribute crntAttr;
  double pace;
  for (int i = 0; i < bounds.length; i++) {
    crntAttr = itr.next();
    bounds[i][0] = crntAttr.getLowerNumericBound();
    bounds[i][sampleSetSize] = crntAttr.getUpperNumericBound();
    pace = (bounds[i][sampleSetSize] - bounds[i][0]) / sampleSetSize;
    for (int j = 1; j < sampleSetSize; j++) {
      bounds[i][j] = bounds[i][j - 1] + pace;
    }
  }

  //second, generate the set according to setWithMaxMinDist
  Instances data = new Instances("InitialSetByLHS", atts, sampleSetSize);
  for (int i = 0; i < sampleSetSize; i++) {
    double[] vals = new double[atts.size()];
    for (int j = 0; j < vals.length; j++) {
      vals[j] = useMid
          ? (bounds[j][setWithMaxMinDist[j].get(i)] + bounds[j][setWithMaxMinDist[j].get(i) + 1]) / 2
          : bounds[j][setWithMaxMinDist[j].get(i)]
              + ((bounds[j][setWithMaxMinDist[j].get(i) + 1] - bounds[j][setWithMaxMinDist[j].get(i)])
                  * uniRand.nextDouble());
    }
    data.add(new DenseInstance(1.0, vals));
  }

  //third, return the generated points
  return data;
}
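In all of the sampler methods above, each generated point is appended with data.add(new DenseInstance(1.0, vals)); the 1.0 is the instance weight, and the sampleSetSize passed to the Instances constructor is only an initial capacity, so add() can grow the dataset beyond it.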