List of usage examples for org.apache.hadoop.conf.Configuration.getInt
public int getInt(String name, int defaultValue)

Returns the value of the name property as an int; if no such property is set, the supplied defaultValue is returned.
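For orientation before the full examples below, here is a minimal sketch of the call itself; the property key "example.num.reducers" and the default of 4 are purely illustrative:

import org.apache.hadoop.conf.Configuration;

public class GetIntExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Hypothetical key: getInt falls back to the default (4) when the property is unset.
        int reducers = conf.getInt("example.num.reducers", 4);
        System.out.println("reducers = " + reducers);
    }
}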
From source file: edu.indiana.d2i.htrc.kmeans.MemRandomSeedGenerator.java
License: Apache License

public static void buildRandom(Configuration conf, Path input, int k, DistanceMeasure measure) throws IOException {
    // build id list
    FileSystem fs = FileSystem.get(conf);
    DataInputStream fsinput = new DataInputStream(fs.open(input));
    Iterator<Text> idIterator = new IDList(fsinput).iterator();
    List<String> idlist = new ArrayList<String>();
    while (idIterator.hasNext()) {
        Text id = idIterator.next();
        idlist.add(id.toString());
    }

    ThreadedMemcachedClient client = ThreadedMemcachedClient.getThreadedMemcachedClient(conf);
    MemcachedClient cache = client.getCache();
    Transcoder<VectorWritable> vectorTranscoder = new HadoopWritableTranscoder<VectorWritable>(conf,
            VectorWritable.class);

    // pick k random id
    List<Text> chosenTexts = Lists.newArrayListWithCapacity(k);
    List<Cluster> chosenClusters = Lists.newArrayListWithCapacity(k);
    int nextClusterId = 0;

    // Random random = RandomUtils.getRandom();
    // for (String id : idlist) {
    //     VectorWritable vectorWritable = cache.get(id, vectorTranscoder);
    //     if (vectorWritable != null) {
    //         Cluster newCluster = new Cluster(vectorWritable.get(), nextClusterId++, measure);
    //         newCluster.observe(vectorWritable.get(), 1);
    //         Text newText = new Text(id);
    //         int currentSize = chosenTexts.size();
    ////         if (currentSize < k) {
    ////             chosenTexts.add(newText);
    ////             chosenClusters.add(newCluster);
    ////         } else if (random.nextInt(currentSize + 1) != 0) {
    ////             int indexToRemove = random.nextInt(currentSize);
    ////             chosenTexts.remove(indexToRemove);
    ////             chosenClusters.remove(indexToRemove);
    ////             chosenTexts.add(newText);
    ////             chosenClusters.add(newCluster);
    ////         }
    //     } else {
    //         logger.error("cannot find VectorWritable for " + id);
    ////         throw new RuntimeException("cannot find VectorWritable for " + id);
    //     }
    // }

    for (int i = 0; i < k; i++) {
        String id = idlist.get(i);
        VectorWritable vectorWritable = cache.get(id, vectorTranscoder);
        System.out.println("pick " + id);
        if (vectorWritable != null) {
            Cluster newCluster = new Cluster(vectorWritable.get(), nextClusterId++, measure);
            chosenClusters.add(newCluster);
        } else {
            logger.error("cannot find VectorWritable for " + id);
            throw new RuntimeException("cannot find VectorWritable for " + id);
        }
    }

    // write out the seeds to Memcached
    int maxExpir = conf.getInt(HTRCConstants.MEMCACHED_MAX_EXPIRE, -1);
    Transcoder<Cluster> clusterTranscoder = new HadoopWritableTranscoder<Cluster>(conf, Cluster.class);
    for (int i = 0; i < chosenClusters.size(); i++) {
        System.out.println("set cluster " + MemKMeansUtil.toClusterName(i));
        cache.set(MemKMeansUtil.toClusterName(i), maxExpir, chosenClusters.get(i), clusterTranscoder);

        Cluster cluster = cache.get(MemKMeansUtil.toClusterName(i), clusterTranscoder);
        if (cluster != null) {
            DataOutputBuffer buf = new DataOutputBuffer();
            cluster.write(buf);
            System.out.println("read from memcached " + cluster.getIdentifier() + " size " + buf.size());
        }
    }
    client.close();
}
From source file: edu.indiana.d2i.htrc.skmeans.StreamingKMeansAdapter.java
License: Apache License

public StreamingKMeansAdapter(Configuration conf) {
    float cutoff = conf.getFloat(StreamingKMeansConfigKeys.CUTOFF, 0);
    int maxClusters = conf.getInt(StreamingKMeansConfigKeys.MAXCLUSTER, 0);
    final int dim = conf.getInt(StreamingKMeansConfigKeys.VECTOR_DIMENSION, 0);
    final DistanceMeasure measure = ClassUtils
            .instantiateAs(conf.get(StreamingKMeansConfigKeys.DIST_MEASUREMENT), DistanceMeasure.class);

    if (cutoff == 0 || maxClusters == 0 || dim == 0)
        throw new RuntimeException("Illegal parameters for streaming kmeans, cutoff: " + cutoff
                + ", maxClusters: " + maxClusters + ", dimension: " + dim);

    this.maxClusters = maxClusters;
    this.distanceCutoff = cutoff;
    this.centroidFactory = new StreamingKmeans.CentroidFactory() {
        @Override
        public UpdatableSearcher create() {
            // (dimension, distance obj, 0 < #projections < 100, searchSize)
            // return new ProjectionSearch(dim, measure, 8, 20);
            return new ProjectionSearch(dim, measure, 1, 2);
            // return new Brute(measure);
        }
    };
    this.centroids = centroidFactory.create();
}
From source file: edu.indiana.d2i.htrc.util.DataAPITestDriver.java
License: Apache License

@Override
public int run(String[] args) throws Exception {
    // command-line arguments: Data API configuration class name, max IDs per request, query string
    String dataAPIConfClassName = args[0];
    int maxIdsPerReq = Integer.valueOf(args[1]);
    String queryStr = args[2];

    Configuration conf = getConf();
    Utilities.setDataAPIConf(conf, dataAPIConfClassName, maxIdsPerReq);

    int maxIdRetrieved = conf.getInt(HTRCConstants.MAX_ID_RETRIEVED, 100);
    String dataEPR = conf.get(HTRCConstants.HOSTS_SEPARATEDBY_COMMA).split(",")[0];
    String delimitor = conf.get(HTRCConstants.DATA_API_URL_DELIMITOR, "|");
    String clientID = conf.get(HTRCConstants.DATA_API_CLIENTID, "yim");
    String clientSecrete = conf.get(HTRCConstants.DATA_API_CLIENTSECRETE, "yim");
    String tokenLoc = conf.get(HTRCConstants.DATA_API_TOKENLOC,
            "https://129-79-49-119.dhcp-bl.indiana.edu:25443/oauth2/token?grant_type=client_credentials");
    boolean selfsigned = conf.getBoolean(HTRCConstants.DATA_API_SELFSIGNED, true);

    if (dataEPR.equals(HTRCConstants.DATA_API_DEFAULT_URL)) {
        dataEPR = HTRCConstants.DATA_API_DEFAULT_URL_PREFIX + dataEPR;
    }

    HTRCDataAPIClient dataClient = new HTRCDataAPIClient.Builder(dataEPR, delimitor).authentication(true)
            .selfsigned(selfsigned).clientID(clientID).clientSecrete(clientSecrete).tokenLocation(tokenLoc)
            .build();

    // String queryStr = "yale.39002052249902|uc2.ark:/13960/t88g8h13f|uc2.ark:/13960/t6sx67388|uc2.ark:/13960/t5j96547r|uc2.ark:/13960/t6ww79z3v|yale.39002085406669|miua.4918260.0305.001|uc2.ark:/13960/t3416xb23|uc2.ark:/13960/t86h4mv25|loc.ark:/13960/t2k64mv58|";
    Iterable<Entry<String, String>> entries = dataClient.getID2Content(queryStr);
    for (Entry<String, String> entry : entries) {
        System.out.println(entry.getKey());
    }

    return 0;
}
From source file: edu.indiana.d2i.htrc.util.IDValidation.java
License: Apache License

@Override
public int run(String[] args) throws Exception {
    System.out.println("args.length " + args.length);
    // command-line arguments: Data API configuration class name, max IDs per request, input ID file, validated output ID file
    String dataAPIConfClassName = args[0];
    int maxIdsPerReq = Integer.valueOf(args[1]);
    String idfile = args[2];
    String accurateIdfile = args[3];

    Configuration conf = getConf();
    Utilities.setDataAPIConf(conf, dataAPIConfClassName, maxIdsPerReq);

    int maxIdRetrieved = conf.getInt(HTRCConstants.MAX_ID_RETRIEVED, 100);
    String dataEPR = conf.get(HTRCConstants.HOSTS_SEPARATEDBY_COMMA).split(",")[0];
    String delimitor = conf.get(HTRCConstants.DATA_API_URL_DELIMITOR, "|");
    String clientID = conf.get(HTRCConstants.DATA_API_CLIENTID, "yim");
    String clientSecrete = conf.get(HTRCConstants.DATA_API_CLIENTSECRETE, "yim");
    String tokenLoc = conf.get(HTRCConstants.DATA_API_TOKENLOC,
            "https://129-79-49-119.dhcp-bl.indiana.edu:25443/oauth2/token?grant_type=client_credentials");
    boolean selfsigned = conf.getBoolean(HTRCConstants.DATA_API_SELFSIGNED, true);

    if (dataEPR.equals(HTRCConstants.DATA_API_DEFAULT_URL)) {
        dataEPR = HTRCConstants.DATA_API_DEFAULT_URL_PREFIX + dataEPR;
    }

    HTRCDataAPIClient dataClient = new HTRCDataAPIClient.Builder(dataEPR, delimitor).authentication(true)
            .selfsigned(selfsigned).clientID(clientID).clientSecrete(clientSecrete).tokenLocation(tokenLoc)
            .build();

    BufferedReader reader = new BufferedReader(new FileReader(idfile));
    BufferedWriter writer = new BufferedWriter(new FileWriter(accurateIdfile));
    String line = null;
    int count = 0;
    // keep only the volume IDs whose content can actually be fetched from the Data API
    while ((line = reader.readLine()) != null) {
        Iterable<Entry<String, String>> content = dataClient.getID2Content(line);
        if (content != null)
            writer.write(line + "\n");
        if ((++count) % 1000 == 0)
            System.out.println("Finish " + count + " volumes.");
    }
    reader.close();
    writer.close();

    // String queryStr = "yale.39002052249902|uc2.ark:/13960/t88g8h13f|uc2.ark:/13960/t6sx67388|uc2.ark:/13960/t5j96547r|uc2.ark:/13960/t6ww79z3v|yale.39002085406669|miua.4918260.0305.001|uc2.ark:/13960/t3416xb23|uc2.ark:/13960/t86h4mv25|loc.ark:/13960/t2k64mv58|";
    // Iterable<Entry<String, String>> entries = dataClient.getID2Content(queryStr);
    // for (Entry<String, String> entry : entries) {
    //     System.out.println(entry.getKey());
    // }

    return 0;
}
From source file: edu.indiana.d2i.htrc.util.Utilities.java
License: Apache License

public static void setDataAPIConf(Configuration conf, String dataAPIConfClassName, int maxIdsPerReq)
        throws ClassNotFoundException {
    Class<?> dataAPIConfClass = Class.forName(dataAPIConfClassName);
    DataAPIDefaultConf confInstance = (DataAPIDefaultConf) ReflectionUtils.newInstance(dataAPIConfClass, conf);
    confInstance.configurate(conf, maxIdsPerReq);

    logger.info("Data API configuration");
    logger.info(" - host: " + conf.get(HTRCConstants.HOSTS_SEPARATEDBY_COMMA,
            "129-79-49-119.dhcp-bl.indiana.edu:25443/data-api"));
    logger.info(" - delimitor: " + conf.get(HTRCConstants.DATA_API_URL_DELIMITOR, "|"));
    logger.info(" - clientID: " + conf.get(HTRCConstants.DATA_API_CLIENTID, "yim"));
    logger.info(" - clientSecret: " + conf.get(HTRCConstants.DATA_API_CLIENTSECRETE, "yim"));
    logger.info(" - tokenLoc: " + conf.get(HTRCConstants.DATA_API_TOKENLOC,
            "https://129-79-49-119.dhcp-bl.indiana.edu:25443/oauth2/token?grant_type=client_credentials"));
    logger.info(" - selfsigned: " + conf.getBoolean(HTRCConstants.DATA_API_SELFSIGNED, true));
    logger.info(" - maxIDRetrieved: " + conf.getInt(HTRCConstants.MAX_ID_RETRIEVED, 100));
}
From source file: edu.indiana.d2i.htrc.util.Utilities.java
License: Apache License

public static void filterUnexistID(String input, String output) throws Exception {
    BufferedReader reader = new BufferedReader(new FileReader(input));
    BufferedWriter writer = new BufferedWriter(new FileWriter(output));

    Configuration conf = new Configuration();
    Utilities.setDataAPIConf(conf, "edu.indiana.d2i.htrc.io.DataAPISilvermapleConf", 1);

    int maxIdRetrieved = conf.getInt(HTRCConstants.MAX_ID_RETRIEVED, 100);
    String dataEPR = conf.get(HTRCConstants.HOSTS_SEPARATEDBY_COMMA).split(",")[0];
    String delimitor = conf.get(HTRCConstants.DATA_API_URL_DELIMITOR, "|");
    String clientID = conf.get(HTRCConstants.DATA_API_CLIENTID, "yim");
    String clientSecrete = conf.get(HTRCConstants.DATA_API_CLIENTSECRETE, "yim");
    String tokenLoc = conf.get(HTRCConstants.DATA_API_TOKENLOC,
            "https://129-79-49-119.dhcp-bl.indiana.edu:25443/oauth2/token?grant_type=client_credentials");
    boolean selfsigned = conf.getBoolean(HTRCConstants.DATA_API_SELFSIGNED, true);

    if (dataEPR.equals(HTRCConstants.DATA_API_DEFAULT_URL)) {
        dataEPR = HTRCConstants.DATA_API_DEFAULT_URL_PREFIX + dataEPR;
    }

    HTRCDataAPIClient dataClient = new HTRCDataAPIClient.Builder(dataEPR, delimitor).authentication(true)
            .selfsigned(selfsigned).clientID(clientID).clientSecrete(clientSecrete).tokenLocation(tokenLoc)
            .build();

    String line = null;
    // copy only the IDs whose content can be retrieved from the Data API
    while ((line = reader.readLine()) != null) {
        Iterable<Entry<String, String>> content = dataClient.getID2Content(line);
        if (content != null)
            writer.write(line + "\n");
    }
    writer.close();
    reader.close();
}
From source file: edu.isi.mavuno.extract.CombineGlobalStats.java
License: Apache License

public int run() throws ClassNotFoundException, InterruptedException, IOException {
    Configuration conf = getConf();

    String inputPath = MavunoUtils.getRequiredParam("Mavuno.CombineGlobalStats.InputPath", conf);
    String outputPath = MavunoUtils.getRequiredParam("Mavuno.CombineGlobalStats.OutputPath", conf);
    int numSplits = conf.getInt("Mavuno.CombineGlobalStats.TotalSplits", 1);

    sLogger.info("Tool name: CombineGlobalStats");
    sLogger.info(" - Input path: " + inputPath);
    sLogger.info(" - Output path: " + outputPath);
    sLogger.info(" - Number of splits: " + numSplits);

    Job job = new Job(conf);
    job.setJobName("CombineGlobalStats");

    for (int split = 0; split < numSplits; split++) {
        FileInputFormat.addInputPath(job, new Path(inputPath + "/" + split));
    }
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    FileOutputFormat.setCompressOutput(job, true);
    SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);

    job.setMapOutputKeyClass(ContextPatternWritable.class);
    job.setSortComparatorClass(ContextPatternWritable.Comparator.class);
    job.setMapOutputValueClass(ContextPatternStatsWritable.class);

    job.setOutputKeyClass(ContextPatternWritable.class);
    job.setOutputValueClass(ContextPatternStatsWritable.class);

    job.setMapperClass(MyMapper.class);
    job.setCombinerClass(MyReducer.class);
    job.setReducerClass(MyReducer.class);

    job.waitForCompletion(true);

    return 0;
}
From source file: edu.isi.mavuno.extract.CombineSplits.java
License: Apache License

public int run() throws ClassNotFoundException, InterruptedException, IOException {
    Configuration conf = getConf();

    String examplesPath = MavunoUtils.getRequiredParam("Mavuno.CombineSplits.ExamplesPath", conf);
    String exampleStatsPath = MavunoUtils.getRequiredParam("Mavuno.CombineSplits.ExampleStatsPath", conf);
    String splitKey = MavunoUtils.getRequiredParam("Mavuno.CombineSplits.SplitKey", conf).toLowerCase();
    int numSplits = conf.getInt("Mavuno.CombineSplits.TotalSplits", 1);
    String outputPath = MavunoUtils.getRequiredParam("Mavuno.CombineSplits.OutputPath", conf);

    sLogger.info("Tool name: CombineSplits");
    sLogger.info(" - Examples path: " + examplesPath);
    sLogger.info(" - Example stats path: " + exampleStatsPath);
    sLogger.info(" - Split key: " + splitKey);
    sLogger.info(" - Total splits: " + numSplits);
    sLogger.info(" - Output path: " + outputPath);

    Job job = new Job(conf);
    job.setJobName("CombineSplits");

    for (int split = 0; split < numSplits; split++) {
        FileInputFormat.addInputPath(job, new Path(examplesPath + "/" + split));
    }

    if (MavunoUtils.pathExists(conf, exampleStatsPath)) {
        FileInputFormat.addInputPath(job, new Path(exampleStatsPath));
    }

    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    FileOutputFormat.setCompressOutput(job, true);
    SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);

    job.setMapOutputKeyClass(ContextPatternWritable.class);

    if ("pattern".equals(splitKey)) {
        job.setSortComparatorClass(ContextPatternWritable.Comparator.class);
    } else if ("context".equals(splitKey)) {
        job.setSortComparatorClass(ContextPatternWritable.IdPatternComparator.class);
    } else if ("pattern+context".equals(splitKey)) {
        job.setSortComparatorClass(ContextPatternWritable.Comparator.class);
    } else {
        throw new RuntimeException("Invalid SplitKey in CombineSplits! -- " + splitKey);
    }

    job.setMapOutputValueClass(ContextPatternStatsWritable.class);

    job.setOutputKeyClass(ContextPatternWritable.class);
    job.setOutputValueClass(ContextPatternStatsWritable.class);

    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);

    job.waitForCompletion(true);

    return 0;
}
From source file: edu.iu.benchmark.BenchmarkMapper.java
License: Apache License

/**
 * Mapper configuration.
 */
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    Configuration configuration = context.getConfiguration();
    cmd = configuration.get(Constants.BENCHMARK_CMD, "bcast");
    numMappers = configuration.getInt(Constants.NUM_MAPPERS, 1);
    numPartitions = configuration.getInt(Constants.NUM_PARTITIONS, 1);
    bytesPerPartition = configuration.getInt(Constants.BYTES_PER_PARTITION, 1);
    numIterations = configuration.getInt(Constants.NUM_ITERATIONS, 1);
    LOG.info("Benchmark CMD " + cmd);
    LOG.info("Num Mappers " + numMappers);
    LOG.info("Num Partitions " + numPartitions);
    LOG.info("Bytes per Partition " + bytesPerPartition);
    LOG.info("Num Iterations " + numIterations);
}
From source file: edu.iu.ccd.CCDMPCollectiveMapper.java
License: Apache License

/**
 * Mapper configuration.
 */
@Override
protected void setup(Context context) {
    LOG.info("start setup: "
            + new SimpleDateFormat("yyyyMMdd_HHmmss").format(Calendar.getInstance().getTime()));
    long startTime = System.currentTimeMillis();
    Configuration configuration = context.getConfiguration();
    r = configuration.getInt(Constants.R, 100);
    lambda = configuration.getDouble(Constants.LAMBDA, 0.001);
    numIterations = configuration.getInt(Constants.NUM_ITERATIONS, 100);
    numThreads = configuration.getInt(Constants.NUM_THREADS, 16);
    modelDirPath = configuration.get(Constants.MODEL_DIR, "");
    numModelSlices = configuration.getInt(Constants.NUM_MODEL_SLICES, 2);
    testFilePath = configuration.get(Constants.TEST_FILE_PATH, "");
    rmseIteInterval = 1;
    printRMSE = false;
    testRMSE = 0.0;
    computeTime = 0L;
    prepareResTime = 0L;
    totalNumV = 0L;
    waitTime = 0L;
    long endTime = System.currentTimeMillis();
    LOG.info("config (ms): " + (endTime - startTime));
    LOG.info("R " + r);
    LOG.info("Lambda " + lambda);
    LOG.info("No. Iterations " + numIterations);
    LOG.info("No. Threads " + numThreads);
    LOG.info("Model Dir Path " + modelDirPath);
    LOG.info("No. Model Slices " + numModelSlices);
    LOG.info("TEST FILE PATH " + testFilePath);
}