Example usage for org.apache.hadoop.conf Configuration getInt

Introduction

On this page you can find example usages of org.apache.hadoop.conf.Configuration.getInt.

Prototype

public int getInt(String name, int defaultValue) 

Document

Get the value of the name property as an int. If no such property exists, the provided defaultValue is returned; if the property is set but its value cannot be parsed as an int, an error is thrown.
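
As a quick illustration of the prototype above, here is a minimal, self-contained sketch; the property names example.threads and example.retries are invented for this illustration and do not come from the sources below:

import org.apache.hadoop.conf.Configuration;

public class GetIntExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.setInt("example.threads", 8); // hypothetical property, set explicitly

        // Property is present: the configured value (8) is returned.
        int threads = conf.getInt("example.threads", 4);

        // Property is absent: the supplied default (3) is returned.
        int retries = conf.getInt("example.retries", 3);

        System.out.println("threads=" + threads + ", retries=" + retries);
    }
}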

Usage

From source file: com.datatorrent.demos.mobile.Application.java

License: Open Source License

@Override
public void populateDAG(DAG dag, Configuration conf) {
    dag.setAttribute(DAG.DEBUG, true);

    String lPhoneRange = conf.get(PHONE_RANGE_PROP, null);
    if (lPhoneRange != null) {
        String[] tokens = lPhoneRange.split("-");
        if (tokens.length != 2) {
            throw new IllegalArgumentException("Invalid range: " + lPhoneRange);
        }
        this.phoneRange = Range.between(Integer.parseInt(tokens[0]), Integer.parseInt(tokens[1]));
    }
    LOG.debug("Phone range {}", this.phoneRange);

    RandomEventGenerator phones = dag.addOperator("phonegen", RandomEventGenerator.class);
    phones.setMinvalue(this.phoneRange.getMinimum());
    phones.setMaxvalue(this.phoneRange.getMaximum());
    phones.setTuplesBlast(200);
    phones.setTuplesBlastIntervalMillis(5);
    dag.setOutputPortAttribute(phones.integer_data, PortContext.QUEUE_CAPACITY, 32 * 1024);

    PhoneMovementGenerator movementGen = dag.addOperator("pmove", PhoneMovementGenerator.class);
    movementGen.setRange(20);
    movementGen.setThreshold(80);
    dag.setAttribute(movementGen, OperatorContext.INITIAL_PARTITION_COUNT, 2);
    dag.setAttribute(movementGen, OperatorContext.COUNTERS_AGGREGATOR,
            new BasicCounters.LongAggregator<MutableLong>());

    ThroughputBasedPartitioner<PhoneMovementGenerator> partitioner = new ThroughputBasedPartitioner<PhoneMovementGenerator>();
    partitioner.setCooldownMillis(45000);
    partitioner.setMaximumEvents(30000);
    partitioner.setMinimumEvents(10000);
    dag.setAttribute(movementGen, OperatorContext.STATS_LISTENERS,
            Arrays.asList(new StatsListener[] { partitioner }));
    dag.setAttribute(movementGen, OperatorContext.PARTITIONER, partitioner);
    dag.setInputPortAttribute(movementGen.data, PortContext.QUEUE_CAPACITY, 32 * 1024);

    // default partitioning: first connected stream to movementGen will be partitioned
    dag.addStream("phonedata", phones.integer_data, movementGen.data);

    // generate seed numbers
    Random random = new Random();
    int maxPhone = phoneRange.getMaximum() - phoneRange.getMinimum();
    int phonesToDisplay = conf.getInt(TOTAL_SEED_NOS, 10);

    for (int i = phonesToDisplay; i-- > 0;) {
        int phoneNo = phoneRange.getMinimum() + random.nextInt(maxPhone + 1);
        LOG.info("seed no: " + phoneNo);
        movementGen.phoneRegister.add(phoneNo);
    }

    // done generating data
    LOG.info("Finished generating seed data.");

    String gatewayAddress = dag.getValue(DAG.GATEWAY_CONNECT_ADDRESS);
    if (!StringUtils.isEmpty(gatewayAddress)) {
        URI uri = URI.create("ws://" + gatewayAddress + "/pubsub");
        LOG.info("WebSocket with gateway at: {}", gatewayAddress);

        PubSubWebSocketOutputOperator<Object> wsOut = dag.addOperator("phoneLocationQueryResultWS",
                new PubSubWebSocketOutputOperator<Object>());
        wsOut.setUri(uri);
        wsOut.setTopic("demos.mobile.phoneLocationQueryResult");

        PubSubWebSocketInputOperator wsIn = dag.addOperator("phoneLocationQueryWS",
                new PubSubWebSocketInputOperator());
        wsIn.setUri(uri);
        wsIn.addTopic("demos.mobile.phoneLocationQuery");

        dag.addStream("consoledata", movementGen.locationQueryResult, wsOut.input);
        dag.addStream("query", wsIn.outputPort, movementGen.phoneQuery);
    } else {
        // for testing purposes without server
        movementGen.phoneRegister.add(5554995);
        movementGen.phoneRegister.add(5556101);
        ConsoleOutputOperator out = dag.addOperator("phoneLocationQueryResult", new ConsoleOutputOperator());
        out.setStringFormat("phoneLocationQueryResult" + ": %s");
        dag.addStream("consoledata", movementGen.locationQueryResult, out.input);
    }
}

From source file: com.datatorrent.demos.mrmonitor.MRMonitoringApplication.java

License: Open Source License

@Override
public void populateDAG(DAG dag, Configuration conf) {
    String daemonAddress = dag.getValue(DAG.GATEWAY_CONNECT_ADDRESS);
    if (daemonAddress == null || StringUtils.isEmpty(daemonAddress)) {
        daemonAddress = "10.0.2.15:9790";
    }

    int numberOfPartitions = conf
            .getInt(MRMonitoringApplication.class.getName() + ".numberOfMonitoringOperators", 1);
    int maxNumberOfJobs = conf.getInt(MRMonitoringApplication.class.getName() + ".maxNumberOfJobsPerOperator",
            Constants.MAX_MAP_SIZE);
    // logger.info(" number of partitions {} ",numberOfPartitions);

    MRJobStatusOperator mrJobOperator = dag.addOperator("Monitoring-Operator", new MRJobStatusOperator());
    mrJobOperator.setMaxMapSize(maxNumberOfJobs);
    mrJobOperator.setSleepTime(200);
    dag.setAttribute(mrJobOperator, OperatorContext.INITIAL_PARTITION_COUNT, numberOfPartitions);
    dag.setAttribute(mrJobOperator, OperatorContext.APPLICATION_WINDOW_COUNT, 4);

    URI uri = URI.create("ws://" + daemonAddress + "/pubsub");
    logger.info("WebSocket with daemon at {}", daemonAddress);

    PubSubWebSocketInputOperator wsIn = dag.addOperator("Input-Query-Operator",
            new PubSubWebSocketInputOperator());
    wsIn.setUri(uri);
    wsIn.addTopic("contrib.summit.mrDebugger.mrDebuggerQuery");

    MapToMRObjectOperator convertorOper = dag.addOperator("Input-Query-Conversion-Operator",
            new MapToMRObjectOperator());
    dag.addStream("queryConversion", wsIn.outputPort, convertorOper.input)
            .setLocality(Locality.CONTAINER_LOCAL);

    dag.addStream("queryProcessing", convertorOper.output, mrJobOperator.input);

    /**
     * This is used to emit the meta data about the job
     */
    PubSubWebSocketOutputOperator<Object> wsOut = dag.addOperator("Job-Output-Operator",
            new PubSubWebSocketOutputOperator<Object>());
    wsOut.setUri(uri);
    wsOut.setTopic("contrib.summit.mrDebugger.jobResult");

    /**
     * This is used to emit the information of map tasks of the job
     */
    PubSubWebSocketOutputOperator<Object> wsMapOut = dag.addOperator("Map-Output-Operator",
            new PubSubWebSocketOutputOperator<Object>());
    wsMapOut.setUri(uri);
    wsMapOut.setTopic("contrib.summit.mrDebugger.mapResult");

    /**
     * This is used to emit the information of reduce tasks of the job
     */
    PubSubWebSocketOutputOperator<Object> wsReduceOut = dag.addOperator("Reduce-Output-Operator",
            new PubSubWebSocketOutputOperator<Object>());
    wsReduceOut.setUri(uri);
    wsReduceOut.setTopic("contrib.summit.mrDebugger.reduceResult");

    /**
     * This is used to emit the metric information of the job
     */
    PubSubWebSocketOutputOperator<Object> wsCounterOut = dag.addOperator("Counter-Output-Operator",
            new PubSubWebSocketOutputOperator<Object>());
    wsCounterOut.setUri(uri);
    wsCounterOut.setTopic("contrib.summit.mrDebugger.counterResult");

    dag.addStream("jobConsoledata", mrJobOperator.output, wsOut.input);
    dag.addStream("mapConsoledata", mrJobOperator.mapOutput, wsMapOut.input);
    dag.addStream("reduceConsoledata", mrJobOperator.reduceOutput, wsReduceOut.input);
    dag.addStream("counterConsoledata", mrJobOperator.counterOutput, wsCounterOut.input);

}

From source file: com.datatorrent.demos.mroperator.MapReduceApplication.java

License: Open Source License

@Override
public void populateDAG(DAG dag, Configuration conf) {
    conf();

    String dirName = conf.get(this.getClass().getName() + ".inputDirName", "src/test/resources/wordcount/");
    String outputDirName = conf.get(this.getClass().getName() + ".outputDirName", "src/test/resources/output");
    int numberOfReducers = conf.getInt(this.getClass().getName() + ".numOfReducers", 1);
    int numberOfMaps = conf.getInt(this.getClass().getName() + ".numOfMaps", 2);
    String configurationfilePath = conf.get(this.getClass().getName() + ".configFile", "");

    // logger.info("configfile {}", configurationfilePath);
    MapOperator<K1, V1, K2, V2> inputOperator = dag.addOperator("map", new MapOperator<K1, V1, K2, V2>());
    inputOperator.setInputFormatClass(inputFormat);
    inputOperator.setDirName(dirName);
    dag.setAttribute(inputOperator, OperatorContext.INITIAL_PARTITION_COUNT, numberOfMaps);

    String configFileName = null;
    if (configurationfilePath != null && !configurationfilePath.isEmpty()) {
        dag.setAttribute(DAGContext.LIBRARY_JARS, configurationfilePath);
        StringTokenizer configFileTokenizer = new StringTokenizer(configurationfilePath, "/");
        configFileName = configFileTokenizer.nextToken();
        while (configFileTokenizer.hasMoreTokens())
            configFileName = configFileTokenizer.nextToken();
    }

    inputOperator.setMapClass(mapClass);
    inputOperator.setConfigFile(configFileName);
    inputOperator.setCombineClass(combineClass);

    ReduceOperator<K2, V2, K2, V2> reduceOpr = dag.addOperator("reduce", new ReduceOperator<K2, V2, K2, V2>());
    reduceOpr.setReduceClass(reduceClass);
    reduceOpr.setConfigFile(configFileName);
    dag.setAttribute(reduceOpr, Context.OperatorContext.INITIAL_PARTITION_COUNT, numberOfReducers);

    HdfsKeyValOutputOperator<K2, V2> console = dag.addOperator("console",
            new HdfsKeyValOutputOperator<K2, V2>());
    console.setFilePath(outputDirName);
    // ConsoleOutputOperator console = dag.addOperator("console", new
    // ConsoleOutputOperator());

    dag.addStream("input_map", inputOperator.output, reduceOpr.input);
    dag.addStream("input_count_map", inputOperator.outputCount, reduceOpr.inputCount);

    dag.addStream("console_reduce", reduceOpr.output, console.input);

}

From source file: com.datatorrent.demos.scalability.ScalableAdsApp.java

License: Open Source License

@Override
public void populateDAG(DAG dag, Configuration conf) {
    //dag.setAttribute(DAG.APPLICATION_NAME, "ScalableAdsApplication");
    dag.setAttribute(DAG.STREAMING_WINDOW_SIZE_MILLIS, WINDOW_SIZE_MILLIS);

    int heartbeat_interval = conf.getInt(ScalableAdsApp.class.getName() + ".heartbeat_interval", 1000);
    int heartbeat_timeout = conf.getInt(ScalableAdsApp.class.getName() + ".heartbeat_timeout", 30000);
    int unifier_count = conf.getInt(ScalableAdsApp.class.getName() + ".unifier_count", 2);

    dag.setAttribute(DAG.HEARTBEAT_INTERVAL_MILLIS, heartbeat_interval);
    dag.setAttribute(DAG.HEARTBEAT_TIMEOUT_MILLIS, heartbeat_timeout);

    int partitions = conf.getInt(ScalableAdsApp.class.getName() + ".partitions", 1);
    int partitions_agg = conf.getInt(ScalableAdsApp.class.getName() + ".partitions_agg", 1);

    InputItemGenerator input = dag.addOperator("input", InputItemGenerator.class);
    dag.setOutputPortAttribute(input.outputPort, PortContext.QUEUE_CAPACITY, QUEUE_CAPACITY);
    dag.setAttribute(input, OperatorContext.INITIAL_PARTITION_COUNT, partitions);

    InputDimensionGenerator inputDimension = dag.addOperator("inputDimension", InputDimensionGenerator.class);
    dag.setInputPortAttribute(inputDimension.inputPort, PortContext.PARTITION_PARALLEL, true);
    dag.setInputPortAttribute(inputDimension.inputPort, PortContext.QUEUE_CAPACITY, QUEUE_CAPACITY);
    dag.setOutputPortAttribute(inputDimension.outputPort, PortContext.QUEUE_CAPACITY, QUEUE_CAPACITY);

    BucketOperator bucket = dag.addOperator("bucket", BucketOperator.class);
    bucket.setPartitions(partitions_agg);
    dag.setInputPortAttribute(bucket.inputPort, PortContext.PARTITION_PARALLEL, true);
    dag.setInputPortAttribute(bucket.inputPort, PortContext.QUEUE_CAPACITY, QUEUE_CAPACITY);
    dag.setOutputPortAttribute(bucket.outputPort, PortContext.QUEUE_CAPACITY, QUEUE_CAPACITY);
    //TODO: uncomment after latest version
    dag.setOutputPortAttribute(bucket.outputPort, PortContext.UNIFIER_LIMIT, unifier_count);
    dag.setAttribute(bucket, OperatorContext.APPLICATION_WINDOW_COUNT, 5);

    //MapMultiConsoleOutputOperator<AggrKey, Object> console = dag.addOperator("console", MapMultiConsoleOutputOperator.class);
    ConsoleOutputOperator console = dag.addOperator("console", ConsoleOutputOperator.class);
    dag.setAttribute(console, OperatorContext.INITIAL_PARTITION_COUNT, partitions_agg);
    console.silent = true;
    console.setDebug(false);
    dag.setInputPortAttribute(console.input, PortContext.QUEUE_CAPACITY, QUEUE_CAPACITY);

    dag.addStream("ingen", input.outputPort, inputDimension.inputPort).setLocality(Locality.CONTAINER_LOCAL);
    dag.addStream("indimgen", inputDimension.outputPort, bucket.inputPort).setLocality(Locality.THREAD_LOCAL);
    dag.addStream("console", bucket.outputPort, console.input);
    //dag.addStream("console", bucket.outputPort, console.input).setLocality(Locality.CONTAINER_LOCAL);
}

From source file: com.datatorrent.stram.client.StramClientUtils.java

License: Apache License

public static Configuration addDTSiteResources(Configuration conf) {
    addDTLocalResources(conf);
    FileSystem fs = null;
    File targetGlobalFile;
    try {
        fs = newFileSystemInstance(conf);
        // after getting the dfsRootDirectory config parameter, redo the entire process with the global config
        // load global settings from DFS
        targetGlobalFile = new File(String.format("/tmp/dt-site-global-%s.xml",
                UserGroupInformation.getLoginUser().getShortUserName()));
        org.apache.hadoop.fs.Path hdfsGlobalPath = new org.apache.hadoop.fs.Path(
                StramClientUtils.getDTDFSConfigDir(fs, conf), StramClientUtils.DT_SITE_GLOBAL_XML_FILE);
        LOG.debug("Copying global dt-site.xml from {} to {}", hdfsGlobalPath,
                targetGlobalFile.getAbsolutePath());
        fs.copyToLocalFile(hdfsGlobalPath, new org.apache.hadoop.fs.Path(targetGlobalFile.toURI()));
        addDTSiteResources(conf, targetGlobalFile);
        if (!isDevelopmentMode()) {
            // load node local config file
            addDTSiteResources(conf,
                    new File(StramClientUtils.getConfigDir(), StramClientUtils.DT_SITE_XML_FILE));
        }
        // load user config file
        addDTSiteResources(conf,
                new File(StramClientUtils.getUserDTDirectory(), StramClientUtils.DT_SITE_XML_FILE));
    } catch (IOException ex) {
        // ignore
        LOG.debug("Caught exception when loading configuration: {}: moving on...", ex.getMessage());
    } finally {
        // Cannot delete the file here because addDTSiteResource which eventually calls Configuration.reloadConfiguration
        // does not actually reload the configuration.  The file is actually read later and it needs to exist.
        //
        //if (targetGlobalFile != null) {
        //targetGlobalFile.delete();
        //}
        IOUtils.closeQuietly(fs);
    }

    //Validate loggers-level settings
    String loggersLevel = conf.get(DTLoggerFactory.DT_LOGGERS_LEVEL);
    if (loggersLevel != null) {
        String targets[] = loggersLevel.split(",");
        Preconditions.checkArgument(targets.length > 0, "zero loggers level");
        for (String target : targets) {
            String parts[] = target.split(":");
            Preconditions.checkArgument(parts.length == 2, "incorrect " + target);
            Preconditions.checkArgument(ConfigValidator.validateLoggersLevel(parts[0], parts[1]),
                    "incorrect " + target);
        }
    }
    convertDeprecatedProperties(conf);

    //
    // The ridiculous default RESOURCEMANAGER_CONNECT_MAX_WAIT_MS from hadoop is 15 minutes (!!!!), which actually translates to 20 minutes with the connect interval.
    // That means if there is anything wrong with YARN or if YARN is not running, the caller has to wait for up to 20 minutes until it gets an error.
    // We are overriding this to be 10 seconds maximum.
    //

    int rmConnectMaxWait = conf.getInt(YarnConfiguration.RESOURCEMANAGER_CONNECT_MAX_WAIT_MS,
            YarnConfiguration.DEFAULT_RESOURCEMANAGER_CONNECT_MAX_WAIT_MS);
    if (rmConnectMaxWait > RESOURCEMANAGER_CONNECT_MAX_WAIT_MS_OVERRIDE) {
        LOG.info("Overriding {} assigned value of {} to {} because the assigned value is too big.",
                YarnConfiguration.RESOURCEMANAGER_CONNECT_MAX_WAIT_MS, rmConnectMaxWait,
                RESOURCEMANAGER_CONNECT_MAX_WAIT_MS_OVERRIDE);
        conf.setInt(YarnConfiguration.RESOURCEMANAGER_CONNECT_MAX_WAIT_MS,
                RESOURCEMANAGER_CONNECT_MAX_WAIT_MS_OVERRIDE);
        int rmConnectRetryInterval = conf.getInt(YarnConfiguration.RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_MS,
                YarnConfiguration.DEFAULT_RESOURCEMANAGER_CONNECT_MAX_WAIT_MS);
        int defaultRetryInterval = Math.max(500, RESOURCEMANAGER_CONNECT_MAX_WAIT_MS_OVERRIDE / 5);
        if (rmConnectRetryInterval > defaultRetryInterval) {
            LOG.info("Overriding {} assigned value of {} to {} because the assigned value is too big.",
                    YarnConfiguration.RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_MS, rmConnectRetryInterval,
                    defaultRetryInterval);
            conf.setInt(YarnConfiguration.RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_MS, defaultRetryInterval);
        }
    }
    LOG.info(" conf object in stramclient {}", conf);
    return conf;
}

From source file: com.digitalpebble.behemoth.DocumentFilter.java

License: Apache License

/** Builds a document filter given a Configuration object **/
public static DocumentFilter getFilters(Configuration conf) {
    // extracts the patterns
    Map<String, String> PositiveKVpatterns = conf.getValByRegex(DocumentFilterParamNamePrefixKeep + ".+");
    Map<String, String> NegativeKVpatterns = conf.getValByRegex(DocumentFilterParamNamePrefixSkip + ".+");

    Map<String, String> tmpMap;

    DocumentFilter filter = new DocumentFilter();

    filter.medataMode = conf.get(DocumentFilterParamNameMode, "AND");

    // has to be either positive or negative but not both
    if (PositiveKVpatterns.size() > 0 && NegativeKVpatterns.size() > 0) {
        throw new RuntimeException(
                "Can't have positive AND negative document filters - check your configuration");
    } else if (PositiveKVpatterns.size() > 0) {
        filter.negativeMode = false;
        tmpMap = PositiveKVpatterns;
    } else {
        filter.negativeMode = true;
        tmpMap = NegativeKVpatterns;
    }

    // normalise the keys
    Iterator<Entry<String, String>> kviter = tmpMap.entrySet().iterator();
    while (kviter.hasNext()) {
        Entry<String, String> ent = kviter.next();
        String k = ent.getKey();
        String v = ent.getValue();
        k = k.substring(DocumentFilterParamNamePrefixKeep.length());

        StringBuffer message = new StringBuffer();
        if (filter.negativeMode)
            message.append("Negative ");
        else
            message.append("Positive ");
        message.append("filter found : ").append(k).append(" = ").append(v);
        LOG.info(message.toString());

        filter.KVpatterns.put(k, v);
    }

    String URLPatternS = conf.get(DocumentFilterParamNameURLFilterKeep, "");
    if (URLPatternS.length() > 0) {
        try {
            filter.URLRegex = Pattern.compile(URLPatternS);
        } catch (PatternSyntaxException e) {
            filter.URLRegex = null;
            LOG.error("Can't create regular expression for URL from " + URLPatternS);
        }
    }

    String MTPatternS = conf.get(DocumentFilterParamNameMimeTypeFilterKeep, "");
    if (MTPatternS.length() > 0) {
        try {
            filter.MimetypeRegex = Pattern.compile(MTPatternS);
        } catch (PatternSyntaxException e) {
            filter.MimetypeRegex = null;
            LOG.error("Can't create regular expression for MimeType from " + MTPatternS);
        }
    }

    filter.maxContentLength = conf.getInt(DocumentFilterParamNameLength, -1);

    return filter;
}

From source file: com.dinglicom.clouder.mapreduce.input.LineReader.java

License: Apache License

/**
 * Create a line reader that reads from the given stream using the
 * <code>io.file.buffer.size</code> specified in the given
 * <code>Configuration</code>.
 * @param in input stream
 * @param conf configuration
 * @throws IOException
 */
public LineReader(InputStream in, Configuration conf) throws IOException {
    this(in, conf.getInt("io.file.buffer.size", DEFAULT_BUFFER_SIZE));
}

From source file: com.dinglicom.clouder.mapreduce.input.LineReader.java

License: Apache License

/**
 * Create a line reader that reads from the given stream using the
 * <code>io.file.buffer.size</code> specified in the given
 * <code>Configuration</code>, and using a custom delimiter of array of
 * bytes.
 * @param in input stream
 * @param conf configuration
 * @param recordDelimiterBytes The delimiter
 * @throws IOException
 */
public LineReader(InputStream in, Configuration conf, byte[] recordDelimiterBytes) throws IOException {
    this.in = in;
    this.bufferSize = conf.getInt("io.file.buffer.size", DEFAULT_BUFFER_SIZE);
    this.buffer = new byte[this.bufferSize];
    this.recordDelimiterBytes = recordDelimiterBytes;
}

From source file: com.dinglicom.clouder.mapreduce.input.LineRecordReader.java

License: Apache License

public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    System.out.println("-------------------length:" + split.getLength() + "\tposition:" + split.getStart());
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt(MAX_LINE_LENGTH, Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    key = new Text(FileToCDRType.getTypeByPath(file.getName()));
    compressionCodecs = new CompressionCodecFactory(job);
    codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    final FileSystem fs = file.getFileSystem(job);
    fileIn = fs.open(file);
    if (isCompressedInput()) {
        decompressor = CodecPool.getDecompressor(codec);
        if (codec instanceof SplittableCompressionCodec) {
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(
                    fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.BYBLOCK);
            if (null == this.recordDelimiterBytes) {
                in = new LineReader(cIn, job);
            } else {
                in = new LineReader(cIn, job, this.recordDelimiterBytes);
            }

            start = cIn.getAdjustedStart();
            end = cIn.getAdjustedEnd();
            filePosition = cIn;
        } else {
            if (null == this.recordDelimiterBytes) {
                in = new LineReader(codec.createInputStream(fileIn, decompressor), job);
            } else {
                in = new LineReader(codec.createInputStream(fileIn, decompressor), job,
                        this.recordDelimiterBytes);
            }
            filePosition = fileIn;
        }
    } else {
        fileIn.seek(start);
        if (null == this.recordDelimiterBytes) {
            in = new LineReader(fileIn, job);
        } else {
            in = new LineReader(fileIn, job, this.recordDelimiterBytes);
        }

        filePosition = fileIn;
    }
    // If this is not the first split, we always throw away first record
    // because we always (except the last split) read one extra line in
    // next() method.
    if (start != 0) {
        start += in.readLine(new Text(), 0, maxBytesToConsume(start));
    }
    this.pos = start;
}

From source file: com.elex.dmp.lda.CachingCVB0Mapper.java

License: Apache License

@Override
protected void setup(Context context) throws IOException, InterruptedException {
    log.info("Retrieving configuration");
    Configuration conf = context.getConfiguration();
    float eta = conf.getFloat(CVB0Driver.TERM_TOPIC_SMOOTHING, Float.NaN);
    float alpha = conf.getFloat(CVB0Driver.DOC_TOPIC_SMOOTHING, Float.NaN);
    long seed = conf.getLong(CVB0Driver.RANDOM_SEED, 1234L);
    numTopics = conf.getInt(CVB0Driver.NUM_TOPICS, -1);
    int numTerms = conf.getInt(CVB0Driver.NUM_TERMS, -1);
    int numUpdateThreads = conf.getInt(CVB0Driver.NUM_UPDATE_THREADS, 1);
    int numTrainThreads = conf.getInt(CVB0Driver.NUM_TRAIN_THREADS, 4);
    maxIters = conf.getInt(CVB0Driver.MAX_ITERATIONS_PER_DOC, 10);
    float modelWeight = conf.getFloat(CVB0Driver.MODEL_WEIGHT, 1.0f);

    log.info("Initializing read model");
    TopicModel readModel;
    Path[] modelPaths = CVB0Driver.getModelPaths(conf);
    if (modelPaths != null && modelPaths.length > 0) {
        readModel = new TopicModel(conf, eta, alpha, null, numUpdateThreads, modelWeight, modelPaths);
    } else {
        log.info("No model files found");
        readModel = new TopicModel(numTopics, numTerms, eta, alpha, RandomUtils.getRandom(seed), null,
                numTrainThreads, modelWeight);
    }

    log.info("Initializing write model");
    TopicModel writeModel = modelWeight == 1
            ? new TopicModel(numTopics, numTerms, eta, alpha, null, numUpdateThreads)
            : readModel;

    log.info("Initializing model trainer");
    modelTrainer = new ModelTrainer(readModel, writeModel, numTrainThreads, numTopics, numTerms);
    modelTrainer.start();
}