Example usage for org.apache.hadoop.conf Configuration getClass

Introduction

On this page you can find example usages of org.apache.hadoop.conf.Configuration.getClass.

Prototype

public Class<?> getClass(String name, Class<?> defaultValue)

Source Link

Document

Get the value of the name property as a Class. If no such property is specified, then defaultValue is returned.

Usage

From source file:edu.umn.cs.sthadoop.mapreduce.SpatioTemporalInputFormat.java

License:Open Source License

/**
 * Creates the record reader matching the file behind {@code split}.
 * <p>
 * Readers are tried in this order: the {@code hdf} extension, the {@code rtree}
 * extension, a file recognised by {@code SpatialSite.isRTree}, a reader class
 * configured under {@code SpatialInputFormat.<extension>.recordreader}, and
 * finally {@code SpatioTemporalRecordReader}.
 *
 * @param split   the split to read; must be a {@code FileSplit} or a
 *                {@code CombineFileSplit} (only the first path of a combined
 *                split is inspected)
 * @param context supplies the configuration; when {@code null} a new
 *                {@code Configuration} is created instead
 * @return the record reader selected for the split
 * @throws IOException          declared by the overridden method
 * @throws InterruptedException declared by the overridden method
 * @throws RuntimeException     if {@code split} is of any other type
 */
@Override
public RecordReader<K, Iterable<V>> createRecordReader(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    final Path path;
    if (split instanceof FileSplit) {
        path = ((FileSplit) split).getPath();
    } else if (split instanceof CombineFileSplit) {
        // Only the first file of a combined split decides which reader is used
        path = ((CombineFileSplit) split).getPath(0);
    } else {
        throw new RuntimeException("Cannot process splits of type " + split.getClass());
    }
    // Extension as reported by FileUtil.getExtensionWithoutCompression
    final String extension = FileUtil.getExtensionWithoutCompression(path);

    // Constant-first comparisons so that a null extension cannot raise a
    // NullPointerException here (NOTE(review): confirm whether the helper can
    // actually return null)
    if ("hdf".equals(extension)) {
        // HDF File. Create HDFRecordReader
        return (RecordReader) new HDFRecordReader();
    }
    if ("rtree".equals(extension)) {
        // File is locally indexed as RTree
        return (RecordReader) new RTreeRecordReader3<V>();
    }
    // For backward compatibility, check if the file is RTree indexed from
    // its signature
    Configuration conf = context != null ? context.getConfiguration() : new Configuration();
    if (SpatialSite.isRTree(path.getFileSystem(conf), path)) {
        return (RecordReader) new RTreeRecordReader3<V>();
    }
    // Check if a custom record reader is configured with this extension
    Class<?> recordReaderClass = conf.getClass("SpatialInputFormat." + extension + ".recordreader",
            SpatioTemporalRecordReader.class);
    try {
        return (RecordReader<K, Iterable<V>>) recordReaderClass.newInstance();
    } catch (InstantiationException ignored) {
        // Deliberate best effort: fall through to the default reader below
    } catch (IllegalAccessException ignored) {
        // Deliberate best effort: fall through to the default reader below
    }
    // Use the default SpatioTemporalRecordReader if none of the above worked
    return (RecordReader) new SpatioTemporalRecordReader<V>();
}

From source file:grakn.core.server.session.computer.GraknSparkComputer.java

License:Open Source License

/**
 * Submits the configured vertex program and/or map reducers to
 * {@code computerService} as a single task and returns its future.
 * <p>
 * Before submitting, a random job group id is generated and appended to the
 * configured output location. The submitted task prepares the Spark/Hadoop
 * configuration, loads (and optionally filters, partitions and persists) the
 * graph RDD, runs the vertex program, runs the map reducers, cleans up
 * persisted RDDs and output locations, and builds the {@code ComputerResult}.
 * {@code computerService.shutdown()} is called right after the task has been
 * submitted.
 *
 * @return the future of the computation result
 */
@SuppressWarnings("PMD.UnusedFormalParameter")
private Future<ComputerResult> submitWithExecutor() {
    jobGroupId = Integer.toString(ThreadLocalRandom.current().nextInt(Integer.MAX_VALUE));
    String jobDescription = this.vertexProgram == null ? this.mapReducers.toString()
            : this.vertexProgram + "+" + this.mapReducers;

    // Use different output locations
    this.sparkConfiguration.setProperty(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION,
            this.sparkConfiguration.getString(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION) + "/" + jobGroupId);

    updateConfigKeys(sparkConfiguration);

    final Future<ComputerResult> result = computerService.submit(() -> {
        final long startTime = System.currentTimeMillis();

        //////////////////////////////////////////////////
        /////// PROCESS SHIM AND SYSTEM PROPERTIES ///////
        //////////////////////////////////////////////////
        final String shimService = KryoSerializer.class.getCanonicalName()
                .equals(this.sparkConfiguration.getString(Constants.SPARK_SERIALIZER, null))
                        ? UnshadedKryoShimService.class.getCanonicalName()
                        : HadoopPoolShimService.class.getCanonicalName();
        this.sparkConfiguration.setProperty(KryoShimServiceLoader.KRYO_SHIM_SERVICE, shimService);
        ///////////
        // selected keys are mirrored into JVM system properties and collected as -D options
        final StringBuilder params = new StringBuilder();
        this.sparkConfiguration.getKeys().forEachRemaining(key -> {
            if (KEYS_PASSED_IN_JVM_SYSTEM_PROPERTIES.contains(key)) {
                params.append(" -D").append("tinkerpop.").append(key).append("=")
                        .append(this.sparkConfiguration.getProperty(key));
                System.setProperty("tinkerpop." + key, this.sparkConfiguration.getProperty(key).toString());
            }
        });
        if (params.length() > 0) {
            this.sparkConfiguration.setProperty(SparkLauncher.EXECUTOR_EXTRA_JAVA_OPTIONS,
                    (this.sparkConfiguration.getString(SparkLauncher.EXECUTOR_EXTRA_JAVA_OPTIONS, "")
                            + params.toString()).trim());
            this.sparkConfiguration.setProperty(SparkLauncher.DRIVER_EXTRA_JAVA_OPTIONS,
                    (this.sparkConfiguration.getString(SparkLauncher.DRIVER_EXTRA_JAVA_OPTIONS, "")
                            + params.toString()).trim());
        }
        KryoShimServiceLoader.applyConfiguration(this.sparkConfiguration);
        //////////////////////////////////////////////////
        //////////////////////////////////////////////////
        //////////////////////////////////////////////////
        // apache and hadoop configurations that are used throughout the graph computer computation
        final org.apache.commons.configuration.Configuration graphComputerConfiguration = new HadoopConfiguration(
                this.sparkConfiguration);
        if (!graphComputerConfiguration.containsKey(Constants.SPARK_SERIALIZER)) {
            graphComputerConfiguration.setProperty(Constants.SPARK_SERIALIZER,
                    KryoSerializer.class.getCanonicalName());
            if (!graphComputerConfiguration.containsKey(Constants.SPARK_KRYO_REGISTRATOR)) {
                graphComputerConfiguration.setProperty(Constants.SPARK_KRYO_REGISTRATOR,
                        GryoRegistrator.class.getCanonicalName());
            }
        }
        graphComputerConfiguration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_WRITER_HAS_EDGES,
                this.persist.equals(GraphComputer.Persist.EDGES));

        final Configuration hadoopConfiguration = ConfUtil.makeHadoopConfiguration(graphComputerConfiguration);

        final Storage fileSystemStorage = FileSystemStorage.open(hadoopConfiguration);
        // classify the configured reader/writer; Object.class is the default when the key is unset
        final boolean inputFromHDFS = FileInputFormat.class.isAssignableFrom(
                hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_READER, Object.class));
        final boolean inputFromSpark = PersistedInputRDD.class.isAssignableFrom(
                hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_READER, Object.class));
        final boolean outputToHDFS = FileOutputFormat.class.isAssignableFrom(
                hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_WRITER, Object.class));
        final boolean outputToSpark = PersistedOutputRDD.class.isAssignableFrom(
                hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_WRITER, Object.class));
        final boolean skipPartitioner = graphComputerConfiguration
                .getBoolean(Constants.GREMLIN_SPARK_SKIP_PARTITIONER, false);
        final boolean skipPersist = graphComputerConfiguration
                .getBoolean(Constants.GREMLIN_SPARK_SKIP_GRAPH_CACHE, false);

        if (inputFromHDFS) {
            String inputLocation = Constants
                    .getSearchGraphLocation(hadoopConfiguration.get(Constants.GREMLIN_HADOOP_INPUT_LOCATION),
                            fileSystemStorage)
                    .orElse(null);
            if (null != inputLocation) {
                try {
                    // resolve the path once; the same value is stored in both configurations
                    final String resolvedInputLocation = FileSystem.get(hadoopConfiguration)
                            .getFileStatus(new Path(inputLocation)).getPath().toString();
                    graphComputerConfiguration.setProperty(Constants.MAPREDUCE_INPUT_FILEINPUTFORMAT_INPUTDIR,
                            resolvedInputLocation);
                    hadoopConfiguration.set(Constants.MAPREDUCE_INPUT_FILEINPUTFORMAT_INPUTDIR,
                            resolvedInputLocation);
                } catch (final IOException e) {
                    throw new IllegalStateException(e.getMessage(), e);
                }
            }
        }

        final InputRDD inputRDD;
        final OutputRDD outputRDD;
        final boolean filtered;
        try {
            inputRDD = InputRDD.class.isAssignableFrom(
                    hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_READER, Object.class))
                            ? hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_READER,
                                    InputRDD.class, InputRDD.class).newInstance()
                            : InputFormatRDD.class.newInstance();
            outputRDD = OutputRDD.class.isAssignableFrom(
                    hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_WRITER, Object.class))
                            ? hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_WRITER,
                                    OutputRDD.class, OutputRDD.class).newInstance()
                            : OutputFormatRDD.class.newInstance();

            // if the input class can filter on load, then set the filters
            if (inputRDD instanceof InputFormatRDD
                    && GraphFilterAware.class.isAssignableFrom(hadoopConfiguration.getClass(
                            Constants.GREMLIN_HADOOP_GRAPH_READER, InputFormat.class, InputFormat.class))) {
                GraphFilterAware.storeGraphFilter(graphComputerConfiguration, hadoopConfiguration,
                        this.graphFilter);
                filtered = false;
            } else if (inputRDD instanceof GraphFilterAware) {
                ((GraphFilterAware) inputRDD).setGraphFilter(this.graphFilter);
                filtered = false;
            } else {
                // the reader cannot filter on load, so filter after loading if a filter exists
                filtered = this.graphFilter.hasFilter();
            }
        } catch (final InstantiationException | IllegalAccessException e) {
            throw new IllegalStateException(e.getMessage(), e);
        }

        // create the spark context from the graph computer configuration
        final JavaSparkContext sparkContext = new JavaSparkContext(Spark.create(hadoopConfiguration));
        final Storage sparkContextStorage = SparkContextStorage.open();

        sparkContext.setJobGroup(jobGroupId, jobDescription);

        GraknSparkMemory memory = null;
        // delete output location
        final String outputLocation = hadoopConfiguration.get(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, null);
        if (null != outputLocation) {
            if (outputToHDFS && fileSystemStorage.exists(outputLocation)) {
                fileSystemStorage.rm(outputLocation);
            }
            if (outputToSpark && sparkContextStorage.exists(outputLocation)) {
                sparkContextStorage.rm(outputLocation);
            }
        }

        // the Spark application name will always be set by SparkContextStorage,
        // thus, log the name (at debug level) to make it easier to debug
        logger.debug(Constants.GREMLIN_HADOOP_SPARK_JOB_PREFIX
                + (null == this.vertexProgram ? "No VertexProgram" : this.vertexProgram) + "["
                + this.mapReducers + "]");

        // add the project jars to the cluster
        this.loadJars(hadoopConfiguration, sparkContext);
        updateLocalConfiguration(sparkContext, hadoopConfiguration);

        // create a message-passing friendly rdd from the input rdd
        boolean partitioned = false;
        JavaPairRDD<Object, VertexWritable> loadedGraphRDD = inputRDD.readGraphRDD(graphComputerConfiguration,
                sparkContext);

        // if there are vertex or edge filters, filter the loaded graph rdd prior to partitioning and persisting
        if (filtered) {
            this.logger.debug("Filtering the loaded graphRDD: " + this.graphFilter);
            loadedGraphRDD = GraknSparkExecutor.applyGraphFilter(loadedGraphRDD, this.graphFilter);
        }
        // if the loaded graph RDD is already partitioned use that partitioner,
        // else partition it with HashPartitioner
        if (loadedGraphRDD.partitioner().isPresent()) {
            this.logger.debug("Using the existing partitioner associated with the loaded graphRDD: "
                    + loadedGraphRDD.partitioner().get());
        } else {
            if (!skipPartitioner) {
                final Partitioner partitioner = new HashPartitioner(
                        this.workersSet ? this.workers : loadedGraphRDD.partitions().size());
                this.logger.debug("Partitioning the loaded graphRDD: " + partitioner);
                loadedGraphRDD = loadedGraphRDD.partitionBy(partitioner);
                partitioned = true;
                assert loadedGraphRDD.partitioner().isPresent();
            } else {
                // no easy way to test this with a test case
                assert skipPartitioner == !loadedGraphRDD.partitioner().isPresent();

                this.logger.debug("Partitioning has been skipped for the loaded graphRDD via "
                        + Constants.GREMLIN_SPARK_SKIP_PARTITIONER);
            }
        }
        // if the loaded graphRDD was already partitioned previous,
        // then this coalesce/repartition will not take place
        if (this.workersSet) {
            // ensures that the loaded graphRDD does not have more partitions than workers
            if (loadedGraphRDD.partitions().size() > this.workers) {
                loadedGraphRDD = loadedGraphRDD.coalesce(this.workers);
            } else {
                // ensures that the loaded graphRDD does not have less partitions than workers
                if (loadedGraphRDD.partitions().size() < this.workers) {
                    loadedGraphRDD = loadedGraphRDD.repartition(this.workers);
                }
            }
        }
        // persist the vertex program loaded graph as specified by configuration
        // or else use default cache() which is MEMORY_ONLY
        if (!skipPersist && (!inputFromSpark || partitioned || filtered)) {
            loadedGraphRDD = loadedGraphRDD.persist(StorageLevel.fromString(
                    hadoopConfiguration.get(Constants.GREMLIN_SPARK_GRAPH_STORAGE_LEVEL, "MEMORY_ONLY")));
        }
        // final graph with view
        // (for persisting and/or mapReducing -- may be null and thus, possible to save space/time)
        JavaPairRDD<Object, VertexWritable> computedGraphRDD = null;
        try {
            ////////////////////////////////
            // process the vertex program //
            ////////////////////////////////
            if (null != this.vertexProgram) {
                memory = new GraknSparkMemory(this.vertexProgram, this.mapReducers, sparkContext);
                /////////////////
                // if there is a registered VertexProgramInterceptor, use it to bypass the GraphComputer semantics
                if (graphComputerConfiguration
                        .containsKey(Constants.GREMLIN_HADOOP_VERTEX_PROGRAM_INTERCEPTOR)) {
                    try {
                        final GraknSparkVertexProgramInterceptor<VertexProgram> interceptor = (GraknSparkVertexProgramInterceptor) Class
                                .forName(graphComputerConfiguration
                                        .getString(Constants.GREMLIN_HADOOP_VERTEX_PROGRAM_INTERCEPTOR))
                                .newInstance();
                        computedGraphRDD = interceptor.apply(this.vertexProgram, loadedGraphRDD, memory);
                    } catch (final ClassNotFoundException | IllegalAccessException | InstantiationException e) {
                        // keep the cause so the original stack trace is not lost
                        throw new IllegalStateException(e.getMessage(), e);
                    }
                } else {
                    // standard GraphComputer semantics
                    // get a configuration that will be propagated to all workers
                    final HadoopConfiguration vertexProgramConfiguration = new HadoopConfiguration();
                    this.vertexProgram.storeState(vertexProgramConfiguration);
                    // set up the vertex program and wire up configurations
                    this.vertexProgram.setup(memory);
                    JavaPairRDD<Object, ViewIncomingPayload<Object>> viewIncomingRDD = null;
                    memory.broadcastMemory(sparkContext);
                    // execute the vertex program
                    while (true) {
                        if (Thread.interrupted()) {
                            sparkContext.cancelAllJobs();
                            throw new TraversalInterruptedException();
                        }
                        memory.setInExecute(true);
                        viewIncomingRDD = GraknSparkExecutor.executeVertexProgramIteration(loadedGraphRDD,
                                viewIncomingRDD, memory, graphComputerConfiguration,
                                vertexProgramConfiguration);
                        memory.setInExecute(false);
                        if (this.vertexProgram.terminate(memory)) {
                            break;
                        } else {
                            memory.incrIteration();
                            memory.broadcastMemory(sparkContext);
                        }
                    }
                    // if the graph will be continued to be used (persisted or mapreduced),
                    // then generate a view+graph
                    if ((null != outputRDD && !this.persist.equals(Persist.NOTHING))
                            || !this.mapReducers.isEmpty()) {
                        computedGraphRDD = GraknSparkExecutor.prepareFinalGraphRDD(loadedGraphRDD,
                                viewIncomingRDD, this.vertexProgram.getVertexComputeKeys());
                        assert null != computedGraphRDD && computedGraphRDD != loadedGraphRDD;
                    } else {
                        // ensure that the computedGraphRDD was not created
                        assert null == computedGraphRDD;
                    }
                }
                /////////////////
                memory.complete(); // drop all transient memory keys
                // write the computed graph to the respective output (rdd or output format)
                if (null != outputRDD && !this.persist.equals(Persist.NOTHING)) {
                    // the logic holds that a computeGraphRDD must be created at this point
                    assert null != computedGraphRDD;

                    outputRDD.writeGraphRDD(graphComputerConfiguration, computedGraphRDD);
                }
            }

            final boolean computedGraphCreated = computedGraphRDD != null && computedGraphRDD != loadedGraphRDD;
            if (!computedGraphCreated) {
                computedGraphRDD = loadedGraphRDD;
            }

            final Memory.Admin finalMemory = null == memory ? new MapMemory() : new MapMemory(memory);

            //////////////////////////////
            // process the map reducers //
            //////////////////////////////
            if (!this.mapReducers.isEmpty()) {
                // create a mapReduceRDD for executing the map reduce jobs on
                JavaPairRDD<Object, VertexWritable> mapReduceRDD = computedGraphRDD;
                if (computedGraphCreated && !outputToSpark) {
                    // drop all the edges of the graph as they are not used in mapReduce processing
                    mapReduceRDD = computedGraphRDD.mapValues(vertexWritable -> {
                        vertexWritable.get().dropEdges(Direction.BOTH);
                        return vertexWritable;
                    });
                    // if there is only one MapReduce to execute, don't bother wasting the clock cycles.
                    if (this.mapReducers.size() > 1) {
                        mapReduceRDD = mapReduceRDD.persist(StorageLevel.fromString(hadoopConfiguration
                                .get(Constants.GREMLIN_SPARK_GRAPH_STORAGE_LEVEL, "MEMORY_ONLY")));
                    }
                }

                for (final MapReduce mapReduce : this.mapReducers) {
                    // execute the map reduce job
                    final HadoopConfiguration newApacheConfiguration = new HadoopConfiguration(
                            graphComputerConfiguration);
                    mapReduce.storeState(newApacheConfiguration);
                    // map
                    final JavaPairRDD mapRDD = GraknSparkExecutor.executeMap(mapReduceRDD, mapReduce,
                            newApacheConfiguration);
                    // combine
                    final JavaPairRDD combineRDD = mapReduce.doStage(MapReduce.Stage.COMBINE)
                            ? GraknSparkExecutor.executeCombine(mapRDD, newApacheConfiguration)
                            : mapRDD;
                    // reduce
                    final JavaPairRDD reduceRDD = mapReduce.doStage(MapReduce.Stage.REDUCE)
                            ? GraknSparkExecutor.executeReduce(combineRDD, mapReduce, newApacheConfiguration)
                            : combineRDD;
                    // write the map reduce output back to disk and computer result memory
                    if (null != outputRDD) {
                        mapReduce.addResultToMemory(finalMemory, outputRDD.writeMemoryRDD(
                                graphComputerConfiguration, mapReduce.getMemoryKey(), reduceRDD));
                    }
                }
                // if the mapReduceRDD is not simply the computed graph, unpersist the mapReduceRDD
                if (computedGraphCreated && !outputToSpark) {
                    assert loadedGraphRDD != computedGraphRDD;
                    assert mapReduceRDD != computedGraphRDD;
                    mapReduceRDD.unpersist();
                } else {
                    assert mapReduceRDD == computedGraphRDD;
                }
            }

            // unpersist the loaded graph if it will not be used again (no PersistedInputRDD)
            // if the graphRDD was loaded from Spark, but then partitioned or filtered, its a different RDD
            if (!inputFromSpark || partitioned || filtered) {
                loadedGraphRDD.unpersist();
            }
            // unpersist the computed graph if it will not be used again (no PersistedOutputRDD)
            // if the computed graph is the loadedGraphRDD because it was not mutated and not-unpersisted,
            // then don't unpersist the computedGraphRDD/loadedGraphRDD
            if ((!outputToSpark || this.persist.equals(GraphComputer.Persist.NOTHING))
                    && computedGraphCreated) {
                computedGraphRDD.unpersist();
            }
            // delete any file system or rdd data if persist nothing
            if (null != outputLocation && this.persist.equals(GraphComputer.Persist.NOTHING)) {
                if (outputToHDFS) {
                    fileSystemStorage.rm(outputLocation);
                }
                if (outputToSpark) {
                    sparkContextStorage.rm(outputLocation);
                }
            }
            // update runtime and return the newly computed graph
            finalMemory.setRuntime(System.currentTimeMillis() - startTime);
            // clear properties that should not be propagated in an OLAP chain
            graphComputerConfiguration.clearProperty(Constants.GREMLIN_HADOOP_GRAPH_FILTER);
            graphComputerConfiguration.clearProperty(Constants.GREMLIN_HADOOP_VERTEX_PROGRAM_INTERCEPTOR);
            graphComputerConfiguration.clearProperty(Constants.GREMLIN_SPARK_SKIP_GRAPH_CACHE);
            graphComputerConfiguration.clearProperty(Constants.GREMLIN_SPARK_SKIP_PARTITIONER);
            return new DefaultComputerResult(InputOutputHelper.getOutputGraph(graphComputerConfiguration,
                    this.resultGraph, this.persist), finalMemory.asImmutable());
        } catch (Exception e) {
            // So it throws the same exception as tinker does
            throw new RuntimeException(e);
        }
    });
    // called right after submitting; no further tasks are submitted by this method
    computerService.shutdown();
    return result;
}

From source file:io.hops.erasure_coding.Codec.java

License:Apache License

/**
 * Instantiates and initialises the erasure code implementation for this codec.
 * <p>
 * The implementation class is looked up under the configuration key
 * {@code ERASURE_CODE_KEY_PREFIX + id}; the class named by
 * {@code erasureCodeClass} is passed as the default value for that lookup.
 *
 * @param conf configuration used to resolve the class and handed to
 *             {@code ReflectionUtils.newInstance}
 * @return the new erasure code, after {@code init(this)} has been called on it
 * @throws RuntimeException wrapping the {@code ClassNotFoundException} raised
 *                          when {@code erasureCodeClass} cannot be loaded
 */
public ErasureCode createErasureCode(Configuration conf) {
    final Class<?> implementation;
    try {
        // The fallback class is always resolved, whether or not the key is set
        Class<?> fallback = conf.getClassByName(this.erasureCodeClass);
        implementation = conf.getClass(ERASURE_CODE_KEY_PREFIX + this.id, fallback);
    } catch (ClassNotFoundException e) {
        throw new RuntimeException(e);
    }
    ErasureCode erasureCode = (ErasureCode) ReflectionUtils.newInstance(implementation, conf);
    erasureCode.init(this);
    return erasureCode;
}

From source file:org.apache.blur.server.TableContext.java

License:Apache License

/**
 * Builds a new {@code TableContext} for the given table, stores it in
 * {@code _cache} under {@code name} and returns a clone of it.
 * <p>
 * Table properties from the descriptor are copied into both the system
 * configuration and the system Blur configuration before the context is
 * populated.
 *
 * @param tableDescriptor descriptor supplying table properties and field defaults
 * @param remote          when {@code true} a {@code ThriftFieldManager} is used,
 *                        otherwise an {@code HdfsFieldManager}
 * @param client          client handed to the {@code ThriftFieldManager}; only
 *                        used when {@code remote} is {@code true}
 * @param name            table name, also used as the cache key
 * @param tableUri        URI of the table; becomes the context's table path
 * @return a clone of the newly created and cached context
 * @throws RuntimeException wrapping an {@code IOException} from field-manager
 *                          setup, or any exception raised while creating the
 *                          configured read interceptor
 */
private static TableContext createInternal(TableDescriptor tableDescriptor, boolean remote, Iface client,
        String name, String tableUri) {
    LOG.info("Creating table context for table [{0}]", name);
    Configuration configuration = getSystemConfiguration();
    BlurConfiguration blurConfiguration = getSystemBlurConfiguration();
    Map<String, String> tableProperties = tableDescriptor.getTableProperties();
    if (tableProperties != null) {
        for (Entry<String, String> prop : tableProperties.entrySet()) {
            configuration.set(prop.getKey(), prop.getValue());
            blurConfiguration.set(prop.getKey(), prop.getValue());
        }
    }

    TableContext tableContext = new TableContext();
    tableContext._configuration = configuration;
    tableContext._blurConfiguration = blurConfiguration;
    tableContext._tablePath = new Path(tableUri);

    tableContext._defaultFieldName = SUPER;
    tableContext._table = name;
    tableContext._descriptor = tableDescriptor;
    tableContext._timeBetweenCommits = configuration.getLong(BLUR_SHARD_TIME_BETWEEN_COMMITS, 60000);
    tableContext._timeBetweenRefreshs = configuration.getLong(BLUR_SHARD_TIME_BETWEEN_REFRESHS, 5000);
    tableContext._defaultPrimeDocTerm = new Term(BlurConstants.PRIME_DOC, BlurConstants.PRIME_DOC_VALUE);
    tableContext._defaultScoreType = ScoreType.SUPER;

    // TODO make configurable
    tableContext._discoverableFields = new HashSet<String>(
            Arrays.asList(BlurConstants.ROW_ID, BlurConstants.RECORD_ID, BlurConstants.FAMILY));

    // TODO make configurable
    tableContext._accessControlFactory = new FilterAccessControlFactory();

    boolean strict = tableDescriptor.isStrictTypes();
    String defaultMissingFieldType = tableDescriptor.getDefaultMissingFieldType();
    boolean defaultMissingFieldLessIndexing = tableDescriptor.isDefaultMissingFieldLessIndexing();
    Map<String, String> defaultMissingFieldProps = emptyIfNull(tableDescriptor.getDefaultMissingFieldProps());

    Path storagePath = new Path(tableContext._tablePath, TYPES);
    try {
        FieldManager fieldManager;
        if (remote) {
            fieldManager = new ThriftFieldManager(SUPER, new NoStopWordStandardAnalyzer(), strict,
                    defaultMissingFieldType, defaultMissingFieldLessIndexing, defaultMissingFieldProps,
                    configuration, client, name);
        } else {
            fieldManager = new HdfsFieldManager(SUPER, new NoStopWordStandardAnalyzer(), storagePath,
                    configuration, strict, defaultMissingFieldType, defaultMissingFieldLessIndexing,
                    defaultMissingFieldProps);
        }
        loadCustomTypes(tableContext, blurConfiguration, fieldManager);
        fieldManager.loadFromStorage();
        tableContext._fieldManager = fieldManager;
    } catch (IOException e) {
        throw new RuntimeException(e);
    }

    Class<?> c1 = configuration.getClass(BLUR_SHARD_INDEX_DELETION_POLICY_MAXAGE,
            KeepOnlyLastCommitDeletionPolicy.class);
    tableContext._indexDeletionPolicy = (IndexDeletionPolicy) configure(
            ReflectionUtils.newInstance(c1, configuration), tableContext);
    Class<?> c2 = configuration.getClass(BLUR_SHARD_INDEX_SIMILARITY, FairSimilarity.class);

    String readInterceptorClass = blurConfiguration.get(BLUR_SHARD_READ_INTERCEPTOR);
    if (readInterceptorClass == null || readInterceptorClass.trim().isEmpty()) {
        tableContext._readInterceptor = DEFAULT_INTERCEPTOR;
    } else {
        try {
            @SuppressWarnings("unchecked")
            Class<? extends ReadInterceptor> clazz = (Class<? extends ReadInterceptor>) Class
                    .forName(readInterceptorClass);
            Constructor<? extends ReadInterceptor> constructor = clazz
                    .getConstructor(new Class[] { BlurConfiguration.class });
            tableContext._readInterceptor = constructor.newInstance(blurConfiguration);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }
    // The similarity used to be created and configured twice, with the first
    // instance discarded. Only the assignment that was left in effect is kept.
    tableContext._similarity = (Similarity) configure(ReflectionUtils.newInstance(c2, configuration),
            tableContext);

    _cache.put(name, tableContext);
    return tableContext.clone();
}

From source file:org.apache.crunch.types.avro.AvroMode.java

License:Apache License

/**
 * Replaces this mode's factory with the one configured under {@code propName},
 * if any.
 * <p>
 * This object's own class is passed as the default for the lookup, so getting
 * it back leaves the current factory untouched.
 *
 * @param conf configuration to read the factory class from; also handed to
 *             {@code ReflectionUtils.newInstance}
 */
@SuppressWarnings("unchecked")
void setFromConfiguration(Configuration conf) {
    // The shuffle and input/output use different mode properties, but the
    // factory is shared: only one ReaderWriterFactory can be in use.
    final Class<?> ownClass = this.getClass();
    final Class<?> configuredClass = conf.getClass(propName, ownClass);
    if (configuredClass == ownClass) {
        return;
    }
    this.factory = (ReaderWriterFactory) ReflectionUtils.newInstance(configuredClass, conf);
}

From source file:org.apache.crunch.types.avro.SafeAvroSerialization.java

License:Apache License

/**
 * Builds the deserializer for map output keys or values.
 * <p>
 * The key or value half of the schema returned by
 * {@code AvroJob.getMapOutputSchema} is chosen according to whether {@code c}
 * is an {@code AvroKey} type. The datum reader comes from the factory
 * configured under {@code crunch.reflectdatafactory} when
 * {@code AvroJob.MAP_OUTPUT_IS_REFLECT} is set, and is a
 * {@code SpecificDatumReader} otherwise.
 *
 * @param c the wrapper class being deserialized
 * @return a deserializer reading with the selected schema
 */
public Deserializer<AvroWrapper<T>> getDeserializer(Class<AvroWrapper<T>> c) {
    final boolean isKey = AvroKey.class.isAssignableFrom(c);
    final Configuration conf = getConf();

    final Schema mapOutputSchema = AvroJob.getMapOutputSchema(conf);
    final Schema schema;
    if (isKey) {
        schema = Pair.getKeySchema(mapOutputSchema);
    } else {
        schema = Pair.getValueSchema(mapOutputSchema);
    }

    final DatumReader<T> datumReader;
    if (!conf.getBoolean(AvroJob.MAP_OUTPUT_IS_REFLECT, false)) {
        datumReader = new SpecificDatumReader<T>(schema);
    } else {
        Class<?> factoryClass = conf.getClass("crunch.reflectdatafactory", ReflectDataFactory.class);
        ReflectDataFactory factory = (ReflectDataFactory) ReflectionUtils.newInstance(factoryClass, conf);
        datumReader = factory.getReader(schema);
    }
    return new AvroWrapperDeserializer(datumReader, isKey);
}

From source file:org.apache.giraph.graph.BspUtils.java

License:Apache License

/**
 * Get the user's subclassed vertex index class.
 *
 * @param <I> Vertex id/*ww w  . j ava2  s.c  o  m*/
 * @param conf Configuration to check
 * @return User's vertex index class
 */
@SuppressWarnings("unchecked")
public static <I extends Writable> Class<I> getVertexIndexClass(Configuration conf) {
    return (Class<I>) conf.getClass(GiraphJob.VERTEX_INDEX_CLASS, WritableComparable.class);
}

From source file:org.apache.giraph.graph.BspUtils.java

License:Apache License

/**
 * Get the user's subclassed vertex value class.
 *
 * @param <V> Vertex data/*from  w ww . j  a va 2  s.c  o m*/
 * @param conf Configuration to check
 * @return User's vertex value class
 */
@SuppressWarnings("unchecked")
public static <V extends Writable> Class<V> getVertexValueClass(Configuration conf) {
    return (Class<V>) conf.getClass(GiraphJob.VERTEX_VALUE_CLASS, Writable.class);
}

From source file:org.apache.giraph.graph.BspUtils.java

License:Apache License

/**
 * Get the user's subclassed edge value class.
 *
 * @param <E> Edge data/*from w ww .  jav a2s . c  o m*/
 * @param conf Configuration to check
 * @return User's vertex edge value class
 */
@SuppressWarnings("unchecked")
public static <E extends Writable> Class<E> getEdgeValueClass(Configuration conf) {
    return (Class<E>) conf.getClass(GiraphJob.EDGE_VALUE_CLASS, Writable.class);
}

From source file:org.apache.giraph.graph.BspUtils.java

License:Apache License

/**
 * Get the user's subclassed vertex message value class.
 *
 * @param <M> Message data/*from   w w w .  j  a  v  a  2s. com*/
 * @param conf Configuration to check
 * @return User's vertex message value class
 */
@SuppressWarnings("unchecked")
public static <M extends Writable> Class<M> getMessageValueClass(Configuration conf) {
    return (Class<M>) conf.getClass(GiraphJob.MESSAGE_VALUE_CLASS, Writable.class);
}