Example usage for org.apache.hadoop.conf Configuration getClass

List of usage examples for org.apache.hadoop.conf Configuration getClass

Introduction

In this page you can find the example usage for org.apache.hadoop.conf Configuration getClass.

Prototype

public Class<?> getClass(String name, Class<?> defaultValue) 

Source Link

Document

Get the value of the name property as a Class.

Usage

From source file:edu.umn.cs.sthadoop.mapreduce.SpatioTemporalInputFormat.java

License:Open Source License

@Override
public RecordReader<K, Iterable<V>> createRecordReader(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    Path path;/*from   w  w  w. ja  va  2 s .c  o  m*/
    String extension;
    if (split instanceof FileSplit) {
        FileSplit fsplit = (FileSplit) split;
        extension = FileUtil.getExtensionWithoutCompression(path = fsplit.getPath());
    } else if (split instanceof CombineFileSplit) {
        CombineFileSplit csplit = (CombineFileSplit) split;
        extension = FileUtil.getExtensionWithoutCompression(path = csplit.getPath(0));
    } else {
        throw new RuntimeException("Cannot process plits of type " + split.getClass());
    }
    // If this extension is for a compression, skip it and take the previous
    // extension
    if (extension.equals("hdf")) {
        // HDF File. Create HDFRecordReader
        return (RecordReader) new HDFRecordReader();
    }
    if (extension.equals("rtree")) {
        // File is locally indexed as RTree
        return (RecordReader) new RTreeRecordReader3<V>();
    }
    // For backward compatibility, check if the file is RTree indexed from
    // its signature
    Configuration conf = context != null ? context.getConfiguration() : new Configuration();
    if (SpatialSite.isRTree(path.getFileSystem(conf), path)) {
        return (RecordReader) new RTreeRecordReader3<V>();
    }
    // Check if a custom record reader is configured with this extension
    Class<?> recordReaderClass = conf.getClass("SpatialInputFormat." + extension + ".recordreader",
            SpatioTemporalRecordReader.class);
    try {
        return (RecordReader<K, Iterable<V>>) recordReaderClass.newInstance();
    } catch (InstantiationException e) {
    } catch (IllegalAccessException e) {
    }
    // Use the default SpatioTemporalRecordReader if none of the above worked
    return (RecordReader) new SpatioTemporalRecordReader<V>();
}

From source file:grakn.core.server.session.computer.GraknSparkComputer.java

License:Open Source License

@SuppressWarnings("PMD.UnusedFormalParameter")
private Future<ComputerResult> submitWithExecutor() {
    jobGroupId = Integer.toString(ThreadLocalRandom.current().nextInt(Integer.MAX_VALUE));
    String jobDescription = this.vertexProgram == null ? this.mapReducers.toString()
            : this.vertexProgram + "+" + this.mapReducers;

    // Use different output locations
    this.sparkConfiguration.setProperty(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION,
            this.sparkConfiguration.getString(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION) + "/" + jobGroupId);

    updateConfigKeys(sparkConfiguration);

    final Future<ComputerResult> result = computerService.submit(() -> {
        final long startTime = System.currentTimeMillis();

        //////////////////////////////////////////////////
        /////// PROCESS SHIM AND SYSTEM PROPERTIES ///////
        //////////////////////////////////////////////////
        final String shimService = KryoSerializer.class.getCanonicalName()
                .equals(this.sparkConfiguration.getString(Constants.SPARK_SERIALIZER, null))
                        ? UnshadedKryoShimService.class.getCanonicalName()
                        : HadoopPoolShimService.class.getCanonicalName();
        this.sparkConfiguration.setProperty(KryoShimServiceLoader.KRYO_SHIM_SERVICE, shimService);
        ///////////
        final StringBuilder params = new StringBuilder();
        this.sparkConfiguration.getKeys().forEachRemaining(key -> {
            if (KEYS_PASSED_IN_JVM_SYSTEM_PROPERTIES.contains(key)) {
                params.append(" -D").append("tinkerpop.").append(key).append("=")
                        .append(this.sparkConfiguration.getProperty(key));
                System.setProperty("tinkerpop." + key, this.sparkConfiguration.getProperty(key).toString());
            }/* www .j  a va 2  s.c o m*/
        });
        if (params.length() > 0) {
            this.sparkConfiguration.setProperty(SparkLauncher.EXECUTOR_EXTRA_JAVA_OPTIONS,
                    (this.sparkConfiguration.getString(SparkLauncher.EXECUTOR_EXTRA_JAVA_OPTIONS, "")
                            + params.toString()).trim());
            this.sparkConfiguration.setProperty(SparkLauncher.DRIVER_EXTRA_JAVA_OPTIONS,
                    (this.sparkConfiguration.getString(SparkLauncher.DRIVER_EXTRA_JAVA_OPTIONS, "")
                            + params.toString()).trim());
        }
        KryoShimServiceLoader.applyConfiguration(this.sparkConfiguration);
        //////////////////////////////////////////////////
        //////////////////////////////////////////////////
        //////////////////////////////////////////////////
        // apache and hadoop configurations that are used throughout the graph computer computation
        final org.apache.commons.configuration.Configuration graphComputerConfiguration = new HadoopConfiguration(
                this.sparkConfiguration);
        if (!graphComputerConfiguration.containsKey(Constants.SPARK_SERIALIZER)) {
            graphComputerConfiguration.setProperty(Constants.SPARK_SERIALIZER,
                    KryoSerializer.class.getCanonicalName());
            if (!graphComputerConfiguration.containsKey(Constants.SPARK_KRYO_REGISTRATOR)) {
                graphComputerConfiguration.setProperty(Constants.SPARK_KRYO_REGISTRATOR,
                        GryoRegistrator.class.getCanonicalName());
            }
        }
        graphComputerConfiguration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_WRITER_HAS_EDGES,
                this.persist.equals(GraphComputer.Persist.EDGES));

        final Configuration hadoopConfiguration = ConfUtil.makeHadoopConfiguration(graphComputerConfiguration);

        final Storage fileSystemStorage = FileSystemStorage.open(hadoopConfiguration);
        final boolean inputFromHDFS = FileInputFormat.class.isAssignableFrom(
                hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_READER, Object.class));
        final boolean inputFromSpark = PersistedInputRDD.class.isAssignableFrom(
                hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_READER, Object.class));
        final boolean outputToHDFS = FileOutputFormat.class.isAssignableFrom(
                hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_WRITER, Object.class));
        final boolean outputToSpark = PersistedOutputRDD.class.isAssignableFrom(
                hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_WRITER, Object.class));
        final boolean skipPartitioner = graphComputerConfiguration
                .getBoolean(Constants.GREMLIN_SPARK_SKIP_PARTITIONER, false);
        final boolean skipPersist = graphComputerConfiguration
                .getBoolean(Constants.GREMLIN_SPARK_SKIP_GRAPH_CACHE, false);

        if (inputFromHDFS) {
            String inputLocation = Constants
                    .getSearchGraphLocation(hadoopConfiguration.get(Constants.GREMLIN_HADOOP_INPUT_LOCATION),
                            fileSystemStorage)
                    .orElse(null);
            if (null != inputLocation) {
                try {
                    graphComputerConfiguration.setProperty(Constants.MAPREDUCE_INPUT_FILEINPUTFORMAT_INPUTDIR,
                            FileSystem.get(hadoopConfiguration).getFileStatus(new Path(inputLocation)).getPath()
                                    .toString());
                    hadoopConfiguration.set(Constants.MAPREDUCE_INPUT_FILEINPUTFORMAT_INPUTDIR,
                            FileSystem.get(hadoopConfiguration).getFileStatus(new Path(inputLocation)).getPath()
                                    .toString());
                } catch (final IOException e) {
                    throw new IllegalStateException(e.getMessage(), e);
                }
            }
        }

        final InputRDD inputRDD;
        final OutputRDD outputRDD;
        final boolean filtered;
        try {
            inputRDD = InputRDD.class.isAssignableFrom(
                    hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_READER, Object.class))
                            ? hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_READER,
                                    InputRDD.class, InputRDD.class).newInstance()
                            : InputFormatRDD.class.newInstance();
            outputRDD = OutputRDD.class.isAssignableFrom(
                    hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_WRITER, Object.class))
                            ? hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_WRITER,
                                    OutputRDD.class, OutputRDD.class).newInstance()
                            : OutputFormatRDD.class.newInstance();

            // if the input class can filter on load, then set the filters
            if (inputRDD instanceof InputFormatRDD
                    && GraphFilterAware.class.isAssignableFrom(hadoopConfiguration.getClass(
                            Constants.GREMLIN_HADOOP_GRAPH_READER, InputFormat.class, InputFormat.class))) {
                GraphFilterAware.storeGraphFilter(graphComputerConfiguration, hadoopConfiguration,
                        this.graphFilter);
                filtered = false;
            } else if (inputRDD instanceof GraphFilterAware) {
                ((GraphFilterAware) inputRDD).setGraphFilter(this.graphFilter);
                filtered = false;
            } else
                filtered = this.graphFilter.hasFilter();
        } catch (final InstantiationException | IllegalAccessException e) {
            throw new IllegalStateException(e.getMessage(), e);
        }

        // create the spark context from the graph computer configuration
        final JavaSparkContext sparkContext = new JavaSparkContext(Spark.create(hadoopConfiguration));
        final Storage sparkContextStorage = SparkContextStorage.open();

        sparkContext.setJobGroup(jobGroupId, jobDescription);

        GraknSparkMemory memory = null;
        // delete output location
        final String outputLocation = hadoopConfiguration.get(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, null);
        if (null != outputLocation) {
            if (outputToHDFS && fileSystemStorage.exists(outputLocation)) {
                fileSystemStorage.rm(outputLocation);
            }
            if (outputToSpark && sparkContextStorage.exists(outputLocation)) {
                sparkContextStorage.rm(outputLocation);
            }
        }

        // the Spark application name will always be set by SparkContextStorage,
        // thus, INFO the name to make it easier to debug
        logger.debug(Constants.GREMLIN_HADOOP_SPARK_JOB_PREFIX
                + (null == this.vertexProgram ? "No VertexProgram" : this.vertexProgram) + "["
                + this.mapReducers + "]");

        // add the project jars to the cluster
        this.loadJars(hadoopConfiguration, sparkContext);
        updateLocalConfiguration(sparkContext, hadoopConfiguration);

        // create a message-passing friendly rdd from the input rdd
        boolean partitioned = false;
        JavaPairRDD<Object, VertexWritable> loadedGraphRDD = inputRDD.readGraphRDD(graphComputerConfiguration,
                sparkContext);

        // if there are vertex or edge filters, filter the loaded graph rdd prior to partitioning and persisting
        if (filtered) {
            this.logger.debug("Filtering the loaded graphRDD: " + this.graphFilter);
            loadedGraphRDD = GraknSparkExecutor.applyGraphFilter(loadedGraphRDD, this.graphFilter);
        }
        // if the loaded graph RDD is already partitioned use that partitioner,
        // else partition it with HashPartitioner
        if (loadedGraphRDD.partitioner().isPresent()) {
            this.logger.debug("Using the existing partitioner associated with the loaded graphRDD: "
                    + loadedGraphRDD.partitioner().get());
        } else {
            if (!skipPartitioner) {
                final Partitioner partitioner = new HashPartitioner(
                        this.workersSet ? this.workers : loadedGraphRDD.partitions().size());
                this.logger.debug("Partitioning the loaded graphRDD: " + partitioner);
                loadedGraphRDD = loadedGraphRDD.partitionBy(partitioner);
                partitioned = true;
                assert loadedGraphRDD.partitioner().isPresent();
            } else {
                // no easy way to test this with a test case
                assert skipPartitioner == !loadedGraphRDD.partitioner().isPresent();

                this.logger.debug("Partitioning has been skipped for the loaded graphRDD via "
                        + Constants.GREMLIN_SPARK_SKIP_PARTITIONER);
            }
        }
        // if the loaded graphRDD was already partitioned previous,
        // then this coalesce/repartition will not take place
        if (this.workersSet) {
            // ensures that the loaded graphRDD does not have more partitions than workers
            if (loadedGraphRDD.partitions().size() > this.workers) {
                loadedGraphRDD = loadedGraphRDD.coalesce(this.workers);
            } else {
                // ensures that the loaded graphRDD does not have less partitions than workers
                if (loadedGraphRDD.partitions().size() < this.workers) {
                    loadedGraphRDD = loadedGraphRDD.repartition(this.workers);
                }
            }
        }
        // persist the vertex program loaded graph as specified by configuration
        // or else use default cache() which is MEMORY_ONLY
        if (!skipPersist && (!inputFromSpark || partitioned || filtered)) {
            loadedGraphRDD = loadedGraphRDD.persist(StorageLevel.fromString(
                    hadoopConfiguration.get(Constants.GREMLIN_SPARK_GRAPH_STORAGE_LEVEL, "MEMORY_ONLY")));
        }
        // final graph with view
        // (for persisting and/or mapReducing -- may be null and thus, possible to save space/time)
        JavaPairRDD<Object, VertexWritable> computedGraphRDD = null;
        try {
            ////////////////////////////////
            // process the vertex program //
            ////////////////////////////////
            if (null != this.vertexProgram) {
                memory = new GraknSparkMemory(this.vertexProgram, this.mapReducers, sparkContext);
                /////////////////
                // if there is a registered VertexProgramInterceptor, use it to bypass the GraphComputer semantics
                if (graphComputerConfiguration
                        .containsKey(Constants.GREMLIN_HADOOP_VERTEX_PROGRAM_INTERCEPTOR)) {
                    try {
                        final GraknSparkVertexProgramInterceptor<VertexProgram> interceptor = (GraknSparkVertexProgramInterceptor) Class
                                .forName(graphComputerConfiguration
                                        .getString(Constants.GREMLIN_HADOOP_VERTEX_PROGRAM_INTERCEPTOR))
                                .newInstance();
                        computedGraphRDD = interceptor.apply(this.vertexProgram, loadedGraphRDD, memory);
                    } catch (final ClassNotFoundException | IllegalAccessException | InstantiationException e) {
                        throw new IllegalStateException(e.getMessage());
                    }
                } else {
                    // standard GraphComputer semantics
                    // get a configuration that will be propagated to all workers
                    final HadoopConfiguration vertexProgramConfiguration = new HadoopConfiguration();
                    this.vertexProgram.storeState(vertexProgramConfiguration);
                    // set up the vertex program and wire up configurations
                    this.vertexProgram.setup(memory);
                    JavaPairRDD<Object, ViewIncomingPayload<Object>> viewIncomingRDD = null;
                    memory.broadcastMemory(sparkContext);
                    // execute the vertex program
                    while (true) {
                        if (Thread.interrupted()) {
                            sparkContext.cancelAllJobs();
                            throw new TraversalInterruptedException();
                        }
                        memory.setInExecute(true);
                        viewIncomingRDD = GraknSparkExecutor.executeVertexProgramIteration(loadedGraphRDD,
                                viewIncomingRDD, memory, graphComputerConfiguration,
                                vertexProgramConfiguration);
                        memory.setInExecute(false);
                        if (this.vertexProgram.terminate(memory)) {
                            break;
                        } else {
                            memory.incrIteration();
                            memory.broadcastMemory(sparkContext);
                        }
                    }
                    // if the graph will be continued to be used (persisted or mapreduced),
                    // then generate a view+graph
                    if ((null != outputRDD && !this.persist.equals(Persist.NOTHING))
                            || !this.mapReducers.isEmpty()) {
                        computedGraphRDD = GraknSparkExecutor.prepareFinalGraphRDD(loadedGraphRDD,
                                viewIncomingRDD, this.vertexProgram.getVertexComputeKeys());
                        assert null != computedGraphRDD && computedGraphRDD != loadedGraphRDD;
                    } else {
                        // ensure that the computedGraphRDD was not created
                        assert null == computedGraphRDD;
                    }
                }
                /////////////////
                memory.complete(); // drop all transient memory keys
                // write the computed graph to the respective output (rdd or output format)
                if (null != outputRDD && !this.persist.equals(Persist.NOTHING)) {
                    // the logic holds that a computeGraphRDD must be created at this point
                    assert null != computedGraphRDD;

                    outputRDD.writeGraphRDD(graphComputerConfiguration, computedGraphRDD);
                }
            }

            final boolean computedGraphCreated = computedGraphRDD != null && computedGraphRDD != loadedGraphRDD;
            if (!computedGraphCreated) {
                computedGraphRDD = loadedGraphRDD;
            }

            final Memory.Admin finalMemory = null == memory ? new MapMemory() : new MapMemory(memory);

            //////////////////////////////
            // process the map reducers //
            //////////////////////////////
            if (!this.mapReducers.isEmpty()) {
                // create a mapReduceRDD for executing the map reduce jobs on
                JavaPairRDD<Object, VertexWritable> mapReduceRDD = computedGraphRDD;
                if (computedGraphCreated && !outputToSpark) {
                    // drop all the edges of the graph as they are not used in mapReduce processing
                    mapReduceRDD = computedGraphRDD.mapValues(vertexWritable -> {
                        vertexWritable.get().dropEdges(Direction.BOTH);
                        return vertexWritable;
                    });
                    // if there is only one MapReduce to execute, don't bother wasting the clock cycles.
                    if (this.mapReducers.size() > 1) {
                        mapReduceRDD = mapReduceRDD.persist(StorageLevel.fromString(hadoopConfiguration
                                .get(Constants.GREMLIN_SPARK_GRAPH_STORAGE_LEVEL, "MEMORY_ONLY")));
                    }
                }

                for (final MapReduce mapReduce : this.mapReducers) {
                    // execute the map reduce job
                    final HadoopConfiguration newApacheConfiguration = new HadoopConfiguration(
                            graphComputerConfiguration);
                    mapReduce.storeState(newApacheConfiguration);
                    // map
                    final JavaPairRDD mapRDD = GraknSparkExecutor.executeMap(mapReduceRDD, mapReduce,
                            newApacheConfiguration);
                    // combine
                    final JavaPairRDD combineRDD = mapReduce.doStage(MapReduce.Stage.COMBINE)
                            ? GraknSparkExecutor.executeCombine(mapRDD, newApacheConfiguration)
                            : mapRDD;
                    // reduce
                    final JavaPairRDD reduceRDD = mapReduce.doStage(MapReduce.Stage.REDUCE)
                            ? GraknSparkExecutor.executeReduce(combineRDD, mapReduce, newApacheConfiguration)
                            : combineRDD;
                    // write the map reduce output back to disk and computer result memory
                    if (null != outputRDD) {
                        mapReduce.addResultToMemory(finalMemory, outputRDD.writeMemoryRDD(
                                graphComputerConfiguration, mapReduce.getMemoryKey(), reduceRDD));
                    }
                }
                // if the mapReduceRDD is not simply the computed graph, unpersist the mapReduceRDD
                if (computedGraphCreated && !outputToSpark) {
                    assert loadedGraphRDD != computedGraphRDD;
                    assert mapReduceRDD != computedGraphRDD;
                    mapReduceRDD.unpersist();
                } else {
                    assert mapReduceRDD == computedGraphRDD;
                }
            }

            // unpersist the loaded graph if it will not be used again (no PersistedInputRDD)
            // if the graphRDD was loaded from Spark, but then partitioned or filtered, its a different RDD
            if (!inputFromSpark || partitioned || filtered) {
                loadedGraphRDD.unpersist();
            }
            // unpersist the computed graph if it will not be used again (no PersistedOutputRDD)
            // if the computed graph is the loadedGraphRDD because it was not mutated and not-unpersisted,
            // then don't unpersist the computedGraphRDD/loadedGraphRDD
            if ((!outputToSpark || this.persist.equals(GraphComputer.Persist.NOTHING))
                    && computedGraphCreated) {
                computedGraphRDD.unpersist();
            }
            // delete any file system or rdd data if persist nothing
            if (null != outputLocation && this.persist.equals(GraphComputer.Persist.NOTHING)) {
                if (outputToHDFS) {
                    fileSystemStorage.rm(outputLocation);
                }
                if (outputToSpark) {
                    sparkContextStorage.rm(outputLocation);
                }
            }
            // update runtime and return the newly computed graph
            finalMemory.setRuntime(System.currentTimeMillis() - startTime);
            // clear properties that should not be propagated in an OLAP chain
            graphComputerConfiguration.clearProperty(Constants.GREMLIN_HADOOP_GRAPH_FILTER);
            graphComputerConfiguration.clearProperty(Constants.GREMLIN_HADOOP_VERTEX_PROGRAM_INTERCEPTOR);
            graphComputerConfiguration.clearProperty(Constants.GREMLIN_SPARK_SKIP_GRAPH_CACHE);
            graphComputerConfiguration.clearProperty(Constants.GREMLIN_SPARK_SKIP_PARTITIONER);
            return new DefaultComputerResult(InputOutputHelper.getOutputGraph(graphComputerConfiguration,
                    this.resultGraph, this.persist), finalMemory.asImmutable());
        } catch (Exception e) {
            // So it throws the same exception as tinker does
            throw new RuntimeException(e);
        }
    });
    computerService.shutdown();
    return result;
}

From source file:io.hops.erasure_coding.Codec.java

License:Apache License

public ErasureCode createErasureCode(Configuration conf) {
    // Create the scheduler
    Class<?> erasureCode = null;
    try {//from w  w w .  j a v a2 s. c om
        erasureCode = conf.getClass(ERASURE_CODE_KEY_PREFIX + this.id,
                conf.getClassByName(this.erasureCodeClass));
    } catch (ClassNotFoundException e) {
        throw new RuntimeException(e);
    }
    ErasureCode code = (ErasureCode) ReflectionUtils.newInstance(erasureCode, conf);
    code.init(this);
    return code;
}

From source file:org.apache.blur.server.TableContext.java

License:Apache License

private static TableContext createInternal(TableDescriptor tableDescriptor, boolean remote, Iface client,
        String name, String tableUri) {
    TableContext tableContext;/*from w  w  w  . ja  v a2s .com*/
    LOG.info("Creating table context for table [{0}]", name);
    Configuration configuration = getSystemConfiguration();
    BlurConfiguration blurConfiguration = getSystemBlurConfiguration();
    Map<String, String> tableProperties = tableDescriptor.getTableProperties();
    if (tableProperties != null) {
        for (Entry<String, String> prop : tableProperties.entrySet()) {
            configuration.set(prop.getKey(), prop.getValue());
            blurConfiguration.set(prop.getKey(), prop.getValue());
        }
    }

    tableContext = new TableContext();
    tableContext._configuration = configuration;
    tableContext._blurConfiguration = blurConfiguration;
    tableContext._tablePath = new Path(tableUri);

    tableContext._defaultFieldName = SUPER;
    tableContext._table = name;
    tableContext._descriptor = tableDescriptor;
    tableContext._timeBetweenCommits = configuration.getLong(BLUR_SHARD_TIME_BETWEEN_COMMITS, 60000);
    tableContext._timeBetweenRefreshs = configuration.getLong(BLUR_SHARD_TIME_BETWEEN_REFRESHS, 5000);
    tableContext._defaultPrimeDocTerm = new Term(BlurConstants.PRIME_DOC, BlurConstants.PRIME_DOC_VALUE);
    tableContext._defaultScoreType = ScoreType.SUPER;

    // TODO make configurable
    tableContext._discoverableFields = new HashSet<String>(
            Arrays.asList(BlurConstants.ROW_ID, BlurConstants.RECORD_ID, BlurConstants.FAMILY));

    // TODO make configurable
    tableContext._accessControlFactory = new FilterAccessControlFactory();

    boolean strict = tableDescriptor.isStrictTypes();
    String defaultMissingFieldType = tableDescriptor.getDefaultMissingFieldType();
    boolean defaultMissingFieldLessIndexing = tableDescriptor.isDefaultMissingFieldLessIndexing();
    Map<String, String> defaultMissingFieldProps = emptyIfNull(tableDescriptor.getDefaultMissingFieldProps());

    Path storagePath = new Path(tableContext._tablePath, TYPES);
    try {
        FieldManager fieldManager;
        if (remote) {
            fieldManager = new ThriftFieldManager(SUPER, new NoStopWordStandardAnalyzer(), strict,
                    defaultMissingFieldType, defaultMissingFieldLessIndexing, defaultMissingFieldProps,
                    configuration, client, name);
        } else {
            fieldManager = new HdfsFieldManager(SUPER, new NoStopWordStandardAnalyzer(), storagePath,
                    configuration, strict, defaultMissingFieldType, defaultMissingFieldLessIndexing,
                    defaultMissingFieldProps);
        }
        loadCustomTypes(tableContext, blurConfiguration, fieldManager);
        fieldManager.loadFromStorage();
        tableContext._fieldManager = fieldManager;
    } catch (IOException e) {
        throw new RuntimeException(e);
    }

    Class<?> c1 = configuration.getClass(BLUR_SHARD_INDEX_DELETION_POLICY_MAXAGE,
            KeepOnlyLastCommitDeletionPolicy.class);
    tableContext._indexDeletionPolicy = (IndexDeletionPolicy) configure(
            ReflectionUtils.newInstance(c1, configuration), tableContext);
    Class<?> c2 = configuration.getClass(BLUR_SHARD_INDEX_SIMILARITY, FairSimilarity.class);
    tableContext._similarity = (Similarity) configure(ReflectionUtils.newInstance(c2, configuration),
            tableContext);

    String readInterceptorClass = blurConfiguration.get(BLUR_SHARD_READ_INTERCEPTOR);
    if (readInterceptorClass == null || readInterceptorClass.trim().isEmpty()) {
        tableContext._readInterceptor = DEFAULT_INTERCEPTOR;
    } else {
        try {
            @SuppressWarnings("unchecked")
            Class<? extends ReadInterceptor> clazz = (Class<? extends ReadInterceptor>) Class
                    .forName(readInterceptorClass);
            Constructor<? extends ReadInterceptor> constructor = clazz
                    .getConstructor(new Class[] { BlurConfiguration.class });
            tableContext._readInterceptor = constructor.newInstance(blurConfiguration);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }
    tableContext._similarity = (Similarity) configure(ReflectionUtils.newInstance(c2, configuration),
            tableContext);
    // DEFAULT_INTERCEPTOR

    _cache.put(name, tableContext);
    return tableContext.clone();
}

From source file:org.apache.crunch.types.avro.AvroMode.java

License:Apache License

@SuppressWarnings("unchecked")
void setFromConfiguration(Configuration conf) {
    // although the shuffle and input/output use different properties for mode,
    // this is shared - only one ReaderWriterFactory can be used.
    Class<?> factoryClass = conf.getClass(propName, this.getClass());
    if (factoryClass != this.getClass()) {
        this.factory = (ReaderWriterFactory) ReflectionUtils.newInstance(factoryClass, conf);
    }/* w  ww.ja  va2s  . co m*/
}

From source file:org.apache.crunch.types.avro.SafeAvroSerialization.java

License:Apache License

/**
 * Returns the specified map output deserializer. Defaults to the final output
 * deserializer if no map output schema was specified.
 *//*from   ww  w .j  a v  a 2s. c  o m*/
public Deserializer<AvroWrapper<T>> getDeserializer(Class<AvroWrapper<T>> c) {
    boolean isKey = AvroKey.class.isAssignableFrom(c);
    Configuration conf = getConf();
    Schema schema = isKey ? Pair.getKeySchema(AvroJob.getMapOutputSchema(conf))
            : Pair.getValueSchema(AvroJob.getMapOutputSchema(conf));

    DatumReader<T> datumReader = null;
    if (conf.getBoolean(AvroJob.MAP_OUTPUT_IS_REFLECT, false)) {
        ReflectDataFactory factory = (ReflectDataFactory) ReflectionUtils
                .newInstance(conf.getClass("crunch.reflectdatafactory", ReflectDataFactory.class), conf);
        datumReader = factory.getReader(schema);
    } else {
        datumReader = new SpecificDatumReader<T>(schema);
    }
    return new AvroWrapperDeserializer(datumReader, isKey);
}

From source file:org.apache.giraph.graph.BspUtils.java

License:Apache License

/**
 * Get the user's subclassed vertex index class.
 *
 * @param <I> Vertex id/*ww w  . j ava2  s.c  o  m*/
 * @param conf Configuration to check
 * @return User's vertex index class
 */
@SuppressWarnings("unchecked")
public static <I extends Writable> Class<I> getVertexIndexClass(Configuration conf) {
    return (Class<I>) conf.getClass(GiraphJob.VERTEX_INDEX_CLASS, WritableComparable.class);
}

From source file:org.apache.giraph.graph.BspUtils.java

License:Apache License

/**
 * Get the user's subclassed vertex value class.
 *
 * @param <V> Vertex data/*from  w ww . j  a va 2  s.c  o m*/
 * @param conf Configuration to check
 * @return User's vertex value class
 */
@SuppressWarnings("unchecked")
public static <V extends Writable> Class<V> getVertexValueClass(Configuration conf) {
    return (Class<V>) conf.getClass(GiraphJob.VERTEX_VALUE_CLASS, Writable.class);
}

From source file:org.apache.giraph.graph.BspUtils.java

License:Apache License

/**
 * Get the user's subclassed edge value class.
 *
 * @param <E> Edge data/*from w ww .  jav a2s . c  o m*/
 * @param conf Configuration to check
 * @return User's vertex edge value class
 */
@SuppressWarnings("unchecked")
public static <E extends Writable> Class<E> getEdgeValueClass(Configuration conf) {
    return (Class<E>) conf.getClass(GiraphJob.EDGE_VALUE_CLASS, Writable.class);
}

From source file:org.apache.giraph.graph.BspUtils.java

License:Apache License

/**
 * Get the user's subclassed vertex message value class.
 *
 * @param <M> Message data/*from   w w w .  j  a  v  a  2s. com*/
 * @param conf Configuration to check
 * @return User's vertex message value class
 */
@SuppressWarnings("unchecked")
public static <M extends Writable> Class<M> getMessageValueClass(Configuration conf) {
    return (Class<M>) conf.getClass(GiraphJob.MESSAGE_VALUE_CLASS, Writable.class);
}