Java tutorial
/** * Author: Jose M. Gimenez-Garcia: josemiguel.gimenez@alumnos.uva.es * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * * Contacting the authors: * Jose M. Gimenez-Garcia: josemiguel.gimenez@alumnos.uva.es * Javier D. Fernandez: jfergar@infor.uva.es, javier.fernandez@wu.ac.at * Miguel A. Martinez-Prieto: migumar2@infor.uva.es */ package org.rdfhdt.mrbuilder; import java.io.IOException; import org.apache.commons.io.FilenameUtils; import org.apache.commons.lang.StringUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.rdfhdt.hdt.options.HDTSpecification; import com.beust.jcommander.JCommander; import com.beust.jcommander.Parameter; public class HDTBuilderConfiguration { public final static int CHUNK_SIZE = 1 * 1024 * 1024; public final static String SHARED = "shared"; public final static String SUBJECTS = "subjects"; public final static String PREDICATES = "predicates"; public final static String OBJECTS = "objects"; public final static String SAMPLE = "samples"; public final static String SHARED_OUTPUT_PATH = SHARED + "/"; public final static String SUBJECTS_OUTPUT_PATH = SUBJECTS + "/"; public final static String PREDICATES_OUTPUT_PATH = PREDICATES + "/"; public final static String OBJECTS_OUTPUT_PATH = OBJECTS + "/"; public final static String SAMPLE_OUTPUT_PATH = SAMPLE + "/"; final static String DEFAULT_CONFIGURATION_PATH = "HDTMRBuilder.xml"; final static String AWS_BUCKET_NAME = "global.bucket"; final static String AWS_BUCKET_DEFAULT_VALUE = null; final static String BASE_PATH_NAME = "global.path.base"; final static String BASE_PATH_DEFAULT_VALUE = "."; final static String INPUT_PATH_NAME = "global.path.input"; final static String INPUT_PATH_DEFAULT_VALUE = "input"; final static String DICTIONARY_RUN_JOB_NAME = "job.dictionary.run"; final static Boolean DICTIONARY_RUN_JOB_DEFAULT_VALUE = true; final static String DICTIONARY_JOB_NAME_NAME = "job.dictionary.name"; final static String DICTIONARY_JOB_NAME_DEFAULT_VALUE = "DictionaryJob"; final static String DICTIONARY_OUTPUT_PATH_NAME = "job.dictionary.path.output"; final static String DICTIONARY_OUTPUT_PATH_DEFAULT_VALUE = "dictionary"; final static String DICTIONARY_DELETE_OUTPUT_PATH_NAME = "job.dictionary.path.output.delete"; final static boolean DICTIONARY_DELETE_OUTPUT_PATH_DEFAULT_VALUE = false; final static String DICTIONARY_NUM_REDUCERS_NAME = "job.dictionary.reducers"; final static int DICTIONARY_NUM_REDUCERS_DEFAULT_VALUE = 1; final static String DICTIONARY_RUN_SAMPLE_NAME = "job.dictionary.sample.run"; final static boolean DICTIONARY_RUN_SAMPLE_DEFAULT_VALUE = true; final static String DICTIONARY_SAMPLE_PROBABILITY_NAME = "job.dictionary.sample.probability"; final static float DICTIONARY_SAMPLE_PROBABILITY_DEFAULT_VALUE = (float) 0.001; final static String DICTIONARY_SAMPLE_OUTPUT_PATH_NAME = "job.dictionary.path.sample"; final static String DICTIONARY_SAMPLE_OUTPUT_PATH_DEFAULT_VALUE = "dictionary_samples"; final static String DICTIONARY_DELETE_SAMPLE_PATH_NAME = "job.dictionary.path.sample.delete"; final static boolean DICTIONARY_DELETE_SAMPLE_PATH_DEFAULT_VALUE = false; final static String DICTIONARY_SAMPLE_NUM_REDUCERS_NAME = "job.dictionary.sample.reducers"; final static int DICTIONARY_SAMPLE_NUM_REDUCERS_DEFAULT_VALUE = 1; final static String HDTDICTIONARY_BUILD_NAME = "hdt.dictionary.build"; final static boolean HDTDICTIONARY_BUILD_DEFAULT_VALUE = true; final static String HDTDICTIONARY_FILE_NAME = "hdt.dictionary.file"; final static String HDTDICTIONARY_FILE_DEFAULT_VALUE = "dictionary.hdt"; final static String HDTDICTIONARY_DISTRIBUTION_NAME = "job.triples.dictionary.distribution"; final static int HDTDICTIONARY_DISTRIBUTION_DEFAULT_VALUE = 1; final static String TRIPLES_RUN_JOB_NAME = "job.triples.run"; final static boolean TRIPLES_RUN_JOB_DEFAULT_VALUE = true; final static String TRIPLES_JOB_NAME_NAME = "job.triples.name"; final static String TRIPLES_JOB_NAME_DEFAULT_VALUE = "TriplesJob"; // final static String TRIPLES_MAP_DICTIONARY_FILE_NAME = "job.triples.map.dictionary.file"; // final static String TRIPLES_MAP_DICTIONARY_FILE_DEFAULT_VALUE = "dictionary_map.hdt"; // final static String TRIPLES_REDUCE_DICTIONARY_FILE_NAME = "job.triples.reduce.dictionary.file"; // final static String TRIPLES_REDUCE_DICTIONARY_FILE_DEFAULT_VALUE = "dictionary_reduce.hdt"; final static String TRIPLES_OUTPUT_PATH_NAME = "job.triples.path.output"; final static String TRIPLES_OUTPUT_PATH_DEFAULT_VALUE = "triples"; final static String TRIPLES_DELETE_OUTPUT_PATH_NAME = "job.triples.path.output.delete"; final static boolean TRIPLES_DELETE_OUTPUT_PATH_DEFAULT_VALUE = false; final static String TRIPLES_NUM_REDUCERS_NAME = "job.triples.reducers"; final static int TRIPLES_NUM_REDUCERS_DEFAULT_VALUE = 1; final static String TRIPLES_RUN_SAMPLE_NAME = "job.triples.sample.run"; final static boolean TRIPLES_RUN_SAMPLE_DEFAULT_VALUE = true; final static String TRIPLES_SAMPLE_PROBABILITY_NAME = "job.triples.sample.probability"; final static float TRIPLES_SAMPLE_PROBABILITY_DEFAULT_VALUE = (float) 0.001; final static String TRIPLES_SAMPLE_OUTPUT_PATH_NAME = "job.triples.path.sample"; final static String TRIPLES_SAMPLE_OUTPUT_PATH_DEFAULT_VALUE = "triples_samples"; final static String TRIPLES_DELETE_SAMPLE_PATH_NAME = "job.triples.path.sample.delete"; final static boolean TRIPLES_DELETE_SAMPLE_PATH_DEFAULT_VALUE = false; final static String TRIPLES_SAMPLE_NUM_REDUCERS_NAME = "job.triples.sample.reducers"; final static int TRIPLES_SAMPLE_NUM_REDUCERS_DEFAULT_VALUE = 1; final static String HDT_BUILD_NAME = "hdt.build"; final static boolean HDT_BUILD_DEFAULT_VALUE = true; final static String HDT_OUTPUT_PATH_NAME = "hdt.path.output"; final static String HDT_OUTPUT_PATH_DEFAULT_VALUE = "hdt_output"; final static String HDT_FILE_NAME = "hdt.file"; final static String HDT_FILE_DEFAULT_VALUE = "output.hdt"; final static String CONFIG_FILE_NAME = "hdt-lib.configFile"; final static String CONFIG_FILE_DEFAULT_VALUE = null; final static String OPTIONS_NAME = "hdtl-lib.options"; final static String OPTIONS_DEFAULT_VALUE = null; final static String RDF_TYPE_NAME = "hdt-lib.rdfType"; final static String RDF_TYPE_DEFAULT_VALUE = "ntriples"; final static String QUIET_NAME = "hdt-lib.quiet"; final static boolean QUIET_DEFAULT_VALUE = false; final static String BASE_URI_NAME = "hdt-lib.baseUri"; final static String BASE_URI_DEFAULT_VALUE = "http://rdfhdt.org/HDTMR"; final static String GENERATE_INDEX_NAME = "hdt-lib.generateIndex"; final static boolean GENERATE_INDEX_DEFAULT_VALUE = false; JCommander jc; @Parameter(names = { "-h", "--help" }, help = true, hidden = true) boolean help = false; @Parameter(names = { "-a", "--awsbucket" }, description = "Amazon Web Services bucket") String pAwsBucket = null; @Parameter(names = { "-c", "--conf" }, description = "Path to configuration file") String pConfigFile = null; @Parameter(names = { "-b", "--basedir" }, description = "Root directory for the process") String pBasePath = null; @Parameter(names = { "-rd", "--rundictionary" }, description = "Whether to run dictionary job or not", arity = 1) Boolean pRunDictionary = null; @Parameter(names = { "-rds", "--rundictionarysampling" }, description = "Whether to run dictionary input sampling job or not", arity = 1) Boolean pRunDictionarySampling = null; @Parameter(names = { "-nd", "--namedictionaryjob" }, description = "Name of dictionary job") String pDictionaryName = null; @Parameter(names = { "-i", "--input" }, description = "Path to input files. Relative to basedir") String pInputPath = null; @Parameter(names = { "-sd", "--samplesdictionary" }, description = "Path to dictionary job sample files. Relative to basedir") String pDictionarySamplePath = null; @Parameter(names = { "-st", "--samplestriples" }, description = "Path to triples job sample files. Relative to basedir") String pTriplesSamplePath = null; @Parameter(names = { "-od", "--outputdictionary" }, description = "Path to dictionary job output files. Relative to basedir") String pDictionaryOutputPath = null; @Parameter(names = { "-dd", "--deleteoutputdictionary" }, description = "Delete dictionary job output path before running job") Boolean pDeleteDictionaryOutputPath = null; @Parameter(names = { "-dsd", "--deletesampledictionary" }, description = "Delete dictionary job sample path before running job") Boolean pDeleteDictionarySamplePath = null; @Parameter(names = { "-dst", "--deletesampletriples" }, description = "Delete triples job sample path before running job") Boolean pDeleteTriplesSamplePath = null; @Parameter(names = { "-Rd", "--reducersdictionary" }, description = "Number of reducers for dictionary job") Integer pNumReducersDictionary = null; @Parameter(names = { "-Rds", "--reducersdictionarysampling" }, description = "Number of reducers for dictionary input sampling job") Integer pNumReducersDictionarySampling = null; @Parameter(names = { "-bd", "--builddictionary" }, description = "Whether to build HDT dictionary or not", arity = 1) Boolean pBuildDictionary = null; @Parameter(names = { "-bh", "--buildhdt" }, description = "Whether to build HDT or not", arity = 1) Boolean pBuildHDT = null; @Parameter(names = { "-fd", "--filedictionary" }, description = "Name of hdt dictionary file") String pDictionaryFileName = null; @Parameter(names = { "-fm", "--filesubjects" }, description = "Name of hdt dictionary file for Mappers") String pMapDictionaryFileName = null; @Parameter(names = { "-fr", "--fileobjects" }, description = "Name of hdt dictionary file for Reducers") String pReduceDictionaryFileName = null; @Parameter(names = { "-d", "--dictionarydistribution" }, description = "Dictionary distribution among mappers and reducers") Integer pDictionaryDistribution = null; @Parameter(names = { "-rt", "--runtriples" }, description = "Whether to run triples job or not", arity = 1) Boolean pRunTriples = null; @Parameter(names = { "-rts", "--runtriplessampling" }, description = "Whether to run triples input sampling job or not", arity = 1) Boolean pRunTriplesSampling = null; @Parameter(names = { "-nt", "--nametriplesjob" }, description = "Name of triples job") String pTriplesName = null; @Parameter(names = { "-it", "--inputtriples" }, description = "Path to triples job input files. Relative to basedir") String pTriplesInputPath = null; @Parameter(names = { "-ot", "--outputtriples" }, description = "Path to triples job output files. Relative to basedir") String pTriplesOutputPath = null; @Parameter(names = { "-dt", "--deleteoutputtriples" }, description = "Delete triples job output path before running job") Boolean pDeleteTriplesOutputPath = null; @Parameter(names = { "-Rt", "--reducerstriples" }, description = "Number of reducers for triples job") Integer pNumReducersTriples = null; @Parameter(names = { "-Rts", "--reducerstriplessampling" }, description = "Number of reducers for triples input sampling job") Integer pNumReducersTriplesSampling = null; @Parameter(names = { "-fh", "--namehdtfile" }, description = "Name of hdt file") String pHdtFileName = null; @Parameter(names = { "-hc", "--hdtconf" }, description = "Conversion config file") String pHdtConfigFile = null; @Parameter(names = { "-o", "--options" }, description = "HDT Conversion options (override those of config file)") String pOptions = null; @Parameter(names = { "-t", "--rdftype" }, description = "Type of RDF Input (ntriples, nquad, n3, turtle, rdfxml)") String pRdfType = null; @Parameter(names = { "-bu", "--baseURI" }, description = "Base URI for the dataset") String pBaseURI = null; @Parameter(names = { "-q", "--quiet" }, description = "Do not show progress of the conversion") Boolean pQuiet = null; @Parameter(names = { "-x", "--index" }, description = "Generate also external indices to solve all queries") Boolean pGenerateIndex = null; @Parameter(names = { "-p", "--sampleprobability" }, description = "Probability of using each element for sampling") Float pSampleProbability = null; Path inputPath = null, dictionarySamplesPath = null, dictionaryOutputPath = null, sharedOutputPath = null, subjectsOutputPath = null, predicatesOutputPath = null, objectsOutputPath = null; Path dictionaryCountersFile = null, triplesSamplesPath = null, triplesCountersFile = null, hdtDictionarySPOFile = null, hdtMapDictionaryFile = null, hdtReduceDictionaryFile = null, hdtFile = null; Path triplesInputPath = null, triplesOutputPath = null; Configuration mrConfiguration = new Configuration(); HDTSpecification spec; // This constructor is to be used by Tasks (Mappers and/or Reducers) public HDTBuilderConfiguration(Configuration config) throws IOException { this.mrConfiguration = config; } // This constructor is to be used by Drivers public HDTBuilderConfiguration(String[] args) { this.jc = new JCommander(this, args); if (this.help) { this.jc.usage(); System.exit(1); } this.addConfigurationResource(this.getConfigFile()); // FIXME: Esto debera hacerse para todos los parmetros pasados por // lnea de comandos this.setProperty(DICTIONARY_OUTPUT_PATH_NAME, this.getDictionaryOutputPath().toString()); } private void addConfigurationResource(String configurationPath) { this.mrConfiguration.addResource(new Path(configurationPath)); } private String getConfigFile() { return this.addBucket(this.pConfigFile != null ? this.pConfigFile : DEFAULT_CONFIGURATION_PATH); } public Configuration getConfigurationObject() { return this.mrConfiguration; } public void setProperty(String name, String value) { this.mrConfiguration.set(name, value); } public void setProperty(String name, int value) { this.mrConfiguration.setInt(name, value); } public String getAwsBucket() { return this.get(this.pAwsBucket, AWS_BUCKET_NAME, AWS_BUCKET_DEFAULT_VALUE); } public boolean runDictionary() { return this.get(this.pRunDictionary, DICTIONARY_RUN_JOB_NAME, DICTIONARY_RUN_JOB_DEFAULT_VALUE); } public boolean runDictionarySampling() { return this.get(this.pRunDictionarySampling, DICTIONARY_RUN_SAMPLE_NAME, DICTIONARY_RUN_SAMPLE_DEFAULT_VALUE); } public boolean runTriples() { return this.get(this.pRunTriples, TRIPLES_RUN_JOB_NAME, TRIPLES_RUN_JOB_DEFAULT_VALUE); } public boolean runTriplesSampling() { return this.get(this.pRunTriplesSampling, TRIPLES_RUN_SAMPLE_NAME, TRIPLES_RUN_SAMPLE_DEFAULT_VALUE); } public boolean buildDictionary() { return this.get(this.pBuildDictionary, HDTDICTIONARY_BUILD_NAME, HDTDICTIONARY_BUILD_DEFAULT_VALUE); } public boolean buildHDT() { return this.get(this.pBuildHDT, HDT_BUILD_NAME, HDT_BUILD_DEFAULT_VALUE); } public String getDictionaryJobName() { return this.get(this.pTriplesName, DICTIONARY_JOB_NAME_NAME, DICTIONARY_JOB_NAME_DEFAULT_VALUE); } public String getTriplesJobName() { return this.get(this.pTriplesName, DICTIONARY_JOB_NAME_NAME, DICTIONARY_JOB_NAME_DEFAULT_VALUE); } public Path getInputPath() { if (this.inputPath == null) { this.inputPath = new Path( this.getPath(this.get(this.pInputPath, INPUT_PATH_NAME, INPUT_PATH_DEFAULT_VALUE))); } return this.inputPath; } public Path getDictionaryOutputPath() { if (this.dictionaryOutputPath == null) { this.dictionaryOutputPath = new Path(this.getPath(this.get(this.pDictionaryOutputPath, DICTIONARY_OUTPUT_PATH_NAME, DICTIONARY_OUTPUT_PATH_DEFAULT_VALUE))); } return this.dictionaryOutputPath; } public Path getSharedSectionPath() { if (this.sharedOutputPath == null) { this.sharedOutputPath = new Path(this.getPath(this.get(this.pDictionaryOutputPath, DICTIONARY_OUTPUT_PATH_NAME, DICTIONARY_OUTPUT_PATH_DEFAULT_VALUE)) + "/" + SHARED_OUTPUT_PATH); } return this.sharedOutputPath; } public Path getSubjectsSectionPath() { if (this.subjectsOutputPath == null) { this.subjectsOutputPath = new Path( this.getPath(this.get(this.pDictionaryOutputPath, DICTIONARY_OUTPUT_PATH_NAME, DICTIONARY_OUTPUT_PATH_DEFAULT_VALUE)) + "/" + SUBJECTS_OUTPUT_PATH); } return this.subjectsOutputPath; } public Path getPredicatesSectionPath() { if (this.predicatesOutputPath == null) { this.predicatesOutputPath = new Path( this.getPath(this.get(this.pDictionaryOutputPath, DICTIONARY_OUTPUT_PATH_NAME, DICTIONARY_OUTPUT_PATH_DEFAULT_VALUE)) + "/" + PREDICATES_OUTPUT_PATH); } return this.predicatesOutputPath; } public Path getObjectsSectionPath() { if (this.objectsOutputPath == null) { this.objectsOutputPath = new Path( this.getPath(this.get(this.pDictionaryOutputPath, DICTIONARY_OUTPUT_PATH_NAME, DICTIONARY_OUTPUT_PATH_DEFAULT_VALUE)) + "/" + OBJECTS_OUTPUT_PATH); } return this.objectsOutputPath; } public Path getDictionarySamplesPath() { if (this.dictionarySamplesPath == null) { this.dictionarySamplesPath = new Path(this.getPath(this.get(this.pDictionarySamplePath, DICTIONARY_SAMPLE_OUTPUT_PATH_NAME, DICTIONARY_SAMPLE_OUTPUT_PATH_DEFAULT_VALUE))); } return this.dictionarySamplesPath; } public Path getTriplesSamplesPath() { if (this.triplesSamplesPath == null) { this.triplesSamplesPath = new Path(this.getPath(this.get(this.pTriplesSamplePath, TRIPLES_SAMPLE_OUTPUT_PATH_NAME, TRIPLES_SAMPLE_OUTPUT_PATH_DEFAULT_VALUE))); } return this.triplesSamplesPath; } public float getSampleProbability() { return this.get(this.pSampleProbability, DICTIONARY_SAMPLE_PROBABILITY_NAME, DICTIONARY_SAMPLE_PROBABILITY_DEFAULT_VALUE); } public Path getDictionaryCountersFile() { if (this.dictionaryCountersFile == null) { this.dictionaryCountersFile = new Path(this.getPath(this.get(this.pDictionaryOutputPath, DICTIONARY_OUTPUT_PATH_NAME, DICTIONARY_OUTPUT_PATH_DEFAULT_VALUE)) + ".info"); } return this.dictionaryCountersFile; } public Path getDictionaryFile() { if (this.hdtDictionarySPOFile == null) { this.hdtDictionarySPOFile = new Path(this .getPath(this.get(this.pDictionaryOutputPath, DICTIONARY_OUTPUT_PATH_NAME, DICTIONARY_OUTPUT_PATH_DEFAULT_VALUE)) + "/" + this.get(this.pDictionaryFileName, HDTDICTIONARY_FILE_NAME, HDTDICTIONARY_FILE_DEFAULT_VALUE)); } return this.hdtDictionarySPOFile; } // public Path getDictionaryMapFile() { // if (this.hdtMapDictionaryFile == null) { // this.hdtMapDictionaryFile = new Path(this.getPath(this.get(this.pDictionaryOutputPath, DICTIONARY_OUTPUT_PATH_NAME, DICTIONARY_OUTPUT_PATH_DEFAULT_VALUE)) + "/" + this.get(this.pMapDictionaryFileName, TRIPLES_MAP_DICTIONARY_FILE_NAME, TRIPLES_MAP_DICTIONARY_FILE_DEFAULT_VALUE)); // } // return this.hdtMapDictionaryFile; // } // // public Path getDictionaryReduceFile() { // if (this.hdtReduceDictionaryFile == null) { // this.hdtReduceDictionaryFile = new Path(this.getPath(this.get(this.pDictionaryOutputPath, DICTIONARY_OUTPUT_PATH_NAME, DICTIONARY_OUTPUT_PATH_DEFAULT_VALUE)) + "/" + this.get(this.pReduceDictionaryFileName, TRIPLES_REDUCE_DICTIONARY_FILE_NAME, TRIPLES_REDUCE_DICTIONARY_FILE_DEFAULT_VALUE)); // } // return this.hdtReduceDictionaryFile; // } public int getDictionaryDistribution() { return this.get(this.pDictionaryDistribution, HDTDICTIONARY_DISTRIBUTION_NAME, HDTDICTIONARY_DISTRIBUTION_DEFAULT_VALUE); } public Path getTriplesOutputPath() { if (this.triplesOutputPath == null) { this.triplesOutputPath = new Path(this.getPath(this.get(this.pTriplesOutputPath, TRIPLES_OUTPUT_PATH_NAME, TRIPLES_OUTPUT_PATH_DEFAULT_VALUE))); } return this.triplesOutputPath; } public Path getTriplesCountersFile() { if (this.triplesCountersFile == null) { this.triplesCountersFile = new Path(this.getPath( this.get(this.pTriplesOutputPath, TRIPLES_OUTPUT_PATH_NAME, TRIPLES_OUTPUT_PATH_DEFAULT_VALUE)) + ".info"); } return this.triplesCountersFile; } public Path getHDTFile() { if (this.hdtFile == null) { this.hdtFile = new Path( this.getPath(this.get(this.pHdtFileName, HDT_FILE_NAME, HDT_FILE_DEFAULT_VALUE))); } return this.hdtFile; } public boolean getDeleteDictionaryOutputPath() { return this.get(this.pDeleteDictionaryOutputPath, DICTIONARY_DELETE_OUTPUT_PATH_NAME, DICTIONARY_DELETE_OUTPUT_PATH_DEFAULT_VALUE); } public boolean getDeleteDictionarySamplesPath() { return this.get(this.pDeleteDictionarySamplePath, DICTIONARY_DELETE_SAMPLE_PATH_NAME, DICTIONARY_DELETE_SAMPLE_PATH_DEFAULT_VALUE); } public boolean getDeleteTriplesOutputPath() { return this.get(this.pDeleteTriplesOutputPath, TRIPLES_DELETE_OUTPUT_PATH_NAME, TRIPLES_DELETE_OUTPUT_PATH_DEFAULT_VALUE); } public boolean getDeleteTriplesSamplesPath() { return this.get(this.pDeleteTriplesSamplePath, TRIPLES_DELETE_SAMPLE_PATH_NAME, TRIPLES_DELETE_SAMPLE_PATH_DEFAULT_VALUE); } public int getDictionaryReducers() { return this.get(this.pNumReducersDictionary, DICTIONARY_NUM_REDUCERS_NAME, DICTIONARY_NUM_REDUCERS_DEFAULT_VALUE); } public int getDictionarySampleReducers() { return this.get(this.pNumReducersDictionarySampling, DICTIONARY_SAMPLE_NUM_REDUCERS_NAME, DICTIONARY_SAMPLE_NUM_REDUCERS_DEFAULT_VALUE); } public int getTriplesReducers() { return this.get(this.pNumReducersTriples, TRIPLES_NUM_REDUCERS_NAME, TRIPLES_NUM_REDUCERS_DEFAULT_VALUE); } public int getTriplesSampleReducers() { return this.get(this.pNumReducersTriplesSampling, TRIPLES_SAMPLE_NUM_REDUCERS_NAME, TRIPLES_SAMPLE_NUM_REDUCERS_DEFAULT_VALUE); } public String getHdtConfigFile() { return this.getPath(this.get(this.pHdtConfigFile, CONFIG_FILE_NAME, CONFIG_FILE_DEFAULT_VALUE)); } public String getOptions() { return this.get(this.pOptions, OPTIONS_NAME, OPTIONS_DEFAULT_VALUE); } public String getRdfType() { return this.get(this.pRdfType, RDF_TYPE_NAME, RDF_TYPE_DEFAULT_VALUE); } public boolean getQuiet() { return this.get(this.pQuiet, QUIET_NAME, QUIET_DEFAULT_VALUE); } public String getBaseURI() { return this.get(this.pBaseURI, BASE_URI_NAME, BASE_URI_DEFAULT_VALUE); } public HDTSpecification getSpec() throws IOException { if (this.spec == null) { if (this.getHdtConfigFile() != null) { this.spec = new HDTSpecification(this.getHdtConfigFile()); } else { this.spec = new HDTSpecification(); } if (this.getOptions() != null) { this.spec.setOptions(this.getOptions()); } } return this.spec; } private String get(String paramValue, String confName, String defaultValue) { return paramValue != null ? paramValue : this.mrConfiguration.get(confName, defaultValue); } private boolean get(Boolean paramValue, String confName, boolean defaultValue) { return paramValue != null ? paramValue : this.mrConfiguration.getBoolean(confName, defaultValue); } private int get(Integer paramValue, String confName, int defaultValue) { return paramValue != null ? paramValue : this.mrConfiguration.getInt(confName, defaultValue); } private float get(Float paramValue, String confName, float defaultValue) { return paramValue != null ? paramValue : this.mrConfiguration.getFloat(confName, defaultValue); } private String getPath(String path) { // Add Base Path return FilenameUtils.concat(this.get(this.pBasePath, BASE_PATH_NAME, BASE_PATH_DEFAULT_VALUE), path); } private String addBucket(String path) { // If bucket is provided as parameter, and configuration path is // relative, create absolute configuration path if (this.getAwsBucket() != null && !path.startsWith("s3n://")) { path = "s3n://" + this.getAwsBucket() + "/" + StringUtils.removeStart(path, "/"); } return path; } // private void set(Integer paramValue, String confName, int defautlValue) { // mrConfiguration.setInt(confName, paramValue != null ? paramValue : // mrConfiguration.getInt(confName, defautlValue)); // } }