List of usage examples for org.apache.hadoop.conf Configuration getBoolean
public boolean getBoolean(String name, boolean defaultValue)
Gets the value of the name property as a boolean. If no such property is specified, or if the specified value is not a valid boolean, then defaultValue is returned.
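Before the real-world sources below, here is a minimal, self-contained sketch of the call. The property names used here are hypothetical and chosen only for illustration:

import org.apache.hadoop.conf.Configuration;

public class GetBooleanExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Hypothetical property name, set only for this sketch.
        conf.set("example.feature.enabled", "true");
        // The property is set to a valid boolean string, so this returns true.
        boolean enabled = conf.getBoolean("example.feature.enabled", false);
        // This property was never set, so the supplied default (false) is returned.
        boolean missing = conf.getBoolean("example.other.flag", false);
        System.out.println(enabled + " " + missing);
    }
}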
From source file:ldbc.snb.datagen.generator.LDBCDatagen.java
License:Open Source License
public int runGenerateJob(Configuration conf) throws Exception {
    String hadoopPrefix = conf.get("ldbc.snb.datagen.serializer.hadoopDir");
    FileSystem fs = FileSystem.get(conf);
    ArrayList<Float> percentages = new ArrayList<Float>();
    percentages.add(0.45f);
    percentages.add(0.45f);
    percentages.add(0.1f);
    //percentages.add(1.0f);
    //percentages.add(0.1f);

    long start = System.currentTimeMillis();
    printProgress("Starting: Person generation");
    long startPerson = System.currentTimeMillis();
    HadoopPersonGenerator personGenerator = new HadoopPersonGenerator(conf);
    personGenerator.run(hadoopPrefix + "/persons", "ldbc.snb.datagen.hadoop.UniversityKeySetter");
    long endPerson = System.currentTimeMillis();

    printProgress("Creating university location correlated edges");
    long startUniversity = System.currentTimeMillis();
    HadoopKnowsGenerator knowsGenerator = new HadoopKnowsGenerator(conf,
            "ldbc.snb.datagen.hadoop.UniversityKeySetter",
            "ldbc.snb.datagen.hadoop.RandomKeySetter",
            percentages, 0,
            conf.get("ldbc.snb.datagen.generator.knowsGenerator"));
    knowsGenerator.run(hadoopPrefix + "/persons", hadoopPrefix + "/universityEdges");
    long endUniversity = System.currentTimeMillis();

    printProgress("Creating main interest correlated edges");
    long startInterest = System.currentTimeMillis();
    knowsGenerator = new HadoopKnowsGenerator(conf,
            "ldbc.snb.datagen.hadoop.InterestKeySetter",
            "ldbc.snb.datagen.hadoop.RandomKeySetter",
            percentages, 1,
            conf.get("ldbc.snb.datagen.generator.knowsGenerator"));
    knowsGenerator.run(hadoopPrefix + "/persons", hadoopPrefix + "/interestEdges");
    long endInterest = System.currentTimeMillis();

    printProgress("Creating random correlated edges");
    long startRandom = System.currentTimeMillis();
    knowsGenerator = new HadoopKnowsGenerator(conf,
            "ldbc.snb.datagen.hadoop.RandomKeySetter",
            "ldbc.snb.datagen.hadoop.RandomKeySetter",
            percentages, 2,
            "ldbc.snb.datagen.generator.RandomKnowsGenerator");
    knowsGenerator.run(hadoopPrefix + "/persons", hadoopPrefix + "/randomEdges");
    long endRandom = System.currentTimeMillis();

    fs.delete(new Path(DatagenParams.hadoopDir + "/persons"), true);

    printProgress("Merging the different edge files");
    ArrayList<String> edgeFileNames = new ArrayList<String>();
    edgeFileNames.add(hadoopPrefix + "/universityEdges");
    edgeFileNames.add(hadoopPrefix + "/interestEdges");
    edgeFileNames.add(hadoopPrefix + "/randomEdges");
    long startMerge = System.currentTimeMillis();
    HadoopMergeFriendshipFiles merger = new HadoopMergeFriendshipFiles(conf,
            "ldbc.snb.datagen.hadoop.RandomKeySetter");
    merger.run(hadoopPrefix + "/mergedPersons", edgeFileNames);
    long endMerge = System.currentTimeMillis();

    /*printProgress("Creating edges to fill the degree gap");
    long startGap = System.currentTimeMillis();
    knowsGenerator = new HadoopKnowsGenerator(conf, null, "ldbc.snb.datagen.hadoop.DegreeGapKeySetter", 1.0f);
    knowsGenerator.run(personsFileName2, personsFileName1);
    fs.delete(new Path(personsFileName2), true);
    long endGap = System.currentTimeMillis();
    */

    printProgress("Serializing persons");
    long startPersonSerializing = System.currentTimeMillis();
    if (conf.getBoolean("ldbc.snb.datagen.serializer.persons.sort", false) == false) {
        HadoopPersonSerializer serializer = new HadoopPersonSerializer(conf);
        serializer.run(hadoopPrefix + "/mergedPersons");
    } else {
        HadoopPersonSortAndSerializer serializer = new HadoopPersonSortAndSerializer(conf);
        serializer.run(hadoopPrefix + "/mergedPersons");
    }
    long endPersonSerializing = System.currentTimeMillis();

    long startPersonActivity = System.currentTimeMillis();
    if (conf.getBoolean("ldbc.snb.datagen.generator.activity", true)) {
        printProgress("Generating and serializing person activity");
        HadoopPersonActivityGenerator activityGenerator = new HadoopPersonActivityGenerator(conf);
        activityGenerator.run(hadoopPrefix + "/mergedPersons");
        int numThreads = DatagenParams.numThreads;
        int blockSize = DatagenParams.blockSize;
        int numBlocks = (int) Math.ceil(DatagenParams.numPersons / (double) blockSize);
        for (int i = 0; i < numThreads; ++i) {
            if (i < numBlocks) {
                fs.copyToLocalFile(false,
                        new Path(DatagenParams.hadoopDir + "/m" + i + "personFactors.txt"), new Path("./"));
                fs.copyToLocalFile(false,
                        new Path(DatagenParams.hadoopDir + "/m" + i + "activityFactors.txt"), new Path("./"));
                fs.copyToLocalFile(false,
                        new Path(DatagenParams.hadoopDir + "/m0friendList" + i + ".csv"), new Path("./"));
            }
        }
    }
    long endPersonActivity = System.currentTimeMillis();

    long startSortingUpdateStreams = System.currentTimeMillis();
    if (conf.getBoolean("ldbc.snb.datagen.serializer.updateStreams", false)) {
        printProgress("Sorting update streams ");
        int blockSize = DatagenParams.blockSize;
        int numBlocks = (int) Math.ceil(DatagenParams.numPersons / (double) blockSize);
        List<String> personStreamsFileNames = new ArrayList<String>();
        List<String> forumStreamsFileNames = new ArrayList<String>();
        for (int i = 0; i < DatagenParams.numThreads; ++i) {
            int numPartitions = conf.getInt("ldbc.snb.datagen.serializer.numUpdatePartitions", 1);
            //if( i < numBlocks ) {
            for (int j = 0; j < numPartitions; ++j) {
                personStreamsFileNames.add(DatagenParams.hadoopDir + "/temp_updateStream_person_" + i + "_" + j);
                if (conf.getBoolean("ldbc.snb.datagen.generator.activity", false)) {
                    forumStreamsFileNames.add(DatagenParams.hadoopDir + "/temp_updateStream_forum_" + i + "_" + j);
                }
            }
            /*} else {
                for (int j = 0; j < numPartitions; ++j) {
                    fs.delete(new Path(DatagenParams.hadoopDir + "/temp_updateStream_person_" + i + "_" + j), true);
                    fs.delete(new Path(DatagenParams.hadoopDir + "/temp_updateStream_forum_" + i + "_" + j), true);
                }
            } */
        }
        HadoopUpdateStreamSorterAndSerializer updateSorterAndSerializer =
                new HadoopUpdateStreamSorterAndSerializer(conf);
        updateSorterAndSerializer.run(personStreamsFileNames, "person");
        updateSorterAndSerializer.run(forumStreamsFileNames, "forum");
        for (String file : personStreamsFileNames) {
            fs.delete(new Path(file), true);
        }
        for (String file : forumStreamsFileNames) {
            fs.delete(new Path(file), true);
        }

        /*for( int i = 0; i < DatagenParams.numThreads; ++i) {
            int numPartitions = conf.getInt("ldbc.snb.datagen.serializer.numUpdatePartitions", 1);
            if( i < numBlocks ) {
                for (int j = 0; j < numPartitions; ++j) {
                    HadoopFileSorter updateStreamSorter = new HadoopFileSorter(conf, LongWritable.class, Text.class);
                    HadoopUpdateStreamSerializer updateSerializer = new HadoopUpdateStreamSerializer(conf);
                    updateStreamSorter.run(DatagenParams.hadoopDir + "/temp_updateStream_person_" + i + "_" + j,
                            DatagenParams.hadoopDir + "/updateStream_person_" + i + "_" + j);
                    fs.delete(new Path(DatagenParams.hadoopDir + "/temp_updateStream_person_" + i + "_" + j), true);
                    updateSerializer.run(DatagenParams.hadoopDir + "/updateStream_person_" + i + "_" + j, i, j, "person");
                    fs.delete(new Path(DatagenParams.hadoopDir + "/updateStream_person_" + i + "_" + j), true);
                    if( conf.getBoolean("ldbc.snb.datagen.generator.activity", false)) {
                        updateStreamSorter.run(DatagenParams.hadoopDir + "/temp_updateStream_forum_" + i + "_" + j,
                                DatagenParams.hadoopDir + "/updateStream_forum_" + i + "_" + j);
                        fs.delete(new Path(DatagenParams.hadoopDir + "/temp_updateStream_forum_" + i + "_" + j), true);
                        updateSerializer.run(DatagenParams.hadoopDir + "/updateStream_forum_" + i + "_" + j, i, j, "forum");
                        fs.delete(new Path(DatagenParams.hadoopDir + "/updateStream_forum_" + i + "_" + j), true);
                    }
                }
            } else {
                for (int j = 0; j < numPartitions; ++j) {
                    fs.delete(new Path(DatagenParams.hadoopDir + "/temp_updateStream_person_" + i + "_" + j), true);
                    fs.delete(new Path(DatagenParams.hadoopDir + "/temp_updateStream_forum_" + i + "_" + j), true);
                }
            }
        }*/

        long minDate = Long.MAX_VALUE;
        long maxDate = Long.MIN_VALUE;
        long count = 0;
        for (int i = 0; i < DatagenParams.numThreads; ++i) {
            Path propertiesFile = new Path(
                    DatagenParams.hadoopDir + "/temp_updateStream_person_" + i + ".properties");
            FSDataInputStream file = fs.open(propertiesFile);
            Properties properties = new Properties();
            properties.load(file);
            long aux;
            aux = Long.parseLong(properties.getProperty("ldbc.snb.interactive.min_write_event_start_time"));
            minDate = aux < minDate ? aux : minDate;
            aux = Long.parseLong(properties.getProperty("ldbc.snb.interactive.max_write_event_start_time"));
            maxDate = aux > maxDate ? aux : maxDate;
            aux = Long.parseLong(properties.getProperty("ldbc.snb.interactive.num_events"));
            count += aux;
            file.close();
            fs.delete(propertiesFile, true);

            if (conf.getBoolean("ldbc.snb.datagen.generator.activity", false)) {
                propertiesFile = new Path(
                        DatagenParams.hadoopDir + "/temp_updateStream_forum_" + i + ".properties");
                file = fs.open(propertiesFile);
                properties = new Properties();
                properties.load(file);
                aux = Long.parseLong(properties.getProperty("ldbc.snb.interactive.min_write_event_start_time"));
                minDate = aux < minDate ? aux : minDate;
                aux = Long.parseLong(properties.getProperty("ldbc.snb.interactive.max_write_event_start_time"));
                maxDate = aux > maxDate ? aux : maxDate;
                aux = Long.parseLong(properties.getProperty("ldbc.snb.interactive.num_events"));
                count += aux;
                file.close();
                fs.delete(propertiesFile, true);
            }
        }

        OutputStream output = fs.create(
                new Path(DatagenParams.socialNetworkDir + "/updateStream" + ".properties"), true);
        output.write(new String("ldbc.snb.interactive.gct_delta_duration:" + DatagenParams.deltaTime + "\n").getBytes());
        output.write(new String("ldbc.snb.interactive.min_write_event_start_time:" + minDate + "\n").getBytes());
        output.write(new String("ldbc.snb.interactive.max_write_event_start_time:" + maxDate + "\n").getBytes());
        output.write(new String("ldbc.snb.interactive.update_interleave:" + (maxDate - minDate) / count + "\n").getBytes());
        output.write(new String("ldbc.snb.interactive.num_events:" + count).getBytes());
        output.close();
    }
    long endSortingUpdateStreams = System.currentTimeMillis();

    printProgress("Serializing invariant schema ");
    long startInvariantSerializing = System.currentTimeMillis();
    HadoopInvariantSerializer invariantSerializer = new HadoopInvariantSerializer(conf);
    invariantSerializer.run();
    long endInvariantSerializing = System.currentTimeMillis();

    long end = System.currentTimeMillis();
    System.out.println(((end - start) / 1000) + " total seconds");
    System.out.println("Person generation time: " + ((endPerson - startPerson) / 1000));
    System.out.println("University correlated edge generation time: " + ((endUniversity - startUniversity) / 1000));
    System.out.println("Interest correlated edge generation time: " + ((endInterest - startInterest) / 1000));
    System.out.println("Random correlated edge generation time: " + ((endRandom - startRandom) / 1000));
    System.out.println("Edges merge time: " + ((endMerge - startMerge) / 1000));
    System.out.println("Person serialization time: " + ((endPersonSerializing - startPersonSerializing) / 1000));
    System.out.println("Person activity generation and serialization time: "
            + ((endPersonActivity - startPersonActivity) / 1000));
    System.out.println("Sorting update streams time: "
            + ((endSortingUpdateStreams - startSortingUpdateStreams) / 1000));
    System.out.println("Invariant schema serialization time: "
            + ((endInvariantSerializing - startInvariantSerializing) / 1000));
    System.out.println("Total Execution time: " + ((end - start) / 1000));

    if (conf.getBoolean("ldbc.snb.datagen.parametergenerator.parameters", false)
            && conf.getBoolean("ldbc.snb.datagen.generator.activity", false)) {
        System.out.println("Running Parameter Generation");
        System.out.println("Generating Interactive Parameters");
        ProcessBuilder pb = new ProcessBuilder("mkdir", "-p",
                conf.get("ldbc.snb.datagen.serializer.outputDir") + "/substitution_parameters");
        pb.directory(new File("./"));
        Process p = pb.start();
        p.waitFor();

        pb = new ProcessBuilder(conf.get("ldbc.snb.datagen.parametergenerator.python"),
                "paramgenerator/generateparams.py", "./",
                conf.get("ldbc.snb.datagen.serializer.outputDir") + "/substitution_parameters");
        pb.directory(new File("./"));
        File logInteractive = new File("parameters_interactive.log");
        pb.redirectErrorStream(true);
        pb.redirectOutput(ProcessBuilder.Redirect.appendTo(logInteractive));
        p = pb.start();
        p.waitFor();

        System.out.println("Generating BI Parameters");
        pb = new ProcessBuilder(conf.get("ldbc.snb.datagen.parametergenerator.python"),
                "paramgenerator/generateparamsbi.py", "./",
                conf.get("ldbc.snb.datagen.serializer.outputDir") + "/substitution_parameters");
        pb.directory(new File("./"));
        File logBi = new File("parameters_bi.log");
        pb.redirectErrorStream(true);
        pb.redirectOutput(ProcessBuilder.Redirect.appendTo(logBi));
        p = pb.start();
        p.waitFor();
        System.out.println("Finished Parameter Generation");
    }
    return 0;
}
From source file:ldbc.snb.datagen.generator.PhotoGenerator.java
public PhotoGenerator(LikeGenerator likeGenerator, Configuration conf) {
    this.likeGenerator_ = likeGenerator;
    this.photo_ = new Photo();
    this.richRdf = conf.getBoolean("ldbc.snb.datagen.generator.richRdf", false);
}
From source file:ldbc.snb.datagen.generator.PostGenerator.java
License:Open Source License
public PostGenerator(TextGenerator generator, CommentGenerator commentGenerator, LikeGenerator likeGenerator,
        Configuration conf) {
    this.generator_ = generator;
    this.commentGenerator_ = commentGenerator;
    this.likeGenerator_ = likeGenerator;
    this.post_ = new Post();
    this.richRdf = conf.getBoolean("ldbc.snb.datagen.generator.richRdf", false);
}
From source file:ldbc.snb.datagen.serializer.graphalytics.CSVPersonSerializer.java
License:Open Source License
public void initialize(Configuration conf, int reducerId) {
    int numFiles = FileNames.values().length;
    writers = new HDFSCSVWriter[numFiles];
    for (int i = 0; i < numFiles; ++i) {
        writers[i] = new HDFSCSVWriter(conf.get("ldbc.snb.datagen.serializer.socialNetworkDir"),
                FileNames.values()[i].toString() + "_" + reducerId,
                conf.getInt("ldbc.snb.datagen.numPartitions", 1),
                conf.getBoolean("ldbc.snb.datagen.serializer.compressed", false), "|",
                conf.getBoolean("ldbc.snb.datagen.serializer.endlineSeparator", false));
    }
    ArrayList<String> arguments = new ArrayList<String>();
    arguments.clear();
    arguments.add("Person.id");
    arguments.add("Person.id");
    writers[FileNames.PERSON_KNOWS_PERSON.ordinal()].writeEntry(arguments);
}
From source file:ldbc.snb.datagen.serializer.graphalytics.CSVPersonSerializerExtended.java
License:Open Source License
public void initialize(Configuration conf, int reducerId) {
    int numFiles = FileNames.values().length;
    writers = new HDFSCSVWriter[numFiles];
    for (int i = 0; i < numFiles; ++i) {
        writers[i] = new HDFSCSVWriter(conf.get("ldbc.snb.datagen.serializer.socialNetworkDir"),
                FileNames.values()[i].toString() + "_" + reducerId,
                conf.getInt("ldbc.snb.datagen.numPartitions", 1),
                conf.getBoolean("ldbc.snb.datagen.serializer.compressed", false), "|",
                conf.getBoolean("ldbc.snb.datagen.serializer.endlineSeparator", false));
    }
    ArrayList<String> arguments = new ArrayList<String>();
    arguments.add("id");
    arguments.add("creationDate");
    writers[FileNames.PERSON.ordinal()].writeEntry(arguments);
    arguments.clear();
    arguments.add("Person.id");
    arguments.add("Person.id");
    arguments.add("CreationDate");
    arguments.add("Weight");
    writers[FileNames.PERSON_KNOWS_PERSON.ordinal()].writeEntry(arguments);
}
From source file:ldbc.snb.datagen.serializer.graphalytics.CSVPersonSerializerWithWeights.java
License:Open Source License
public void initialize(Configuration conf, int reducerId) {
    int numFiles = FileNames.values().length;
    writers = new HDFSCSVWriter[numFiles];
    for (int i = 0; i < numFiles; ++i) {
        writers[i] = new HDFSCSVWriter(conf.get("ldbc.snb.datagen.serializer.socialNetworkDir"),
                FileNames.values()[i].toString() + "_" + reducerId,
                conf.getInt("ldbc.snb.datagen.numPartitions", 1),
                conf.getBoolean("ldbc.snb.datagen.serializer.compressed", false), "|", true);
    }
    ArrayList<String> arguments = new ArrayList<String>();
    arguments.clear();
    arguments.add("Person.id");
    arguments.add("Person.id");
    arguments.add("Weight");
    writers[FileNames.PERSON_KNOWS_PERSON.ordinal()].writeEntry(arguments);
}
From source file:ldbc.snb.datagen.serializer.graphalytics.pgx.CSVPersonSerializer.java
License:Open Source License
public void initialize(Configuration conf, int reducerId) {
    int numFiles = FileNames.values().length;
    writers = new HDFSCSVWriter[numFiles];
    for (int i = 0; i < numFiles; ++i) {
        writers[i] = new HDFSCSVWriter(conf.get("ldbc.snb.datagen.serializer.socialNetworkDir"),
                FileNames.values()[i].toString() + "_" + reducerId,
                conf.getInt("ldbc.snb.datagen.numPartitions", 1),
                conf.getBoolean("ldbc.snb.datagen.serializer.compressed", false), " ",
                conf.getBoolean("ldbc.snb.datagen.serializer.endlineSeparator", false));
    }
}
From source file:ldbc.snb.datagen.serializer.small.CSVPersonSerializer.java
License:Open Source License
public void initialize(Configuration conf, int reducerId) {
    int numFiles = FileNames.values().length;
    writers = new HDFSCSVWriter[numFiles];
    for (int i = 0; i < numFiles; ++i) {
        writers[i] = new HDFSCSVWriter(conf.get("ldbc.snb.datagen.serializer.socialNetworkDir"),
                FileNames.values()[i].toString() + "_" + reducerId,
                conf.getInt("ldbc.snb.datagen.numPartitions", 1),
                conf.getBoolean("ldbc.snb.datagen.serializer.compressed", false), "|",
                conf.getBoolean("ldbc.snb.datagen.serializer.endlineSeparator", false));
    }
    ArrayList<String> arguments = new ArrayList<String>();
    arguments.add("id");
    arguments.add("nickname");
    writers[FileNames.PERSON.ordinal()].writeEntry(arguments);
    arguments.clear();
    arguments.add("User.id");
    arguments.add("User.id");
    writers[FileNames.PERSON_KNOWS_PERSON.ordinal()].writeEntry(arguments);
    arguments.clear();
}
From source file:ldbc.snb.datagen.serializer.snb.interactive.CSVCompositeMergeForeignPersonSerializer.java
License:Open Source License
public void initialize(Configuration conf, int reducerId) throws IOException {
    int numFiles = FileNames.values().length;
    writers = new HDFSCSVWriter[numFiles];
    for (int i = 0; i < numFiles; ++i) {
        writers[i] = new HDFSCSVWriter(conf.get("ldbc.snb.datagen.serializer.socialNetworkDir"),
                FileNames.values()[i].toString() + "_" + reducerId,
                conf.getInt("ldbc.snb.datagen.numPartitions", 1),
                conf.getBoolean("ldbc.snb.datagen.serializer.compressed", false), "|",
                conf.getBoolean("ldbc.snb.datagen.serializer.endlineSeparator", false));
    }
    ArrayList<String> arguments = new ArrayList<String>();
    arguments.add("id");
    arguments.add("firstName");
    arguments.add("lastName");
    arguments.add("gender");
    arguments.add("birthday");
    arguments.add("creationDate");
    arguments.add("locationIP");
    arguments.add("browserUsed");
    arguments.add("place");
    arguments.add("language");
    arguments.add("email");
    writers[FileNames.PERSON.ordinal()].writeHeader(arguments);
    arguments.clear();
    arguments.add("Person.id");
    arguments.add("Tag.id");
    writers[FileNames.PERSON_HAS_INTEREST_TAG.ordinal()].writeHeader(arguments);
    arguments.clear();
    arguments.add("Person.id");
    arguments.add("Organisation.id");
    arguments.add("workFrom");
    writers[FileNames.PERSON_WORK_AT.ordinal()].writeHeader(arguments);
    arguments.clear();
    arguments.add("Person.id");
    arguments.add("Organisation.id");
    arguments.add("classYear");
    writers[FileNames.PERSON_STUDY_AT.ordinal()].writeHeader(arguments);
    arguments.clear();
    arguments.add("Person.id");
    arguments.add("Person.id");
    arguments.add("creationDate");
    writers[FileNames.PERSON_KNOWS_PERSON.ordinal()].writeHeader(arguments);
}
From source file:ldbc.snb.datagen.serializer.snb.interactive.CSVCompositePersonSerializer.java
License:Open Source License
@Override
public void initialize(Configuration conf, int reducerId) throws IOException {
    int numFiles = FileNames.values().length;
    writers = new HDFSCSVWriter[numFiles];
    for (int i = 0; i < numFiles; ++i) {
        writers[i] = new HDFSCSVWriter(conf.get("ldbc.snb.datagen.serializer.socialNetworkDir"),
                FileNames.values()[i].toString() + "_" + reducerId,
                conf.getInt("ldbc.snb.datagen.serializer.numPartitions", 1),
                conf.getBoolean("ldbc.snb.datagen.serializer.compressed", false), "|",
                conf.getBoolean("ldbc.snb.datagen.serializer.endlineSeparator", false));
    }
    ArrayList<String> arguments = new ArrayList<String>();
    arguments.add("id");
    arguments.add("firstName");
    arguments.add("lastName");
    arguments.add("gender");
    arguments.add("birthday");
    arguments.add("creationDate");
    arguments.add("locationIP");
    arguments.add("browserUsed");
    arguments.add("language");
    arguments.add("email");
    writers[FileNames.PERSON.ordinal()].writeHeader(arguments);
    arguments.clear();
    arguments.add("Person.id");
    arguments.add("Place.id");
    writers[FileNames.PERSON_LOCATED_IN_PLACE.ordinal()].writeHeader(arguments);
    arguments.clear();
    arguments.add("Person.id");
    arguments.add("Tag.id");
    writers[FileNames.PERSON_HAS_INTEREST_TAG.ordinal()].writeHeader(arguments);
    arguments.clear();
    arguments.add("Person.id");
    arguments.add("Organisation.id");
    arguments.add("workFrom");
    writers[FileNames.PERSON_WORK_AT.ordinal()].writeHeader(arguments);
    arguments.clear();
    arguments.add("Person.id");
    arguments.add("Organisation.id");
    arguments.add("classYear");
    writers[FileNames.PERSON_STUDY_AT.ordinal()].writeHeader(arguments);
    arguments.clear();
    arguments.add("Person.id");
    arguments.add("Person.id");
    arguments.add("creationDate");
    writers[FileNames.PERSON_KNOWS_PERSON.ordinal()].writeHeader(arguments);
}