Example usage for org.apache.hadoop.conf Configuration getBoolean

List of usage examples for org.apache.hadoop.conf Configuration getBoolean

Introduction

On this page you can find usage examples for org.apache.hadoop.conf.Configuration.getBoolean.

Prototype

public boolean getBoolean(String name, boolean defaultValue) 

Document

Get the value of the name property as a boolean. If no such property is specified, or if the specified value is not a valid boolean, then defaultValue is returned.
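
Before the full examples below, here is a minimal, self-contained sketch of the call. The property names (example.feature.enabled, example.feature.missing) are hypothetical and chosen only for illustration:

import org.apache.hadoop.conf.Configuration;

public class GetBooleanExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // Explicitly set a boolean property.
        conf.setBoolean("example.feature.enabled", true);

        // The property is set, so its stored value is returned.
        boolean enabled = conf.getBoolean("example.feature.enabled", false); // true

        // The property is not set, so the supplied default is returned.
        boolean missing = conf.getBoolean("example.feature.missing", false); // false

        System.out.println(enabled + " " + missing);
    }
}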

Usage

From source file:ldbc.snb.datagen.serializer.snb.interactive.CSVPersonActivitySerializer.java

@Override
public void initialize(Configuration conf, int reducerId) {
    int numFiles = FileNames.values().length;
    writers = new HDFSCSVWriter[numFiles];
    for (int i = 0; i < numFiles; ++i) {
        writers[i] = new HDFSCSVWriter(conf.get("ldbc.snb.datagen.serializer.socialNetworkDir"),
                FileNames.values()[i].toString() + "_" + reducerId,
                conf.getInt("ldbc.snb.datagen.numPartitions", 1),
                conf.getBoolean("ldbc.snb.datagen.serializer.compressed", false), "|",
                conf.getBoolean("ldbc.snb.datagen.serializer.endlineSeparator", false));
    }
    arguments = new ArrayList<String>();

    arguments.add("id");
    arguments.add("title");
    arguments.add("creationDate");
    writers[FileNames.FORUM.ordinal()].writeEntry(arguments);
    arguments.clear();

    arguments.add("Forum.id");
    arguments.add("Post.id");
    writers[FileNames.FORUM_CONTAINEROF_POST.ordinal()].writeEntry(arguments);
    arguments.clear();

    arguments.add("Forum.id");
    arguments.add("Person.id");
    arguments.add("joinDate");
    writers[FileNames.FORUM_HASMEMBER_PERSON.ordinal()].writeEntry(arguments);
    arguments.clear();

    arguments.add("Forum.id");
    arguments.add("Person.id");
    writers[FileNames.FORUM_HASMODERATOR_PERSON.ordinal()].writeEntry(arguments);
    arguments.clear();

    arguments.add("Forum.id");
    arguments.add("Tag.id");
    writers[FileNames.FORUM_HASTAG_TAG.ordinal()].writeEntry(arguments);
    arguments.clear();

    arguments.add("Person.id");
    arguments.add("Post.id");
    arguments.add("creationDate");
    writers[FileNames.PERSON_LIKES_POST.ordinal()].writeEntry(arguments);
    arguments.clear();

    arguments.add("Person.id");
    arguments.add("Comment.id");
    arguments.add("creationDate");
    writers[FileNames.PERSON_LIKES_COMMENT.ordinal()].writeEntry(arguments);
    arguments.clear();

    arguments.add("id");
    arguments.add("imageFile");
    arguments.add("creationDate");
    arguments.add("locationIP");
    arguments.add("browserUsed");
    arguments.add("language");
    arguments.add("content");
    arguments.add("length");
    writers[FileNames.POST.ordinal()].writeEntry(arguments);
    arguments.clear();

    arguments.add("Post.id");
    arguments.add("Person.id");
    writers[FileNames.POST_HASCREATOR_PERSON.ordinal()].writeEntry(arguments);
    arguments.clear();

    arguments.add("Post.id");
    arguments.add("Tag.id");
    writers[FileNames.POST_HASTAG_TAG.ordinal()].writeEntry(arguments);
    arguments.clear();

    arguments.add("Post.id");
    arguments.add("Place.id");
    writers[FileNames.POST_ISLOCATEDIN_PLACE.ordinal()].writeEntry(arguments);
    arguments.clear();

    arguments.add("id");
    arguments.add("creationDate");
    arguments.add("locationIP");
    arguments.add("browserUsed");
    arguments.add("content");
    arguments.add("length");
    writers[FileNames.COMMENT.ordinal()].writeEntry(arguments);
    arguments.clear();

    arguments.add("Comment.id");
    arguments.add("Person.id");
    writers[FileNames.COMMENT_HASCREATOR_PERSON.ordinal()].writeEntry(arguments);
    arguments.clear();

    arguments.add("Comment.id");
    arguments.add("Tag.id");
    writers[FileNames.COMMENT_HASTAG_TAG.ordinal()].writeEntry(arguments);
    arguments.clear();

    arguments.add("Comment.id");
    arguments.add("Place.id");
    writers[FileNames.COMMENT_ISLOCATEDIN_PLACE.ordinal()].writeEntry(arguments);
    arguments.clear();

    arguments.add("Comment.id");
    arguments.add("Post.id");
    writers[FileNames.COMMENT_REPLYOF_POST.ordinal()].writeEntry(arguments);
    arguments.clear();

    arguments.add("Comment.id");
    arguments.add("Comment.id");
    writers[FileNames.COMMENT_REPLYOF_COMMENT.ordinal()].writeEntry(arguments);
    arguments.clear();

}

From source file:ldbc.snb.datagen.serializer.snb.interactive.CSVPersonSerializer.java

License:Open Source License

public void initialize(Configuration conf, int reducerId) {
    int numFiles = FileNames.values().length;
    writers = new HDFSCSVWriter[numFiles];
    for (int i = 0; i < numFiles; ++i) {
        writers[i] = new HDFSCSVWriter(conf.get("ldbc.snb.datagen.serializer.socialNetworkDir"),
                FileNames.values()[i].toString() + "_" + reducerId,
                conf.getInt("ldbc.snb.datagen.serializer.numPartitions", 1),
                conf.getBoolean("ldbc.snb.datagen.serializer.compressed", false), "|",
                conf.getBoolean("ldbc.snb.datagen.serializer.endlineSeparator", false));
    }

    ArrayList<String> arguments = new ArrayList<String>();
    arguments.add("id");
    arguments.add("firstName");
    arguments.add("lastName");
    arguments.add("gender");
    arguments.add("birthday");
    arguments.add("creationDate");
    arguments.add("locationIP");
    arguments.add("browserUsed");
    writers[FileNames.PERSON.ordinal()].writeEntry(arguments);

    arguments.clear();
    arguments.add("Person.id");
    arguments.add("language");
    writers[FileNames.PERSON_SPEAKS_LANGUAGE.ordinal()].writeEntry(arguments);

    arguments.clear();
    arguments.add("Person.id");
    arguments.add("email");
    writers[FileNames.PERSON_HAS_EMAIL.ordinal()].writeEntry(arguments);

    arguments.clear();
    arguments.add("Person.id");
    arguments.add("Place.id");
    writers[FileNames.PERSON_LOCATED_IN_PLACE.ordinal()].writeEntry(arguments);

    arguments.clear();
    arguments.add("Person.id");
    arguments.add("Tag.id");
    writers[FileNames.PERSON_HAS_INTEREST_TAG.ordinal()].writeEntry(arguments);

    arguments.clear();
    arguments.add("Person.id");
    arguments.add("Organisation.id");
    arguments.add("workFrom");
    writers[FileNames.PERSON_WORK_AT.ordinal()].writeEntry(arguments);

    arguments.clear();
    arguments.add("Person.id");
    arguments.add("Organisation.id");
    arguments.add("classYear");
    writers[FileNames.PERSON_STUDY_AT.ordinal()].writeEntry(arguments);

    arguments.clear();
    arguments.add("Person.id");
    arguments.add("Person.id");
    arguments.add("creationDate");
    writers[FileNames.PERSON_KNOWS_PERSON.ordinal()].writeEntry(arguments);

}

From source file:ldbc.snb.datagen.serializer.snb.interactive.TurtlePersonActivitySerializer.java

@Override
public void initialize(Configuration conf, int reducerId) {

    dateTimeFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSXXX");
    int numFiles = FileNames.values().length;
    writers = new HDFSWriter[numFiles];
    for (int i = 0; i < numFiles; ++i) {
        writers[i] = new HDFSWriter(conf.get("ldbc.snb.datagen.serializer.socialNetworkDir"),
                FileNames.values()[i].toString() + "_" + reducerId,
                conf.getInt("ldbc.snb.datagen.numPartitions", 1),
                conf.getBoolean("ldbc.snb.datagen.serializer.compressed", false), "ttl");
        writers[i].writeAllPartitions(Turtle.getNamespaces());
        writers[i].writeAllPartitions(Turtle.getStaticNamespaces());
    }
}

From source file:ldbc.snb.datagen.serializer.snb.interactive.TurtlePersonSerializer.java

License:Open Source License

public void initialize(Configuration conf, int reducerId) {
    dateTimeFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSXXX");
    int numFiles = FileNames.values().length;
    writers = new HDFSWriter[numFiles];
    for (int i = 0; i < numFiles; ++i) {
        writers[i] = new HDFSWriter(conf.get("ldbc.snb.datagen.serializer.socialNetworkDir"),
                FileNames.values()[i].toString() + "_" + reducerId,
                conf.getInt("ldbc.snb.datagen.numPartitions", 1),
                conf.getBoolean("ldbc.snb.datagen.serializer.compressed", false), "ttl");
        writers[i].writeAllPartitions(Turtle.getNamespaces());
        writers[i].writeAllPartitions(Turtle.getStaticNamespaces());
    }
}

From source file:ldbc.socialnet.dbgen.generator.MRGenerateUsers.java

License:Open Source License

public int runGenerateJob(Configuration conf) throws Exception {
    FileSystem fs = FileSystem.get(conf);
    String hadoopDir = conf.get("outputDir") + "/hadoop";
    String socialNetDir = conf.get("outputDir") + "/social_network";
    int numThreads = Integer.parseInt(conf.get("numThreads"));
    System.out.println("NUMBER OF THREADS " + numThreads);

    /// --------- Execute Jobs ------
    long start = System.currentTimeMillis();

    /// --------------- First job Generating users----------------
    printProgress("Starting: Person generation");
    conf.set("pass", Integer.toString(0));
    Job job = new Job(conf, "SIB Generate Users & 1st Dimension");
    job.setMapOutputKeyClass(TupleKey.class);
    job.setMapOutputValueClass(ReducedUserProfile.class);
    job.setOutputKeyClass(TupleKey.class);
    job.setOutputValueClass(ReducedUserProfile.class);
    job.setJarByClass(GenerateUsersMapper.class);
    job.setMapperClass(GenerateUsersMapper.class);
    job.setNumReduceTasks(numThreads);
    job.setInputFormatClass(NLineInputFormat.class);
    conf.setInt("mapred.line.input.format.linespermap", 1);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    FileInputFormat.setInputPaths(job, new Path(hadoopDir) + "/mrInputFile");
    FileOutputFormat.setOutputPath(job, new Path(hadoopDir + "/sib"));
    job.waitForCompletion(true);

    /// --------------- Sorting by first dimension  ----------------
    printProgress("Starting: Sorting by first dimension");
    HadoopFileRanker fileRanker = new HadoopFileRanker(conf, TupleKey.class, ReducedUserProfile.class);
    fileRanker.run(hadoopDir + "/sib", hadoopDir + "/sibSorting");
    fs.delete(new Path(hadoopDir + "/sib"), true);

    /// --------------- job Generating First dimension Friendships  ----------------
    printProgress("Starting: Friendship generation 1.");
    conf.set("pass", Integer.toString(0));
    conf.set("dimension", Integer.toString(1));
    job = new Job(conf, "SIB Generate Friendship - Interest");
    job.setMapOutputKeyClass(ComposedKey.class);
    job.setMapOutputValueClass(ReducedUserProfile.class);
    job.setOutputKeyClass(TupleKey.class);
    job.setOutputValueClass(ReducedUserProfile.class);
    job.setJarByClass(HadoopBlockMapper.class);
    job.setMapperClass(HadoopBlockMapper.class);
    job.setReducerClass(DimensionReducer.class);
    job.setNumReduceTasks(numThreads);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setPartitionerClass(HadoopBlockPartitioner.class);
    job.setSortComparatorClass(ComposedKeyComparator.class);
    job.setGroupingComparatorClass(ComposedKeyGroupComparator.class);

    FileInputFormat.setInputPaths(job, new Path(hadoopDir + "/sibSorting"));
    FileOutputFormat.setOutputPath(job, new Path(hadoopDir + "/sib2"));
    job.waitForCompletion(true);
    fs.delete(new Path(hadoopDir + "/sibSorting"), true);

    /// --------------- Sorting phase 2  ----------------
    printProgress("Starting: Sorting by second dimension");
    fileRanker = new HadoopFileRanker(conf, TupleKey.class, ReducedUserProfile.class);
    fileRanker.run(hadoopDir + "/sib2", hadoopDir + "/sibSorting2");
    fs.delete(new Path(hadoopDir + "/sib2"), true);

    /// --------------- Second job Generating Friendships  ----------------
    printProgress("Starting: Friendship generation 2.");
    conf.set("pass", Integer.toString(1));
    conf.set("dimension", Integer.toString(2));
    job = new Job(conf, "SIB Generate Friendship - Interest");
    job.setMapOutputKeyClass(ComposedKey.class);
    job.setMapOutputValueClass(ReducedUserProfile.class);
    job.setOutputKeyClass(TupleKey.class);
    job.setOutputValueClass(ReducedUserProfile.class);
    job.setJarByClass(HadoopBlockMapper.class);
    job.setMapperClass(HadoopBlockMapper.class);
    job.setReducerClass(DimensionReducer.class);
    job.setNumReduceTasks(numThreads);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setPartitionerClass(HadoopBlockPartitioner.class);
    job.setSortComparatorClass(ComposedKeyComparator.class);
    job.setGroupingComparatorClass(ComposedKeyGroupComparator.class);
    FileInputFormat.setInputPaths(job, new Path(hadoopDir + "/sibSorting2"));
    FileOutputFormat.setOutputPath(job, new Path(hadoopDir + "/sib3"));
    job.waitForCompletion(true);
    fs.delete(new Path(hadoopDir + "/sibSorting2"), true);

    /// --------------- Sorting phase 3--------------
    printProgress("Starting: Sorting by third dimension");
    fileRanker = new HadoopFileRanker(conf, TupleKey.class, ReducedUserProfile.class);
    fileRanker.run(hadoopDir + "/sib3", hadoopDir + "/sibSorting3");
    fs.delete(new Path(hadoopDir + "/sib3"), true);

    /// --------------- Third job Generating Friendships----------------
    printProgress("Starting: Friendship generation 3.");
    conf.set("pass", Integer.toString(2));
    conf.set("dimension", Integer.toString(2));
    job = new Job(conf, "SIB Generate Friendship - Random");
    job.setMapOutputKeyClass(ComposedKey.class);
    job.setMapOutputValueClass(ReducedUserProfile.class);
    job.setOutputKeyClass(TupleKey.class);
    job.setOutputValueClass(ReducedUserProfile.class);
    job.setJarByClass(HadoopBlockMapper.class);
    job.setMapperClass(HadoopBlockMapper.class);
    job.setReducerClass(DimensionReducer.class);
    job.setNumReduceTasks(numThreads);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setPartitionerClass(HadoopBlockPartitioner.class);
    job.setSortComparatorClass(ComposedKeyComparator.class);
    job.setGroupingComparatorClass(ComposedKeyGroupComparator.class);
    FileInputFormat.setInputPaths(job, new Path(hadoopDir + "/sibSorting3"));
    FileOutputFormat.setOutputPath(job, new Path(hadoopDir + "/sib4"));
    job.waitForCompletion(true);
    fs.delete(new Path(hadoopDir + "/sibSorting3"), true);

    /// --------------- Sorting phase 3--------------

    printProgress("Starting: Sorting by third dimension (for activity generation)");
    fileRanker = new HadoopFileRanker(conf, TupleKey.class, ReducedUserProfile.class);
    fileRanker.run(hadoopDir + "/sib4", hadoopDir + "/sibSorting4");
    fs.delete(new Path(hadoopDir + "/sib4"), true);

    /// --------------- Fourth job: Serialize static network ----------------

    printProgress("Starting: Generating person activity");
    job = new Job(conf, "Generate user activity");
    job.setMapOutputKeyClass(ComposedKey.class);
    job.setMapOutputValueClass(ReducedUserProfile.class);
    job.setOutputKeyClass(TupleKey.class);
    job.setOutputValueClass(ReducedUserProfile.class);
    job.setJarByClass(HadoopBlockMapper.class);
    job.setMapperClass(HadoopBlockMapper.class);
    job.setReducerClass(UserActivityReducer.class);
    job.setNumReduceTasks(numThreads);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setPartitionerClass(HadoopBlockPartitioner.class);
    job.setSortComparatorClass(ComposedKeyComparator.class);
    job.setGroupingComparatorClass(ComposedKeyGroupComparator.class);
    FileInputFormat.setInputPaths(job, new Path(hadoopDir + "/sibSorting4"));
    FileOutputFormat.setOutputPath(job, new Path(hadoopDir + "/sib5"));
    job.waitForCompletion(true);
    fs.delete(new Path(hadoopDir + "/sib5"), true);

    int numEvents = 0;
    long min = Long.MAX_VALUE;
    long max = Long.MIN_VALUE;

    if (conf.getBoolean("updateStreams", false)) {
        for (int i = 0; i < numThreads; ++i) {
            int numPartitions = conf.getInt("numUpdatePartitions", 1);
            for (int j = 0; j < numPartitions; ++j) {
                /// --------------- Fifth job: Sort update streams ----------------
                conf.setInt("mapred.line.input.format.linespermap", 1000000);
                conf.setInt("reducerId", i);
                conf.setInt("partitionId", j);
                conf.set("streamType", "forum");
                Job jobForum = new Job(conf, "Soring update streams " + j + " of reducer " + i);
                jobForum.setMapOutputKeyClass(LongWritable.class);
                jobForum.setMapOutputValueClass(Text.class);
                jobForum.setOutputKeyClass(LongWritable.class);
                jobForum.setOutputValueClass(Text.class);
                jobForum.setJarByClass(UpdateEventMapper.class);
                jobForum.setMapperClass(UpdateEventMapper.class);
                jobForum.setReducerClass(UpdateEventReducer.class);
                jobForum.setNumReduceTasks(1);
                jobForum.setInputFormatClass(SequenceFileInputFormat.class);
                jobForum.setOutputFormatClass(SequenceFileOutputFormat.class);
                jobForum.setPartitionerClass(UpdateEventPartitioner.class);
                FileInputFormat.addInputPath(jobForum,
                        new Path(socialNetDir + "/temp_updateStream_" + i + "_" + j + "_forum"));
                FileOutputFormat.setOutputPath(jobForum, new Path(hadoopDir + "/sibEnd"));
                printProgress("Starting: Sorting update streams");
                jobForum.waitForCompletion(true);
                fs.delete(new Path(socialNetDir + "/temp_updateStream_" + i + "_" + j + "_forum"), false);
                fs.delete(new Path(hadoopDir + "/sibEnd"), true);

                conf.setInt("mapred.line.input.format.linespermap", 1000000);
                conf.setInt("reducerId", i);
                conf.setInt("partitionId", j);
                conf.set("streamType", "person");
                Job jobPerson = new Job(conf, "Soring update streams " + j + " of reducer " + i);
                jobPerson.setMapOutputKeyClass(LongWritable.class);
                jobPerson.setMapOutputValueClass(Text.class);
                jobPerson.setOutputKeyClass(LongWritable.class);
                jobPerson.setOutputValueClass(Text.class);
                jobPerson.setJarByClass(UpdateEventMapper.class);
                jobPerson.setMapperClass(UpdateEventMapper.class);
                jobPerson.setReducerClass(UpdateEventReducer.class);
                jobPerson.setNumReduceTasks(1);
                jobPerson.setInputFormatClass(SequenceFileInputFormat.class);
                jobPerson.setOutputFormatClass(SequenceFileOutputFormat.class);
                jobPerson.setPartitionerClass(UpdateEventPartitioner.class);
                FileInputFormat.addInputPath(jobPerson,
                        new Path(socialNetDir + "/temp_updateStream_" + i + "_" + j + "_person"));
                FileOutputFormat.setOutputPath(jobPerson, new Path(hadoopDir + "/sibEnd"));
                printProgress("Starting: Sorting update streams");
                jobPerson.waitForCompletion(true);
                fs.delete(new Path(socialNetDir + "/temp_updateStream_" + i + "_" + j + "_person"), false);
                fs.delete(new Path(hadoopDir + "/sibEnd"), true);

                if (conf.getBoolean("updateStreams", false)) {
                    Properties properties = new Properties();
                    FSDataInputStream file = fs.open(new Path(conf.get("outputDir")
                            + "/social_network/updateStream_" + i + "_" + j + "_person.properties"));
                    properties.load(file);
                    if (properties.getProperty("min_write_event_start_time") != null) {
                        Long auxMin = Long.parseLong(properties.getProperty("min_write_event_start_time"));
                        min = auxMin < min ? auxMin : min;
                        Long auxMax = Long.parseLong(properties.getProperty("max_write_event_start_time"));
                        max = auxMax > max ? auxMax : max;
                        numEvents += Long.parseLong(properties.getProperty("num_events"));
                    }
                    file.close();
                    file = fs.open(new Path(conf.get("outputDir") + "/social_network/updateStream_" + i + "_"
                            + j + "_forum.properties"));
                    properties.load(file);
                    if (properties.getProperty("min_write_event_start_time") != null) {
                        Long auxMin = Long.parseLong(properties.getProperty("min_write_event_start_time"));
                        min = auxMin < min ? auxMin : min;
                        Long auxMax = Long.parseLong(properties.getProperty("max_write_event_start_time"));
                        max = auxMax > max ? auxMax : max;
                        numEvents += Long.parseLong(properties.getProperty("num_events"));
                    }
                    file.close();
                    fs.delete(new Path(conf.get("outputDir") + "/social_network/updateStream_" + i + "_" + j
                            + "_person.properties"), true);
                    fs.delete(new Path(conf.get("outputDir") + "/social_network/updateStream_" + i + "_" + j
                            + "_forum.properties"), true);
                }
            }
        }

        if (conf.getBoolean("updateStreams", false)) {
            OutputStream output = fs
                    .create(new Path(conf.get("outputDir") + "/social_network/updateStream.properties"));
            output.write(new String("ldbc.snb.interactive.gct_delta_duration:" + conf.get("deltaTime") + "\n")
                    .getBytes());
            output.write(
                    new String("ldbc.snb.interactive.min_write_event_start_time:" + min + "\n").getBytes());
            output.write(
                    new String("ldbc.snb.interactive.max_write_event_start_time:" + max + "\n").getBytes());
            output.write(new String("ldbc.snb.interactive.update_interleave:" + (max - min) / numEvents + "\n")
                    .getBytes());
            output.write(new String("ldbc.snb.interactive.num_events:" + numEvents).getBytes());
            output.close();
        }
    }

    /// --------------- Sixth job: Materialize the friends lists ----------------
    /*        Job job6 = new Job(conf,"Dump the friends lists");
            job6.setMapOutputKeyClass(ComposedKey.class);
            job6.setMapOutputValueClass(ReducedUserProfile.class);
            job6.setOutputKeyClass(ComposedKey.class);
            job6.setOutputValueClass(ReducedUserProfile.class);
            job6.setJarByClass(HadoopBlockMapper.class);
            job6.setMapperClass(HadoopBlockMapper.class);
            job6.setReducerClass(FriendListOutputReducer.class);
            job6.setNumReduceTasks(numThreads);
            job6.setInputFormatClass(SequenceFileInputFormat.class);
            job6.setOutputFormatClass(SequenceFileOutputFormat.class);
            job6.setPartitionerClass(HadoopBlockPartitioner.class);
            job6.setSortComparatorClass(ComposedKeyComparator.class);
            job6.setGroupingComparatorClass(ComposedKeyGroupComparator.class);
            FileInputFormat.setInputPaths(job6, new Path(hadoopDir + "/sibSorting4"));
            FileOutputFormat.setOutputPath(job6, new Path(hadoopDir + "/job6") );
            
            
            printProgress("Starting: Materialize friends for substitution parameters");
            int resMaterializeFriends = job6.waitForCompletion(true) ? 0 : 1;
            fs.delete(new Path(hadoopDir + "/sibSorting3"),true);
            */

    long end = System.currentTimeMillis();
    System.out.println(((end - start) / 1000) + " total seconds");
    for (int i = 0; i < numThreads; ++i) {
        fs.copyToLocalFile(new Path(socialNetDir + "/m" + i + "factors.txt"), new Path("./"));
        fs.copyToLocalFile(new Path(socialNetDir + "/m0friendList" + i + ".csv"), new Path("./"));
    }
    return 0;
}

From source file:mappers.BlockMultiplicationGroupingMapper.java

License:Apache License

public void map(Text key, T value, Context context) throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    int nRL = conf.getInt("NRL", 0);
    int nCL = conf.getInt("NCL", 0);
    int nRR = conf.getInt("NRR", 0);
    int nCR = conf.getInt("NCR", 0);

    boolean lTrans = conf.getBoolean("LTRANS", false);
    boolean rTrans = conf.getBoolean("RTRANS", false);

    String lName = conf.get("LEFTNAME");
    String rName = conf.get("RIGHTNAME");

    //can get the block indices from the text key or from the .nR and .nC fields
    String line = key.toString();
    String[] parts = line.split(",");
    String name = parts[0];

    //key format is "i,j,k" 
    if (name.equals(lName)) {

        System.out.println("left" + key.toString());
        for (int j = 0; j < (rTrans ? nRR : nCR); j++) {
            outKey.set((lTrans ? parts[2] : parts[1]) + "," + String.valueOf(j) + ","
                    + (lTrans ? parts[1] : parts[2]));
            context.write(outKey, value);
        }
    } else {
        System.out.println("right" + key.toString());
        for (int i = 0; i < (lTrans ? nCL : nRL); i++) {
            outKey.set(String.valueOf(i) + "," + (rTrans ? parts[1] : parts[2]) + ","
                    + (rTrans ? parts[2] : parts[1]));
            context.write(outKey, value);
        }
    }
}

From source file:mappers.SquareBlockTraceMultiplicationGroupingMapper.java

License:Apache License

public void map(Text key, MatrixBlock value, Context context) throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();

    boolean lTrans = conf.getBoolean("LTRANS", false);
    boolean rTrans = conf.getBoolean("RTRANS", false);

    String lName = conf.get("LEFTNAME");

    //can get the block indices from the text key or from the .nR and .nC fields
    String line = key.toString();
    String[] parts = line.split(",");
    String name = parts[0];

    //key format is "i,j,k" 
    if (name.equals(lName)) {

        int row = lTrans ? Integer.parseInt(parts[2]) : Integer.parseInt(parts[1]);
        int col = lTrans ? Integer.parseInt(parts[1]) : Integer.parseInt(parts[2]);

        outKey.set(String.valueOf(row) + "," + String.valueOf(row) + "," + String.valueOf(col));
        context.write(outKey, value);
    }

    else {

        int row = rTrans ? Integer.parseInt(parts[2]) : Integer.parseInt(parts[1]);
        int col = rTrans ? Integer.parseInt(parts[1]) : Integer.parseInt(parts[2]);

        outKey.set(String.valueOf(col) + "," + String.valueOf(col) + "," + String.valueOf(row));
        context.write(outKey, value);
    }
}

From source file:matrixFormat.MatrixRecordReader.java

License:Apache License

public void initialize(InputSplit genericSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    //FileSplit split = (FileSplit) genericSplit;

    MatrixFileSplit split = (MatrixFileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    method = (job.get("method").compareTo("IPB") == 0) ? 1
            : ((job.get("method").compareTo("OPB") == 0) ? 2 : 0);
    sparse = job.getBoolean("Sparse", false);
    this.maxLength = job.getInt("mapred.matrixrecordreader.maxlength", Integer.MAX_VALUE);

    start1 = split.getStart();
    start2 = split.getStart(1);
    end1 = start1 + split.getLength(0);
    end2 = start2 + split.getLength(1);
    blkID = split.getId();
    final Path file = split.getPath(0);
    final Path file2 = split.getPath(1);
    compressionCodecs = new CompressionCodecFactory(job);
    codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);
    FileSystem fs2 = file2.getFileSystem(job);
    FSDataInputStream fileIn1 = fs.open(split.getPath(0));
    FSDataInputStream fileIn2 = fs2.open(split.getPath(1));
    //FileInputStream fileIn2 = new FileInputStream(file2.toString()); 
    //Don't care about the compression stuff
    /*if (isCompressedInput()) {
      decompressor = CodecPool.getDecompressor(codec);
      if (codec instanceof SplittableCompressionCodec) {
        final SplitCompressionInputStream cIn =
          ((SplittableCompressionCodec)codec).createInputStream(
    fileIn1, decompressor, start1, end1,
    SplittableCompressionCodec.READ_MODE.BYBLOCK);
        final SplitCompressionInputStream cIn2 =
        ((SplittableCompressionCodec)codec).createInputStream(
          fileIn2, decompressor, start2, end2,
          SplittableCompressionCodec.READ_MODE.BYBLOCK);
        in = new MatrixReader(cIn, cIn2);
        start1 = cIn.getAdjustedStart();
        end1 = cIn.getAdjustedEnd();
        filePosition1 = cIn;
      } else {
        in = new MatrixReader(codec.createInputStream(fileIn1, decompressor), codec.createInputStream(fileIn2, decompressor), job, split.getStarts(0), split.getStarts(1) );
        filePosition1 = fileIn1;
      }
    } else {*/
    fileIn1.seek(start1);
    fileIn2.seek(start2);
    if (sparse) {
        in = new MatrixReader(fileIn1, fileIn2, job, split.getStart(0), split.getStart(1));
    } else {
        in = new MatrixReader(fileIn1, fileIn2, job, split.getStarts(0), split.getStarts(1));
    }

    //in = new MatrixReader(file, file2, job, split.getStarts(0), split.getStarts(1));
    filePosition1 = fileIn1;
    filePosition2 = fileIn2;
    //}

    // If this is not the first split, we always throw away first record
    // because we always (except the last split) read one extra line in
    // next() method.
    /*if (start1 != 0) {
       start1 += in.readOldBlock(maxLength, maxBytesToConsume(pos1));
       this.pos1 = start1;
    }
            
    in.readBlocks(maxLength, maxBytesToConsume(pos1));
    start1 += in.getBytesComsumed(0);
    //start2 += in.getBytesComsumed(1);
    this.pos1 = start1;*/
}

From source file:ml.shifu.guagua.mapreduce.GuaguaMapReduceClient.java

License:Apache License

private static void checkZkServerSetting(Configuration conf, CommandLine cmdLine) {
    if (!cmdLine.hasOption("-z")) {
        System.err.println("WARN: ZooKeeper server is not set, embeded ZooKeeper server will be started.");
        System.err.println(
                "WARN: For big data guagua application with fail-over zookeeper servers, independent ZooKeeper instances are recommended.");
        System.err.println("WARN: Zookeeper servers can be provided by '-z' parameter with non-empty value.");
        // change default embedded zookeeper server to master node
        boolean isZkInClient = conf.getBoolean(GuaguaConstants.GUAGUA_ZK_EMBEDBED_IS_IN_CLIENT, false);
        if (isZkInClient) {
            synchronized (GuaguaMapReduceClient.class) {
                if (embededZooKeeperServer == null) {
                    // 1. start embed zookeeper server in one thread.
                    int embedZkClientPort = 0;
                    try {
                        embedZkClientPort = ZooKeeperUtils.startEmbedZooKeeper();
                    } catch (IOException e) {
                        throw new RuntimeException(e);
                    }
                    // 2. check if it is started.
                    ZooKeeperUtils.checkIfEmbedZooKeeperStarted(embedZkClientPort);
                    try {
                        embededZooKeeperServer = InetAddress.getLocalHost().getHostName() + ":"
                                + embedZkClientPort;
                    } catch (UnknownHostException e) {
                        throw new RuntimeException(e);
                    }
                }
            }
            // 3. set local embed zookeeper server address
            conf.set(GuaguaConstants.GUAGUA_ZK_SERVERS, embededZooKeeperServer);
        } else {
            conf.set(GuaguaConstants.GUAGUA_MASTER_SYSTEM_INTERCEPTERS, conf.get(
                    GuaguaConstants.GUAGUA_MASTER_SYSTEM_INTERCEPTERS,
                    "ml.shifu.guagua.master.MasterTimer,ml.shifu.guagua.master.MemoryStatsMasterInterceptor,ml.shifu.guagua.hadoop.ZooKeeperMasterInterceptor,ml.shifu.guagua.master.NettyMasterCoordinator "));
            conf.set(GuaguaConstants.GUAGUA_WORKER_SYSTEM_INTERCEPTERS, conf.get(
                    GuaguaConstants.GUAGUA_WORKER_SYSTEM_INTERCEPTERS,
                    "ml.shifu.guagua.worker.WorkerTimer,ml.shifu.guagua.worker.MemoryStatsWorkerInterceptor,ml.shifu.guagua.hadoop.ZooKeeperWorkerInterceptor,ml.shifu.guagua.worker.NettyWorkerCoordinator"));
            System.err.println("WARN: Zookeeper server will be started in master node of cluster");
        }
        return;
    } else {
        String zkServers = cmdLine.getOptionValue("z");
        if (zkServers == null || zkServers.length() == 0) {
            throw new IllegalArgumentException(
                    "Zookeeper servers should be provided by '-z' parameter with non-empty value.");
        }
        if (ZooKeeperUtils.checkServers(zkServers)) {
            conf.set(GuaguaConstants.GUAGUA_ZK_SERVERS, zkServers.trim());
        } else {
            throw new RuntimeException("Your specifed zookeeper instance is not alive, please check.");
        }
    }
}

From source file:ml.shifu.guagua.yarn.GuaguaYarnClient.java

License:Apache License

private static void checkZkServerSetting(Configuration conf, CommandLine cmdLine) {
    if (!cmdLine.hasOption("-z")) {
        System.err.println("WARN: ZooKeeper server is not set, embeded ZooKeeper server will be started.");
        System.err.println(
                "WARN: For big data guagua application with fail-over zookeeper servers, independent ZooKeeper instances are recommended.");
        System.err.println("WARN: Zookeeper servers can be provided by '-z' parameter with non-empty value.");

        boolean isZkInClient = conf.getBoolean(GuaguaConstants.GUAGUA_ZK_EMBEDBED_IS_IN_CLIENT, false);
        if (isZkInClient) {
            synchronized (GuaguaYarnClient.class) {
                if (embededZooKeeperServer == null) {
                    // 1. start embed zookeeper server in one thread.
                    int embedZkClientPort = 0;
                    try {
                        embedZkClientPort = ZooKeeperUtils.startEmbedZooKeeper();
                    } catch (IOException e) {
                        throw new RuntimeException(e);
                    }
                    // 2. check if it is started.
                    ZooKeeperUtils.checkIfEmbedZooKeeperStarted(embedZkClientPort);
                    try {
                        embededZooKeeperServer = InetAddress.getLocalHost().getHostName() + ":"
                                + embedZkClientPort;
                    } catch (UnknownHostException e) {
                        throw new RuntimeException(e);
                    }
                }
            }
            // 3. set local embed zookeeper server address
            conf.set(GuaguaConstants.GUAGUA_ZK_SERVERS, embededZooKeeperServer);
        } else {
            conf.set(GuaguaConstants.GUAGUA_MASTER_SYSTEM_INTERCEPTERS, conf.get(
                    GuaguaConstants.GUAGUA_MASTER_SYSTEM_INTERCEPTERS,
                    "ml.shifu.guagua.master.MasterTimer,ml.shifu.guagua.master.MemoryStatsMasterInterceptor,ml.shifu.guagua.hadoop.ZooKeeperMasterInterceptor,ml.shifu.guagua.master.NettyMasterCoordinator "));
            conf.set(GuaguaConstants.GUAGUA_WORKER_SYSTEM_INTERCEPTERS, conf.get(
                    GuaguaConstants.GUAGUA_WORKER_SYSTEM_INTERCEPTERS,
                    "ml.shifu.guagua.worker.WorkerTimer,ml.shifu.guagua.worker.MemoryStatsWorkerInterceptor,ml.shifu.guagua.hadoop.ZooKeeperWorkerInterceptor,ml.shifu.guagua.worker.NettyWorkerCoordinator"));
            System.err.println("WARN: Zookeeper server will be started in master node of cluster");
        }
        return;
    } else {
        String zkServers = cmdLine.getOptionValue("z");
        if (zkServers == null || zkServers.length() == 0) {
            throw new IllegalArgumentException(
                    "Zookeeper servers should be provided by '-z' parameter with non-empty value.");
        }
        if (ZooKeeperUtils.checkServers(zkServers)) {
            conf.set(GuaguaConstants.GUAGUA_ZK_SERVERS, zkServers.trim());
        } else {
            throw new RuntimeException("Your specifed zookeeper instance is not alive, please check.");
        }
    }
}