List of usage examples for org.apache.hadoop.conf.Configuration.getBoolean
public boolean getBoolean(String name, boolean defaultValue)
Gets the value of the name property as a boolean. If no such property is specified, or if the specified value is not a valid boolean, then defaultValue is returned.
Parameters: name - the property name; defaultValue - the value to return when the property is unset or not a valid boolean.
Returns: the property value as a boolean, or defaultValue.
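A minimal sketch of the behavior (not taken from the projects listed below; the property names are made up for illustration): a set property is parsed as a boolean, and a missing property falls back to the supplied default.

import org.apache.hadoop.conf.Configuration;

public class GetBooleanExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Hypothetical property names, used only to demonstrate the API.
        conf.set("example.feature.enabled", "true");
        boolean enabled = conf.getBoolean("example.feature.enabled", false); // true (parsed from the value)
        boolean missing = conf.getBoolean("example.other.flag", false);      // false (property unset, default used)
        System.out.println(enabled + " " + missing);
    }
}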
From source file:ldbc.snb.datagen.serializer.snb.interactive.CSVPersonActivitySerializer.java
@Override
public void initialize(Configuration conf, int reducerId) {
    int numFiles = FileNames.values().length;
    writers = new HDFSCSVWriter[numFiles];
    // One writer per output file; compression and line-ending style are read with getBoolean.
    for (int i = 0; i < numFiles; ++i) {
        writers[i] = new HDFSCSVWriter(conf.get("ldbc.snb.datagen.serializer.socialNetworkDir"),
                FileNames.values()[i].toString() + "_" + reducerId,
                conf.getInt("ldbc.snb.datagen.numPartitions", 1),
                conf.getBoolean("ldbc.snb.datagen.serializer.compressed", false), "|",
                conf.getBoolean("ldbc.snb.datagen.serializer.endlineSeparator", false));
    }
    // Write the header row of each CSV file.
    arguments = new ArrayList<String>();
    arguments.add("id"); arguments.add("title"); arguments.add("creationDate");
    writers[FileNames.FORUM.ordinal()].writeEntry(arguments);
    arguments.clear();
    arguments.add("Forum.id"); arguments.add("Post.id");
    writers[FileNames.FORUM_CONTAINEROF_POST.ordinal()].writeEntry(arguments);
    arguments.clear();
    arguments.add("Forum.id"); arguments.add("Person.id"); arguments.add("joinDate");
    writers[FileNames.FORUM_HASMEMBER_PERSON.ordinal()].writeEntry(arguments);
    arguments.clear();
    arguments.add("Forum.id"); arguments.add("Person.id");
    writers[FileNames.FORUM_HASMODERATOR_PERSON.ordinal()].writeEntry(arguments);
    arguments.clear();
    arguments.add("Forum.id"); arguments.add("Tag.id");
    writers[FileNames.FORUM_HASTAG_TAG.ordinal()].writeEntry(arguments);
    arguments.clear();
    arguments.add("Person.id"); arguments.add("Post.id"); arguments.add("creationDate");
    writers[FileNames.PERSON_LIKES_POST.ordinal()].writeEntry(arguments);
    arguments.clear();
    arguments.add("Person.id"); arguments.add("Comment.id"); arguments.add("creationDate");
    writers[FileNames.PERSON_LIKES_COMMENT.ordinal()].writeEntry(arguments);
    arguments.clear();
    arguments.add("id"); arguments.add("imageFile"); arguments.add("creationDate");
    arguments.add("locationIP"); arguments.add("browserUsed"); arguments.add("language");
    arguments.add("content"); arguments.add("length");
    writers[FileNames.POST.ordinal()].writeEntry(arguments);
    arguments.clear();
    arguments.add("Post.id"); arguments.add("Person.id");
    writers[FileNames.POST_HASCREATOR_PERSON.ordinal()].writeEntry(arguments);
    arguments.clear();
    arguments.add("Post.id"); arguments.add("Tag.id");
    writers[FileNames.POST_HASTAG_TAG.ordinal()].writeEntry(arguments);
    arguments.clear();
    arguments.add("Post.id"); arguments.add("Place.id");
    writers[FileNames.POST_ISLOCATEDIN_PLACE.ordinal()].writeEntry(arguments);
    arguments.clear();
    arguments.add("id"); arguments.add("creationDate"); arguments.add("locationIP");
    arguments.add("browserUsed"); arguments.add("content"); arguments.add("length");
    writers[FileNames.COMMENT.ordinal()].writeEntry(arguments);
    arguments.clear();
    arguments.add("Comment.id"); arguments.add("Person.id");
    writers[FileNames.COMMENT_HASCREATOR_PERSON.ordinal()].writeEntry(arguments);
    arguments.clear();
    arguments.add("Comment.id"); arguments.add("Tag.id");
    writers[FileNames.COMMENT_HASTAG_TAG.ordinal()].writeEntry(arguments);
    arguments.clear();
    arguments.add("Comment.id"); arguments.add("Place.id");
    writers[FileNames.COMMENT_ISLOCATEDIN_PLACE.ordinal()].writeEntry(arguments);
    arguments.clear();
    arguments.add("Comment.id"); arguments.add("Post.id");
    writers[FileNames.COMMENT_REPLYOF_POST.ordinal()].writeEntry(arguments);
    arguments.clear();
    arguments.add("Comment.id"); arguments.add("Comment.id");
    writers[FileNames.COMMENT_REPLYOF_COMMENT.ordinal()].writeEntry(arguments);
    arguments.clear();
}
From source file:ldbc.snb.datagen.serializer.snb.interactive.CSVPersonSerializer.java
License:Open Source License
public void initialize(Configuration conf, int reducerId) {
    int numFiles = FileNames.values().length;
    writers = new HDFSCSVWriter[numFiles];
    // One writer per output file; compression and line-ending style are read with getBoolean.
    for (int i = 0; i < numFiles; ++i) {
        writers[i] = new HDFSCSVWriter(conf.get("ldbc.snb.datagen.serializer.socialNetworkDir"),
                FileNames.values()[i].toString() + "_" + reducerId,
                conf.getInt("ldbc.snb.datagen.serializer.numPartitions", 1),
                conf.getBoolean("ldbc.snb.datagen.serializer.compressed", false), "|",
                conf.getBoolean("ldbc.snb.datagen.serializer.endlineSeparator", false));
    }
    // Write the header row of each CSV file.
    ArrayList<String> arguments = new ArrayList<String>();
    arguments.add("id"); arguments.add("firstName"); arguments.add("lastName");
    arguments.add("gender"); arguments.add("birthday"); arguments.add("creationDate");
    arguments.add("locationIP"); arguments.add("browserUsed");
    writers[FileNames.PERSON.ordinal()].writeEntry(arguments);
    arguments.clear();
    arguments.add("Person.id"); arguments.add("language");
    writers[FileNames.PERSON_SPEAKS_LANGUAGE.ordinal()].writeEntry(arguments);
    arguments.clear();
    arguments.add("Person.id"); arguments.add("email");
    writers[FileNames.PERSON_HAS_EMAIL.ordinal()].writeEntry(arguments);
    arguments.clear();
    arguments.add("Person.id"); arguments.add("Place.id");
    writers[FileNames.PERSON_LOCATED_IN_PLACE.ordinal()].writeEntry(arguments);
    arguments.clear();
    arguments.add("Person.id"); arguments.add("Tag.id");
    writers[FileNames.PERSON_HAS_INTEREST_TAG.ordinal()].writeEntry(arguments);
    arguments.clear();
    arguments.add("Person.id"); arguments.add("Organisation.id"); arguments.add("workFrom");
    writers[FileNames.PERSON_WORK_AT.ordinal()].writeEntry(arguments);
    arguments.clear();
    arguments.add("Person.id"); arguments.add("Organisation.id"); arguments.add("classYear");
    writers[FileNames.PERSON_STUDY_AT.ordinal()].writeEntry(arguments);
    arguments.clear();
    arguments.add("Person.id"); arguments.add("Person.id"); arguments.add("creationDate");
    writers[FileNames.PERSON_KNOWS_PERSON.ordinal()].writeEntry(arguments);
}
From source file:ldbc.snb.datagen.serializer.snb.interactive.TurtlePersonActivitySerializer.java
@Override
public void initialize(Configuration conf, int reducerId) {
    dateTimeFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSXXX");
    int numFiles = FileNames.values().length;
    writers = new HDFSWriter[numFiles];
    for (int i = 0; i < numFiles; ++i) {
        writers[i] = new HDFSWriter(conf.get("ldbc.snb.datagen.serializer.socialNetworkDir"),
                FileNames.values()[i].toString() + "_" + reducerId,
                conf.getInt("ldbc.snb.datagen.numPartitions", 1),
                conf.getBoolean("ldbc.snb.datagen.serializer.compressed", false), "ttl");
        writers[i].writeAllPartitions(Turtle.getNamespaces());
        writers[i].writeAllPartitions(Turtle.getStaticNamespaces());
    }
}
From source file:ldbc.snb.datagen.serializer.snb.interactive.TurtlePersonSerializer.java
License:Open Source License
public void initialize(Configuration conf, int reducerId) {
    dateTimeFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSXXX");
    int numFiles = FileNames.values().length;
    writers = new HDFSWriter[numFiles];
    for (int i = 0; i < numFiles; ++i) {
        writers[i] = new HDFSWriter(conf.get("ldbc.snb.datagen.serializer.socialNetworkDir"),
                FileNames.values()[i].toString() + "_" + reducerId,
                conf.getInt("ldbc.snb.datagen.numPartitions", 1),
                conf.getBoolean("ldbc.snb.datagen.serializer.compressed", false), "ttl");
        writers[i].writeAllPartitions(Turtle.getNamespaces());
        writers[i].writeAllPartitions(Turtle.getStaticNamespaces());
    }
}
From source file:ldbc.socialnet.dbgen.generator.MRGenerateUsers.java
License:Open Source License
public int runGenerateJob(Configuration conf) throws Exception {
    FileSystem fs = FileSystem.get(conf);
    String hadoopDir = new String(conf.get("outputDir") + "/hadoop");
    String socialNetDir = new String(conf.get("outputDir") + "/social_network");
    int numThreads = Integer.parseInt(conf.get("numThreads"));
    System.out.println("NUMBER OF THREADS " + numThreads);

    /// --------- Execute Jobs ------
    long start = System.currentTimeMillis();

    /// --------------- First job: Generating users ----------------
    printProgress("Starting: Person generation");
    conf.set("pass", Integer.toString(0));
    Job job = new Job(conf, "SIB Generate Users & 1st Dimension");
    job.setMapOutputKeyClass(TupleKey.class);
    job.setMapOutputValueClass(ReducedUserProfile.class);
    job.setOutputKeyClass(TupleKey.class);
    job.setOutputValueClass(ReducedUserProfile.class);
    job.setJarByClass(GenerateUsersMapper.class);
    job.setMapperClass(GenerateUsersMapper.class);
    job.setNumReduceTasks(numThreads);
    job.setInputFormatClass(NLineInputFormat.class);
    conf.setInt("mapred.line.input.format.linespermap", 1);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    FileInputFormat.setInputPaths(job, new Path(hadoopDir) + "/mrInputFile");
    FileOutputFormat.setOutputPath(job, new Path(hadoopDir + "/sib"));
    job.waitForCompletion(true);

    /// --------------- Sorting by first dimension ----------------
    printProgress("Starting: Sorting by first dimension");
    HadoopFileRanker fileRanker = new HadoopFileRanker(conf, TupleKey.class, ReducedUserProfile.class);
    fileRanker.run(hadoopDir + "/sib", hadoopDir + "/sibSorting");
    fs.delete(new Path(hadoopDir + "/sib"), true);

    /// --------------- Job generating first-dimension friendships ----------------
    printProgress("Starting: Friendship generation 1.");
    conf.set("pass", Integer.toString(0));
    conf.set("dimension", Integer.toString(1));
    job = new Job(conf, "SIB Generate Friendship - Interest");
    job.setMapOutputKeyClass(ComposedKey.class);
    job.setMapOutputValueClass(ReducedUserProfile.class);
    job.setOutputKeyClass(TupleKey.class);
    job.setOutputValueClass(ReducedUserProfile.class);
    job.setJarByClass(HadoopBlockMapper.class);
    job.setMapperClass(HadoopBlockMapper.class);
    job.setReducerClass(DimensionReducer.class);
    job.setNumReduceTasks(numThreads);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setPartitionerClass(HadoopBlockPartitioner.class);
    job.setSortComparatorClass(ComposedKeyComparator.class);
    job.setGroupingComparatorClass(ComposedKeyGroupComparator.class);
    FileInputFormat.setInputPaths(job, new Path(hadoopDir + "/sibSorting"));
    FileOutputFormat.setOutputPath(job, new Path(hadoopDir + "/sib2"));
    job.waitForCompletion(true);
    fs.delete(new Path(hadoopDir + "/sibSorting"), true);

    /// --------------- Sorting phase 2 ----------------
    printProgress("Starting: Sorting by second dimension");
    fileRanker = new HadoopFileRanker(conf, TupleKey.class, ReducedUserProfile.class);
    fileRanker.run(hadoopDir + "/sib2", hadoopDir + "/sibSorting2");
    fs.delete(new Path(hadoopDir + "/sib2"), true);

    /// --------------- Second job generating friendships ----------------
    printProgress("Starting: Friendship generation 2.");
    conf.set("pass", Integer.toString(1));
    conf.set("dimension", Integer.toString(2));
    job = new Job(conf, "SIB Generate Friendship - Interest");
    job.setMapOutputKeyClass(ComposedKey.class);
    job.setMapOutputValueClass(ReducedUserProfile.class);
    job.setOutputKeyClass(TupleKey.class);
    job.setOutputValueClass(ReducedUserProfile.class);
    job.setJarByClass(HadoopBlockMapper.class);
    job.setMapperClass(HadoopBlockMapper.class);
    job.setReducerClass(DimensionReducer.class);
    job.setNumReduceTasks(numThreads);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setPartitionerClass(HadoopBlockPartitioner.class);
    job.setSortComparatorClass(ComposedKeyComparator.class);
    job.setGroupingComparatorClass(ComposedKeyGroupComparator.class);
    FileInputFormat.setInputPaths(job, new Path(hadoopDir + "/sibSorting2"));
    FileOutputFormat.setOutputPath(job, new Path(hadoopDir + "/sib3"));
    job.waitForCompletion(true);
    fs.delete(new Path(hadoopDir + "/sibSorting2"), true);

    /// --------------- Sorting phase 3 ----------------
    printProgress("Starting: Sorting by third dimension");
    fileRanker = new HadoopFileRanker(conf, TupleKey.class, ReducedUserProfile.class);
    fileRanker.run(hadoopDir + "/sib3", hadoopDir + "/sibSorting3");
    fs.delete(new Path(hadoopDir + "/sib3"), true);

    /// --------------- Third job generating friendships ----------------
    printProgress("Starting: Friendship generation 3.");
    conf.set("pass", Integer.toString(2));
    conf.set("dimension", Integer.toString(2));
    job = new Job(conf, "SIB Generate Friendship - Random");
    job.setMapOutputKeyClass(ComposedKey.class);
    job.setMapOutputValueClass(ReducedUserProfile.class);
    job.setOutputKeyClass(TupleKey.class);
    job.setOutputValueClass(ReducedUserProfile.class);
    job.setJarByClass(HadoopBlockMapper.class);
    job.setMapperClass(HadoopBlockMapper.class);
    job.setReducerClass(DimensionReducer.class);
    job.setNumReduceTasks(numThreads);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setPartitionerClass(HadoopBlockPartitioner.class);
    job.setSortComparatorClass(ComposedKeyComparator.class);
    job.setGroupingComparatorClass(ComposedKeyGroupComparator.class);
    FileInputFormat.setInputPaths(job, new Path(hadoopDir + "/sibSorting3"));
    FileOutputFormat.setOutputPath(job, new Path(hadoopDir + "/sib4"));
    job.waitForCompletion(true);
    fs.delete(new Path(hadoopDir + "/sibSorting3"), true);

    /// --------------- Sorting phase 4 (for activity generation) ----------------
    printProgress("Starting: Sorting by third dimension (for activity generation)");
    fileRanker = new HadoopFileRanker(conf, TupleKey.class, ReducedUserProfile.class);
    fileRanker.run(hadoopDir + "/sib4", hadoopDir + "/sibSorting4");
    fs.delete(new Path(hadoopDir + "/sib4"), true);

    /// --------------- Fourth job: Serialize static network ----------------
    printProgress("Starting: Generating person activity");
    job = new Job(conf, "Generate user activity");
    job.setMapOutputKeyClass(ComposedKey.class);
    job.setMapOutputValueClass(ReducedUserProfile.class);
    job.setOutputKeyClass(TupleKey.class);
    job.setOutputValueClass(ReducedUserProfile.class);
    job.setJarByClass(HadoopBlockMapper.class);
    job.setMapperClass(HadoopBlockMapper.class);
    job.setReducerClass(UserActivityReducer.class);
    job.setNumReduceTasks(numThreads);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setPartitionerClass(HadoopBlockPartitioner.class);
    job.setSortComparatorClass(ComposedKeyComparator.class);
    job.setGroupingComparatorClass(ComposedKeyGroupComparator.class);
    FileInputFormat.setInputPaths(job, new Path(hadoopDir + "/sibSorting4"));
    FileOutputFormat.setOutputPath(job, new Path(hadoopDir + "/sib5"));
    job.waitForCompletion(true);
    fs.delete(new Path(hadoopDir + "/sib5"), true);

    int numEvents = 0;
    long min = Long.MAX_VALUE;
    long max = Long.MIN_VALUE;
    // Only sort and summarize update streams when the updateStreams flag is enabled.
    if (conf.getBoolean("updateStreams", false)) {
        for (int i = 0; i < numThreads; ++i) {
            int numPartitions = conf.getInt("numUpdatePartitions", 1);
            for (int j = 0; j < numPartitions; ++j) {
                /// --------------- Fifth job: Sort update streams ----------------
                conf.setInt("mapred.line.input.format.linespermap", 1000000);
                conf.setInt("reducerId", i);
                conf.setInt("partitionId", j);
                conf.set("streamType", "forum");
                Job jobForum = new Job(conf, "Sorting update streams " + j + " of reducer " + i);
                jobForum.setMapOutputKeyClass(LongWritable.class);
                jobForum.setMapOutputValueClass(Text.class);
                jobForum.setOutputKeyClass(LongWritable.class);
                jobForum.setOutputValueClass(Text.class);
                jobForum.setJarByClass(UpdateEventMapper.class);
                jobForum.setMapperClass(UpdateEventMapper.class);
                jobForum.setReducerClass(UpdateEventReducer.class);
                jobForum.setNumReduceTasks(1);
                jobForum.setInputFormatClass(SequenceFileInputFormat.class);
                jobForum.setOutputFormatClass(SequenceFileOutputFormat.class);
                jobForum.setPartitionerClass(UpdateEventPartitioner.class);
                FileInputFormat.addInputPath(jobForum,
                        new Path(socialNetDir + "/temp_updateStream_" + i + "_" + j + "_forum"));
                FileOutputFormat.setOutputPath(jobForum, new Path(hadoopDir + "/sibEnd"));
                printProgress("Starting: Sorting update streams");
                jobForum.waitForCompletion(true);
                fs.delete(new Path(socialNetDir + "/temp_updateStream_" + i + "_" + j + "_forum"), false);
                fs.delete(new Path(hadoopDir + "/sibEnd"), true);

                conf.setInt("mapred.line.input.format.linespermap", 1000000);
                conf.setInt("reducerId", i);
                conf.setInt("partitionId", j);
                conf.set("streamType", "person");
                Job jobPerson = new Job(conf, "Sorting update streams " + j + " of reducer " + i);
                jobPerson.setMapOutputKeyClass(LongWritable.class);
                jobPerson.setMapOutputValueClass(Text.class);
                jobPerson.setOutputKeyClass(LongWritable.class);
                jobPerson.setOutputValueClass(Text.class);
                jobPerson.setJarByClass(UpdateEventMapper.class);
                jobPerson.setMapperClass(UpdateEventMapper.class);
                jobPerson.setReducerClass(UpdateEventReducer.class);
                jobPerson.setNumReduceTasks(1);
                jobPerson.setInputFormatClass(SequenceFileInputFormat.class);
                jobPerson.setOutputFormatClass(SequenceFileOutputFormat.class);
                jobPerson.setPartitionerClass(UpdateEventPartitioner.class);
                FileInputFormat.addInputPath(jobPerson,
                        new Path(socialNetDir + "/temp_updateStream_" + i + "_" + j + "_person"));
                FileOutputFormat.setOutputPath(jobPerson, new Path(hadoopDir + "/sibEnd"));
                printProgress("Starting: Sorting update streams");
                jobPerson.waitForCompletion(true);
                fs.delete(new Path(socialNetDir + "/temp_updateStream_" + i + "_" + j + "_person"), false);
                fs.delete(new Path(hadoopDir + "/sibEnd"), true);

                if (conf.getBoolean("updateStreams", false)) {
                    Properties properties = new Properties();
                    FSDataInputStream file = fs.open(new Path(conf.get("outputDir")
                            + "/social_network/updateStream_" + i + "_" + j + "_person.properties"));
                    properties.load(file);
                    if (properties.getProperty("min_write_event_start_time") != null) {
                        Long auxMin = Long.parseLong(properties.getProperty("min_write_event_start_time"));
                        min = auxMin < min ? auxMin : min;
                        Long auxMax = Long.parseLong(properties.getProperty("max_write_event_start_time"));
                        max = auxMax > max ? auxMax : max;
                        numEvents += Long.parseLong(properties.getProperty("num_events"));
                    }
                    file.close();
                    file = fs.open(new Path(conf.get("outputDir")
                            + "/social_network/updateStream_" + i + "_" + j + "_forum.properties"));
                    properties.load(file);
                    if (properties.getProperty("min_write_event_start_time") != null) {
                        Long auxMin = Long.parseLong(properties.getProperty("min_write_event_start_time"));
                        min = auxMin < min ? auxMin : min;
                        Long auxMax = Long.parseLong(properties.getProperty("max_write_event_start_time"));
                        max = auxMax > max ? auxMax : max;
                        numEvents += Long.parseLong(properties.getProperty("num_events"));
                    }
                    file.close();
                    fs.delete(new Path(conf.get("outputDir")
                            + "/social_network/updateStream_" + i + "_" + j + "_person.properties"), true);
                    fs.delete(new Path(conf.get("outputDir")
                            + "/social_network/updateStream_" + i + "_" + j + "_forum.properties"), true);
                }
            }
        }
        if (conf.getBoolean("updateStreams", false)) {
            OutputStream output = fs
                    .create(new Path(conf.get("outputDir") + "/social_network/updateStream.properties"));
            output.write(new String("ldbc.snb.interactive.gct_delta_duration:" + conf.get("deltaTime") + "\n")
                    .getBytes());
            output.write(new String("ldbc.snb.interactive.min_write_event_start_time:" + min + "\n").getBytes());
            output.write(new String("ldbc.snb.interactive.max_write_event_start_time:" + max + "\n").getBytes());
            output.write(new String("ldbc.snb.interactive.update_interleave:" + (max - min) / numEvents + "\n")
                    .getBytes());
            output.write(new String("ldbc.snb.interactive.num_events:" + numEvents).getBytes());
            output.close();
        }
    }

    /// --------------- Sixth job: Materialize the friends lists ----------------
    /*
    Job job6 = new Job(conf, "Dump the friends lists");
    job6.setMapOutputKeyClass(ComposedKey.class);
    job6.setMapOutputValueClass(ReducedUserProfile.class);
    job6.setOutputKeyClass(ComposedKey.class);
    job6.setOutputValueClass(ReducedUserProfile.class);
    job6.setJarByClass(HadoopBlockMapper.class);
    job6.setMapperClass(HadoopBlockMapper.class);
    job6.setReducerClass(FriendListOutputReducer.class);
    job6.setNumReduceTasks(numThreads);
    job6.setInputFormatClass(SequenceFileInputFormat.class);
    job6.setOutputFormatClass(SequenceFileOutputFormat.class);
    job6.setPartitionerClass(HadoopBlockPartitioner.class);
    job6.setSortComparatorClass(ComposedKeyComparator.class);
    job6.setGroupingComparatorClass(ComposedKeyGroupComparator.class);
    FileInputFormat.setInputPaths(job6, new Path(hadoopDir + "/sibSorting4"));
    FileOutputFormat.setOutputPath(job6, new Path(hadoopDir + "/job6"));
    printProgress("Starting: Materialize friends for substitution parameters");
    int resMaterializeFriends = job6.waitForCompletion(true) ? 0 : 1;
    fs.delete(new Path(hadoopDir + "/sibSorting3"), true);
    */

    long end = System.currentTimeMillis();
    System.out.println(((end - start) / 1000) + " total seconds");
    for (int i = 0; i < numThreads; ++i) {
        fs.copyToLocalFile(new Path(socialNetDir + "/m" + i + "factors.txt"), new Path("./"));
        fs.copyToLocalFile(new Path(socialNetDir + "/m0friendList" + i + ".csv"), new Path("./"));
    }
    return 0;
}
From source file:mappers.BlockMultiplicationGroupingMapper.java
License:Apache License
public void map(Text key, T value, Context context) throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    int nRL = conf.getInt("NRL", 0);
    int nCL = conf.getInt("NCL", 0);
    int nRR = conf.getInt("NRR", 0);
    int nCR = conf.getInt("NCR", 0);
    boolean lTrans = conf.getBoolean("LTRANS", false);
    boolean rTrans = conf.getBoolean("RTRANS", false);
    String lName = conf.get("LEFTNAME");
    String rName = conf.get("RIGHTNAME");
    // can get the block indices from the text key or from the .nR and .nC fields
    String line = key.toString();
    String[] parts = line.split(",");
    String name = parts[0];
    // key format is "i,j,k"
    if (name.equals(lName)) {
        System.out.println("left" + key.toString());
        for (int j = 0; j < (rTrans ? nRR : nCR); j++) {
            outKey.set((lTrans ? parts[2] : parts[1]) + "," + String.valueOf(j) + ","
                    + (lTrans ? parts[1] : parts[2]));
            context.write(outKey, value);
        }
    } else {
        System.out.println("right" + key.toString());
        for (int i = 0; i < (lTrans ? nCL : nRL); i++) {
            outKey.set(String.valueOf(i) + "," + (rTrans ? parts[1] : parts[2]) + ","
                    + (rTrans ? parts[2] : parts[1]));
            context.write(outKey, value);
        }
    }
}
From source file:mappers.SquareBlockTraceMultiplicationGroupingMapper.java
License:Apache License
public void map(Text key, MatrixBlock value, Context context) throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    boolean lTrans = conf.getBoolean("LTRANS", false);
    boolean rTrans = conf.getBoolean("RTRANS", false);
    String lName = conf.get("LEFTNAME");
    // can get the block indices from the text key or from the .nR and .nC fields
    String line = key.toString();
    String[] parts = line.split(",");
    String name = parts[0];
    // key format is "i,j,k"
    if (name.equals(lName)) {
        int row = lTrans ? Integer.parseInt(parts[2]) : Integer.parseInt(parts[1]);
        int col = lTrans ? Integer.parseInt(parts[1]) : Integer.parseInt(parts[2]);
        outKey.set(String.valueOf(row) + "," + String.valueOf(row) + "," + String.valueOf(col));
        context.write(outKey, value);
    } else {
        int row = rTrans ? Integer.parseInt(parts[2]) : Integer.parseInt(parts[1]);
        int col = rTrans ? Integer.parseInt(parts[1]) : Integer.parseInt(parts[2]);
        outKey.set(String.valueOf(col) + "," + String.valueOf(col) + "," + String.valueOf(row));
        context.write(outKey, value);
    }
}
From source file:matrixFormat.MatrixRecordReader.java
License:Apache License
public void initialize(InputSplit genericSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    //FileSplit split = (FileSplit) genericSplit;
    MatrixFileSplit split = (MatrixFileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    method = (job.get("method").compareTo("IPB") == 0) ? 1
            : ((job.get("method").compareTo("OPB") == 0) ? 2 : 0);
    sparse = job.getBoolean("Sparse", false);
    this.maxLength = job.getInt("mapred.matrixrecordreader.maxlength", Integer.MAX_VALUE);
    start1 = split.getStart();
    start2 = split.getStart(1);
    end1 = start1 + split.getLength(0);
    end2 = start2 + split.getLength(1);
    blkID = split.getId();
    final Path file = split.getPath(0);
    final Path file2 = split.getPath(1);
    compressionCodecs = new CompressionCodecFactory(job);
    codec = compressionCodecs.getCodec(file);
    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);
    FileSystem fs2 = file2.getFileSystem(job);
    FSDataInputStream fileIn1 = fs.open(split.getPath(0));
    FSDataInputStream fileIn2 = fs2.open(split.getPath(1));
    //FileInputStream fileIn2 = new FileInputStream(file2.toString());
    //Don't care the compression stuff
    /*if (isCompressedInput()) {
        decompressor = CodecPool.getDecompressor(codec);
        if (codec instanceof SplittableCompressionCodec) {
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(
                    fileIn1, decompressor, start1, end1, SplittableCompressionCodec.READ_MODE.BYBLOCK);
            final SplitCompressionInputStream cIn2 = ((SplittableCompressionCodec) codec).createInputStream(
                    fileIn2, decompressor, start2, end2, SplittableCompressionCodec.READ_MODE.BYBLOCK);
            in = new MatrixReader(cIn, cIn2);
            start1 = cIn.getAdjustedStart();
            end1 = cIn.getAdjustedEnd();
            filePosition1 = cIn;
        } else {
            in = new MatrixReader(codec.createInputStream(fileIn1, decompressor),
                    codec.createInputStream(fileIn2, decompressor), job, split.getStarts(0), split.getStarts(1));
            filePosition1 = fileIn1;
        }
    } else {*/
    fileIn1.seek(start1);
    fileIn2.seek(start2);
    if (sparse) {
        in = new MatrixReader(fileIn1, fileIn2, job, split.getStart(0), split.getStart(1));
    } else {
        in = new MatrixReader(fileIn1, fileIn2, job, split.getStarts(0), split.getStarts(1));
    }
    //in = new MatrixReader(file, file2, job, split.getStarts(0), split.getStarts(1));
    filePosition1 = fileIn1;
    filePosition2 = fileIn2;
    //}

    // If this is not the first split, we always throw away first record
    // because we always (except the last split) read one extra line in
    // next() method.
    /*if (start1 != 0) {
        start1 += in.readOldBlock(maxLength, maxBytesToConsume(pos1));
        this.pos1 = start1;
    }
    in.readBlocks(maxLength, maxBytesToConsume(pos1));
    start1 += in.getBytesComsumed(0);
    //start2 += in.getBytesComsumed(1);
    this.pos1 = start1;*/
}
From source file:ml.shifu.guagua.mapreduce.GuaguaMapReduceClient.java
License:Apache License
private static void checkZkServerSetting(Configuration conf, CommandLine cmdLine) {
    if (!cmdLine.hasOption("-z")) {
        System.err.println("WARN: ZooKeeper server is not set, embedded ZooKeeper server will be started.");
        System.err.println(
                "WARN: For big data guagua application with fail-over zookeeper servers, independent ZooKeeper instances are recommended.");
        System.err.println("WARN: Zookeeper servers can be provided by '-z' parameter with non-empty value.");
        // change default embedded zookeeper server to master node
        boolean isZkInClient = conf.getBoolean(GuaguaConstants.GUAGUA_ZK_EMBEDBED_IS_IN_CLIENT, false);
        if (isZkInClient) {
            synchronized (GuaguaMapReduceClient.class) {
                if (embededZooKeeperServer == null) {
                    // 1. start embed zookeeper server in one thread.
                    int embedZkClientPort = 0;
                    try {
                        embedZkClientPort = ZooKeeperUtils.startEmbedZooKeeper();
                    } catch (IOException e) {
                        throw new RuntimeException(e);
                    }
                    // 2. check if it is started.
                    ZooKeeperUtils.checkIfEmbedZooKeeperStarted(embedZkClientPort);
                    try {
                        embededZooKeeperServer = InetAddress.getLocalHost().getHostName() + ":" + embedZkClientPort;
                    } catch (UnknownHostException e) {
                        throw new RuntimeException(e);
                    }
                }
            }
            // 3. set local embed zookeeper server address
            conf.set(GuaguaConstants.GUAGUA_ZK_SERVERS, embededZooKeeperServer);
        } else {
            conf.set(GuaguaConstants.GUAGUA_MASTER_SYSTEM_INTERCEPTERS, conf.get(
                    GuaguaConstants.GUAGUA_MASTER_SYSTEM_INTERCEPTERS,
                    "ml.shifu.guagua.master.MasterTimer,ml.shifu.guagua.master.MemoryStatsMasterInterceptor,ml.shifu.guagua.hadoop.ZooKeeperMasterInterceptor,ml.shifu.guagua.master.NettyMasterCoordinator "));
            conf.set(GuaguaConstants.GUAGUA_WORKER_SYSTEM_INTERCEPTERS, conf.get(
                    GuaguaConstants.GUAGUA_WORKER_SYSTEM_INTERCEPTERS,
                    "ml.shifu.guagua.worker.WorkerTimer,ml.shifu.guagua.worker.MemoryStatsWorkerInterceptor,ml.shifu.guagua.hadoop.ZooKeeperWorkerInterceptor,ml.shifu.guagua.worker.NettyWorkerCoordinator"));
            System.err.println("WARN: Zookeeper server will be started in master node of cluster");
        }
        return;
    } else {
        String zkServers = cmdLine.getOptionValue("z");
        if (zkServers == null || zkServers.length() == 0) {
            throw new IllegalArgumentException(
                    "Zookeeper servers should be provided by '-z' parameter with non-empty value.");
        }
        if (ZooKeeperUtils.checkServers(zkServers)) {
            conf.set(GuaguaConstants.GUAGUA_ZK_SERVERS, zkServers.trim());
        } else {
            throw new RuntimeException("Your specified zookeeper instance is not alive, please check.");
        }
    }
}
From source file:ml.shifu.guagua.yarn.GuaguaYarnClient.java
License:Apache License
private static void checkZkServerSetting(Configuration conf, CommandLine cmdLine) {
    if (!cmdLine.hasOption("-z")) {
        System.err.println("WARN: ZooKeeper server is not set, embedded ZooKeeper server will be started.");
        System.err.println(
                "WARN: For big data guagua application with fail-over zookeeper servers, independent ZooKeeper instances are recommended.");
        System.err.println("WARN: Zookeeper servers can be provided by '-z' parameter with non-empty value.");
        boolean isZkInClient = conf.getBoolean(GuaguaConstants.GUAGUA_ZK_EMBEDBED_IS_IN_CLIENT, false);
        if (isZkInClient) {
            synchronized (GuaguaYarnClient.class) {
                if (embededZooKeeperServer == null) {
                    // 1. start embed zookeeper server in one thread.
                    int embedZkClientPort = 0;
                    try {
                        embedZkClientPort = ZooKeeperUtils.startEmbedZooKeeper();
                    } catch (IOException e) {
                        throw new RuntimeException(e);
                    }
                    // 2. check if it is started.
                    ZooKeeperUtils.checkIfEmbedZooKeeperStarted(embedZkClientPort);
                    try {
                        embededZooKeeperServer = InetAddress.getLocalHost().getHostName() + ":" + embedZkClientPort;
                    } catch (UnknownHostException e) {
                        throw new RuntimeException(e);
                    }
                }
            }
            // 3. set local embed zookeeper server address
            conf.set(GuaguaConstants.GUAGUA_ZK_SERVERS, embededZooKeeperServer);
        } else {
            conf.set(GuaguaConstants.GUAGUA_MASTER_SYSTEM_INTERCEPTERS, conf.get(
                    GuaguaConstants.GUAGUA_MASTER_SYSTEM_INTERCEPTERS,
                    "ml.shifu.guagua.master.MasterTimer,ml.shifu.guagua.master.MemoryStatsMasterInterceptor,ml.shifu.guagua.hadoop.ZooKeeperMasterInterceptor,ml.shifu.guagua.master.NettyMasterCoordinator "));
            conf.set(GuaguaConstants.GUAGUA_WORKER_SYSTEM_INTERCEPTERS, conf.get(
                    GuaguaConstants.GUAGUA_WORKER_SYSTEM_INTERCEPTERS,
                    "ml.shifu.guagua.worker.WorkerTimer,ml.shifu.guagua.worker.MemoryStatsWorkerInterceptor,ml.shifu.guagua.hadoop.ZooKeeperWorkerInterceptor,ml.shifu.guagua.worker.NettyWorkerCoordinator"));
            System.err.println("WARN: Zookeeper server will be started in master node of cluster");
        }
        return;
    } else {
        String zkServers = cmdLine.getOptionValue("z");
        if (zkServers == null || zkServers.length() == 0) {
            throw new IllegalArgumentException(
                    "Zookeeper servers should be provided by '-z' parameter with non-empty value.");
        }
        if (ZooKeeperUtils.checkServers(zkServers)) {
            conf.set(GuaguaConstants.GUAGUA_ZK_SERVERS, zkServers.trim());
        } else {
            throw new RuntimeException("Your specified zookeeper instance is not alive, please check.");
        }
    }
}