List of usage examples for org.apache.hadoop.io.Text.toString()
@Override
public String toString()
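Before the collected examples, a minimal sketch of the call itself (class and variable names here are illustrative, not from the examples below): Text stores its contents as UTF-8 bytes, and toString() decodes those bytes back into a java.lang.String.

import org.apache.hadoop.io.Text;

public class TextToStringDemo {
    public static void main(String[] args) {
        // Text holds UTF-8 bytes; toString() decodes them into a String.
        Text text = new Text("hello world");
        System.out.println(text.toString()); // prints "hello world"

        // set() reuses the same instance; toString() reflects the new contents.
        text.set("second value");
        System.out.println(text.toString()); // prints "second value"
    }
}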
From source file:TestString.java
License:Apache License
@Test
public void testTextSubstring() throws Exception {
    Text text = new Text("string");
    Text text1 = new Text();
    Text text2 = new Text();
    long start = System.nanoTime();
    for (int i = 0; i < 100000000; i++) {
        text1.set(text.getBytes(), 0, 2);
        text2.set(text.getBytes(), 3, text.getLength() - 3);
    }
    long end = System.nanoTime();
    System.out.println("TestTextSubString");
    System.out.println("text1: " + text1.toString());
    System.out.println("text2: " + text2.toString());
    System.out.println("Elapsed Time: " + (end - start) / 1000000000f + " seconds.");
}
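A caveat worth noting for the substring pattern above: Text.getBytes() returns the backing byte array, which can be longer than the logical value, so only the first getLength() bytes are valid. A minimal sketch of the safe pattern (class and variable names are illustrative):

import org.apache.hadoop.io.Text;

public class TextBytesDemo {
    public static void main(String[] args) {
        Text source = new Text("string");
        // getBytes() returns the backing buffer, which may be longer than the
        // logical value; bound reads by getLength(), never by the array length.
        byte[] raw = source.getBytes();
        int validLength = source.getLength();
        Text prefix = new Text();
        prefix.set(raw, 0, Math.min(2, validLength));
        System.out.println(prefix.toString()); // prints "st"
    }
}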
From source file:PageRankIterationReducerTest.java
public void test1() {
    Text key = new Text("testKey");
    PageRankFollower pageRankFollower = new PageRankFollower(new Text(key.toString()), 1.0, 0,
            new ArrayList<Text>());
    List<PageRankFollower> values = new ArrayList<PageRankFollower>();
    values.add(pageRankFollower);
    PageRankFollower result = (new PageRankIterationReducer()).calculatePageRank(key, values);
    System.out.println("result: " + result);
}
From source file:AnagramReducer.java
License:Apache License
public void reduce(Text anagramKey, Iterator<Text> anagramValues, OutputCollector<Text, Text> results,
        Reporter reporter) throws IOException {
    String output = "";
    while (anagramValues.hasNext()) {
        Text anagram = anagramValues.next();
        output = output + anagram.toString() + "~";
    }
    StringTokenizer outputTokenizer = new StringTokenizer(output, "~");
    // if (outputTokenizer.countTokens() >= 2) {
    output = output.replace("~", ",");
    outputKey.set(anagramKey.toString() + "(" + outputTokenizer.countTokens() + "): ");
    outputValue.set(output);
    results.collect(outputKey, outputValue);
    // }
}
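The concatenate-then-replace approach above does quadratic string copying on large groups and re-tokenizes the joined string just to count it. A hedged alternative sketch of the same reduce (assuming the same outputKey and outputValue fields) joins with a StringBuilder and counts in the loop:

public void reduce(Text anagramKey, Iterator<Text> anagramValues, OutputCollector<Text, Text> results,
        Reporter reporter) throws IOException {
    StringBuilder joined = new StringBuilder();
    int count = 0;
    while (anagramValues.hasNext()) {
        // Append a separator before every element except the first.
        if (count > 0) {
            joined.append(',');
        }
        joined.append(anagramValues.next().toString());
        count++;
    }
    outputKey.set(anagramKey.toString() + "(" + count + "): ");
    outputValue.set(joined.toString());
    results.collect(outputKey, outputValue);
}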
From source file:SampleUdf.java
License:Apache License
public Text evaluate(final Text s, Text sleepTime) throws InterruptedException {
    Long time = 180 * 1000L;
    if (sleepTime != null) {
        time = Long.parseLong(sleepTime.toString()) * 1000L;
    }
    System.out.println("Sleep Time : " + time);
    Thread.sleep(time);
    if (s == null) {
        return null;
    }
    return new Text(s.toString().toLowerCase());
}
From source file:$.ExampleBulkImporter.java
License:Apache License
/** {@inheritDoc} */
@Override
public void produce(LongWritable filePos, Text value, KijiTableContext context) throws IOException {
    final String[] split = value.toString().split(":");
    final String rowKey = split[0];
    final int integerValue = Integer.parseInt(split[1]);
    final EntityId eid = context.getEntityId(rowKey);
    context.put(eid, "primitives", "int", integerValue);
}
From source file:$package.SparkPageRankProgram.java
License:Apache License
@Override
public void run(SparkContext sc) {
    LOG.info("Processing backlinkURLs data");
    JavaPairRDD<LongWritable, Text> backlinkURLs = sc.readFromStream("backlinkURLStream", Text.class);
    int iterationCount = getIterationCount(sc);

    LOG.info("Grouping data by key");
    // Group backlinks by unique URL in key
    JavaPairRDD<String, Iterable<String>> links = backlinkURLs.values()
            .mapToPair(new PairFunction<Text, String, String>() {
                @Override
                public Tuple2<String, String> call(Text s) {
                    String[] parts = SPACES.split(s.toString());
                    return new Tuple2<>(parts[0], parts[1]);
                }
            }).distinct().groupByKey().cache();

    // Initialize default rank for each key URL
    JavaPairRDD<String, Double> ranks = links.mapValues(new Function<Iterable<String>, Double>() {
        @Override
        public Double call(Iterable<String> rs) {
            return 1.0;
        }
    });

    // Calculate and update URL ranks iteratively using the PageRank algorithm.
    for (int current = 0; current < iterationCount; current++) {
        LOG.debug("Processing data with PageRank algorithm. Iteration {}/{}", current + 1, iterationCount);
        // Calculate URL contributions to the rank of other URLs.
        JavaPairRDD<String, Double> contribs = links.join(ranks).values()
                .flatMapToPair(new PairFlatMapFunction<Tuple2<Iterable<String>, Double>, String, Double>() {
                    @Override
                    public Iterable<Tuple2<String, Double>> call(Tuple2<Iterable<String>, Double> s) {
                        LOG.debug("Processing {} with rank {}", s._1(), s._2());
                        int urlCount = Iterables.size(s._1());
                        List<Tuple2<String, Double>> results = new ArrayList<>();
                        for (String n : s._1()) {
                            results.add(new Tuple2<>(n, s._2() / urlCount));
                        }
                        return results;
                    }
                });
        // Re-calculate URL ranks based on backlink contributions.
        ranks = contribs.reduceByKey(new Sum()).mapValues(new Function<Double, Double>() {
            @Override
            public Double call(Double sum) {
                return 0.15 + sum * 0.85;
            }
        });
    }

    LOG.info("Writing ranks data");
    final ServiceDiscoverer discoveryServiceContext = sc.getServiceDiscoverer();
    final Metrics sparkMetrics = sc.getMetrics();
    JavaPairRDD<byte[], Integer> ranksRaw = ranks
            .mapToPair(new PairFunction<Tuple2<String, Double>, byte[], Integer>() {
                @Override
                public Tuple2<byte[], Integer> call(Tuple2<String, Double> tuple) throws Exception {
                    LOG.debug("URL {} has rank {}", Arrays.toString(tuple._1().getBytes(Charsets.UTF_8)),
                            tuple._2());
                    URL serviceURL = discoveryServiceContext
                            .getServiceURL(SparkPageRankApp.GOOGLE_TYPE_PR_SERVICE_NAME);
                    if (serviceURL == null) {
                        throw new RuntimeException(
                                "Failed to discover service: " + SparkPageRankApp.GOOGLE_TYPE_PR_SERVICE_NAME);
                    }
                    try {
                        URLConnection connection = new URL(serviceURL,
                                String.format("transform/%s", tuple._2().toString())).openConnection();
                        try (BufferedReader reader = new BufferedReader(
                                new InputStreamReader(connection.getInputStream(), Charsets.UTF_8))) {
                            String pr = reader.readLine();
                            if (Integer.parseInt(pr) == POPULAR_PAGE_THRESHOLD) {
                                sparkMetrics.count(POPULAR_PAGES, 1);
                            } else if (Integer.parseInt(pr) <= UNPOPULAR_PAGE_THRESHOLD) {
                                sparkMetrics.count(UNPOPULAR_PAGES, 1);
                            } else {
                                sparkMetrics.count(REGULAR_PAGES, 1);
                            }
                            return new Tuple2<>(tuple._1().getBytes(Charsets.UTF_8), Integer.parseInt(pr));
                        }
                    } catch (Exception e) {
                        LOG.warn("Failed to read the Stream for service {}",
                                SparkPageRankApp.GOOGLE_TYPE_PR_SERVICE_NAME, e);
                        throw Throwables.propagate(e);
                    }
                }
            });

    // Store calculated results in the output Dataset.
    // All calculated results are stored in one row; each entry of the row is
    // the calculated URL rank based on backlink contributions.
    sc.writeToDataset(ranksRaw, "ranks", byte[].class, Integer.class);
    LOG.info("PageRanks successfully computed and written to \"ranks\" dataset");
}
From source file:adept.mapreduce.AdeptMapper.java
License:Apache License
public void map(Text key, Text value, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {
    try {
        HltContentContainer hltcontentcontainer = (HltContentContainer) xmlserializer
                .deserializeString(value.toString(), HltContentContainer.class);
        hltcontentcontainer = doProcess(hltcontentcontainer);
        String serializedHltContainer = xmlserializer.serializeAsString(hltcontentcontainer);
        serializedHltContainer = serializedHltContainer.replaceAll("\\r\\n", " ");
        serializedHltContainer = serializedHltContainer.replaceAll("\\n", " ");
        output.collect(key, new Text(serializedHltContainer));
    } catch (Exception e) {
        // System.out.println(e.getMessage());
        System.out.println("Exception thrown in map function: " + e.getLocalizedMessage());
    }
}
From source file:adept.mapreduce.PreprocessingJob.java
License:Apache License
public void map(Text key, Text value, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {
    HltContentContainer hltcontentcontainer = new HltContentContainer();
    Document doc = DocumentMaker.getInstance().createDefaultDocument(key.toString(), null, null, null, null,
            value.toString(), hltcontentcontainer);
    // Sentence segmentation. For now, consider all text as a single sentence.
    List<Sentence> sentences = new ArrayList<Sentence>();
    sentences.addAll(OpenNLPSentenceSegmenter.getInstance().getSentences(doc.getValue(),
            doc.getDefaultTokenStream()));
    hltcontentcontainer.setSentences(sentences);
    XMLSerializer xmlserializer = new XMLSerializer(SerializationType.XML);
    String serializedHltContainer = xmlserializer.serializeAsString(hltcontentcontainer);
    serializedHltContainer = serializedHltContainer.replaceAll("\r\n", " ");
    serializedHltContainer = serializedHltContainer.replaceAll("\n", " ");
    output.collect(key, new Text(serializedHltContainer));
}
From source file:AllLab_Skeleton.Lab1.WordCount_Mapper.java
public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
    StringTokenizer itr = new StringTokenizer(value.toString());
    // System.out.println("Content on line " + key.toString() + " :- " + value.toString());
    while (itr.hasMoreTokens()) {
        word.set(itr.nextToken());
        context.write(word, one);
    }
}
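For context, this mapper is one half of the classic word count. A minimal sketch of the reducer it would typically pair with (not part of the original source; assumes the usual Text/IntWritable key-value types):

public static class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
    private final IntWritable result = new IntWritable();

    @Override
    public void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int sum = 0;
        // Sum the per-word counts emitted by the mapper.
        for (IntWritable val : values) {
            sum += val.get();
        }
        result.set(sum);
        context.write(key, result);
    }
}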
From source file:AllLab_Skeleton.Lab2.Lab2Mapper.java
@Override
public void map(Object key, Text values, Context context) {
    if (values.toString().length() > 0) {
        try {
            String[] value = values.toString().split("\t");
            CompositeKeyWritable cw = new CompositeKeyWritable(value[6], value[3]);
            context.write(cw, NullWritable.get());
        } catch (IOException | InterruptedException ex) {
            Logger.getLogger(Lab2Mapper.class.getName()).log(Level.SEVERE, null, ex);
        }
    }
}