List of usage examples for org.apache.hadoop.io.Text#encode
public static ByteBuffer encode(String string, boolean replace) throws CharacterCodingException
From source file:com.ibm.bi.dml.runtime.transform.DistinctValue.java
License:Open Source License
public DistinctValue(String w, long count) throws CharacterCodingException { ByteBuffer bb = Text.encode(w, true); _bytes = bb.array();/* w w w . ja va2s. c o m*/ _length = bb.limit(); _count = count; }
From source file:cosmos.mapred.AggregatingRecordReader.java
License:Apache License
/**
 * Encodes {@code s} with {@code Text.encode(s, false)} and appends the encoded bytes
 * to {@code t}.
 *
 * @param t the text to append to
 * @param s the string whose encoded bytes are appended
 * @throws IOException wrapping the {@link CharacterCodingException} raised when the
 *         string cannot be encoded
 */
private void textAppend(Text t, String s) throws IOException {
    final ByteBuffer encoded;
    try {
        encoded = Text.encode(s, false);
    } catch (CharacterCodingException e) {
        throw new IOException(e);
    }
    // Only the first limit() bytes of the backing array are passed to append.
    t.append(encoded.array(), 0, encoded.limit());
}
From source file:de.tudarmstadt.ukp.dkpro.bigdata.collocations.Gram.java
License:Apache License
/**
 * Create a gram with the specified frequency.
 *
 * @param ngram
 *          the gram string; must not be null
 * @param frequency
 *          the gram frequency
 * @param type
 *          whether the gram is at the head of its text unit or tail or unigram
 * @throws NullPointerException
 *           if {@code ngram} is null (raised by {@code Preconditions.checkNotNull})
 * @throws IllegalStateException
 *           if the gram string cannot be encoded
 */
public Gram(String ngram, int frequency, Type type) {
    Preconditions.checkNotNull(ngram);
    try {
        // extra character is used for storing type which is part
        // of the sort key.
        ByteBuffer bb = Text.encode('\0' + ngram, true);
        bytes = bb.array();
        length = bb.limit();
    } catch (CharacterCodingException e) {
        throw new IllegalStateException("Should not have happened ", e);
    }
    // Store the type at offset 0, the slot reserved by the '\0' prefix above.
    encodeType(type, bytes, 0);
    this.frequency = frequency;
}
From source file:mvm.rya.indexing.accumulo.freetext.ColumnPrefixes.java
License:Apache License
/**
 * Returns a new {@code Text} holding a copy of {@code prefix} followed by the bytes
 * produced by {@code Text.encode(str, false)}. The {@code prefix} argument itself is
 * not modified; the append happens on the copy.
 *
 * @param prefix the text to copy and extend
 * @param str the string whose encoded bytes are appended to the copy
 * @return the newly created text
 * @throws IllegalArgumentException wrapping the {@link CharacterCodingException}
 *         raised when {@code str} cannot be encoded
 */
private static Text concat(Text prefix, String str) {
    final Text combined = new Text(prefix);
    final ByteBuffer suffix;
    try {
        suffix = Text.encode(str, false);
    } catch (CharacterCodingException cce) {
        throw new IllegalArgumentException(cce);
    }
    combined.append(suffix.array(), 0, suffix.limit());
    return combined;
}
From source file:org.apache.accumulo.examples.wikisearch.util.TextUtil.java
License:Apache License
/** * Appends the UTF-8 bytes of the given string to the given {@link Text} *///from w ww . j ava 2 s .com public static void textAppendNoNull(Text t, String s, boolean replaceBadChar) { try { ByteBuffer buffer = Text.encode(s, replaceBadChar); t.append(buffer.array(), 0, buffer.limit()); } catch (CharacterCodingException cce) { throw new IllegalArgumentException(cce); } }
From source file:org.apache.accumulo.examples.wikisearch.util.TextUtil.java
License:Apache License
/**
 * Converts the given string to its UTF-8 bytes. This uses Hadoop's method for
 * converting a string to UTF-8 and is much faster than calling
 * {@link String#getBytes(String)}.
 *
 * @param string
 *          the string to convert
 * @return a newly allocated array holding the UTF-8 representation of the string
 * @throws IllegalArgumentException
 *           wrapping the {@link CharacterCodingException} raised when the string
 *           cannot be encoded
 */
public static byte[] toUtf8(String string) {
    final ByteBuffer encoded;
    try {
        encoded = Text.encode(string, false);
    } catch (CharacterCodingException cce) {
        throw new IllegalArgumentException(cce);
    }
    // Copy only the first limit() bytes of the backing array into a fresh array.
    final int size = encoded.limit();
    final byte[] utf8 = new byte[size];
    System.arraycopy(encoded.array(), 0, utf8, 0, size);
    return utf8;
}
From source file:org.apache.mahout.utils.nlp.collocations.llr.Gram.java
License:Apache License
/**
 * Create a gram with the specified frequency.
 *
 * @param ngram
 *          the gram string; must not be null
 * @param frequency
 *          the gram frequency
 * @param type
 *          whether the gram is at the head of its text unit or tail or unigram
 * @throws NullPointerException
 *           if {@code ngram} is null
 * @throws IllegalStateException
 *           if the gram string cannot be encoded
 */
public Gram(String ngram, int frequency, Type type) {
    if (ngram == null) {
        throw new NullPointerException();
    }
    try {
        // extra character is used for storing type which is part
        // of the sort key.
        ByteBuffer bb = Text.encode('\0' + ngram, true);
        bytes = bb.array();
        length = bb.limit();
    } catch (CharacterCodingException e) {
        throw new IllegalStateException("Should not have happened ", e);
    }
    // Store the type at offset 0, the slot reserved by the '\0' prefix above.
    encodeType(type, bytes, 0);
    this.frequency = frequency;
}