Here you can find the source of readNextWord(BufferedInputStream in, Charset cs)
private static String readNextWord(BufferedInputStream in, Charset cs)
//package com.java2s;
//License from project: Apache License

import java.io.BufferedInputStream;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.Arrays;

public class Main {
    /** Maximum number of bytes a single term may occupy in the stream. */
    private static final int MAX_TERM_LENGTH = 500;

    /**
     * Reads the next whitespace-delimited term from {@code in} and decodes its
     * raw bytes with {@code cs}.
     *
     * <p>Leading whitespace bytes are skipped. The term ends at the first
     * whitespace byte after it (which is consumed) or at end of stream. Each
     * byte is classified with {@link Character#isWhitespace(char)} applied to
     * its unsigned value.
     *
     * @param in stream positioned at or before the term to read
     * @param cs charset used to decode the collected bytes
     * @return the decoded term; the empty string if the stream ends before any
     *         non-whitespace byte is found
     * @throws IllegalStateException if the term is longer than
     *         {@code MAX_TERM_LENGTH} bytes
     * @throws RuntimeException if reading from {@code in} fails; the
     *         underlying {@link IOException} is attached as the cause
     */
    private static String readNextWord(BufferedInputStream in, Charset cs) {
        byte[] buf = new byte[MAX_TERM_LENGTH];
        int len = 0;
        try {
            // Keep the value as an int: read() signals end of stream with -1,
            // which would be indistinguishable from data once narrowed to char.
            int b = in.read();

            // GoogleNews-vectors-negative300.bin doesn't include '\n' chars
            // between vectors - this check allows you to load binary files
            // created with either version of Mikolov's word2vec code
            while (b != -1 && Character.isWhitespace((char) b)) {
                b = in.read();
            }

            while (b != -1 && !Character.isWhitespace((char) b)) {
                if (len == buf.length) {
                    throw new IllegalStateException(
                            "Term exceeds maximum length of " + MAX_TERM_LENGTH + " bytes");
                }
                buf[len++] = (byte) b;
                b = in.read();
            }
        } catch (IOException e) {
            throw new RuntimeException("Failed to read next word", e);
        }
        // Decode only the bytes actually collected.
        return new String(buf, 0, len, cs);
    }
}