List of usage examples for java.io.Reader.mark
public void mark(int readAheadLimit) throws IOException
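Before the per-project examples, here is a minimal, self-contained sketch of the basic pattern (the class name ReaderMarkExample and the sample input "12.5kg" are illustrative only): mark(readAheadLimit) remembers the current position, up to readAheadLimit characters may then be read, and reset() rewinds to the mark. Note that mark is only usable when markSupported() returns true (e.g. BufferedReader, StringReader), which is why several of the examples below wrap their Reader in a BufferedReader first.

import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;

public class ReaderMarkExample {
    public static void main(String[] args) throws IOException {
        // BufferedReader and StringReader support mark/reset; a bare FileReader does not.
        Reader reader = new BufferedReader(new StringReader("12.5kg"));

        // Remember the current position; at most 1 character may be read before reset().
        reader.mark(1);
        int next = reader.read();   // peek at the next character
        reader.reset();             // rewind to the marked position

        System.out.println("peeked: " + (char) next);           // prints "peeked: 1"
        System.out.println("read:   " + (char) reader.read());  // the same character is read again
    }
}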
From source file:net.metanotion.json.StreamingParser.java
private Lexeme maybeNumber(final Reader in, int firstChar) throws IOException {
    // this might be a number, if it is, lex it and return the token, otherwise throw an exception.
    final String integer = lexInt(in, firstChar);
    in.mark(MAX_BUFFER);
    final int c = in.read();
    if (c == '.') {
        final String decimal = integer + lexFraction(in);
        return new Lexeme(Token.FLOAT, Double.valueOf(decimal));
    } else if (Character.toLowerCase(c) == 'e') {
        in.reset();
        final String decimal = integer + lexExp(in);
        return new Lexeme(Token.FLOAT, Double.valueOf(decimal));
    } else {
        in.reset();
        return new Lexeme(Token.INT, Long.valueOf(integer));
    }
}
From source file:com.streamsets.pipeline.lib.json.StreamingJsonParser.java
public StreamingJsonParser(Reader reader, long initialPosition, Mode mode) throws IOException {
    starting = true;
    this.reader = reader;
    if (mode == Mode.MULTIPLE_OBJECTS) {
        if (initialPosition > 0) {
            IOUtils.skipFully(reader, initialPosition);
            posCorrection += initialPosition;
        }
        if (reader.markSupported()) {
            reader.mark(MAX_CHARS_TO_READ_FORWARD);
            int count = 0;
            byte firstByte = -1;
            while (count++ < MAX_CHARS_TO_READ_FORWARD && (firstByte = (byte) reader.read()) != -1
                    && firstByte <= ' ') {
                // everything less than a space is whitespace
            }
            if (firstByte > ' ') {
                firstNonSpaceChar = firstByte;
            }
            reader.reset();
        }
    }
    jsonParser = getObjectMapper().getFactory().createParser(reader);
    if (mode == Mode.ARRAY_OBJECTS && initialPosition > 0) {
        fastForwardJsonParser(initialPosition);
    }
    this.mode = mode;
}
From source file:org.zilverline.extractors.AbstractExtractor.java
/**
 * This method extracts all relevant info of the file as a ParsedFileInfo object. Uses getContent as callback.
 *
 * @param f the File to extract content from
 *
 * @return ParsedFileInfo the object containing relevant info of the provided file
 */
public final ParsedFileInfo extractInfo(final File f) {
    if (f == null) {
        log.warn("Something went terribly wrong, file = null, returning null ");
        return null;
    }
    try {
        setFile(f);
        Reader reader = getContent(f);
        fileInfo.setReader(reader);
        // get the summary from the reader
        if (reader != null) {
            String summary = fileInfo.getSummary();
            if (!StringUtils.hasText(summary)) {
                char[] sumChars = new char[SUMMARY_SIZE];
                int numChars = 0;
                try {
                    if (reader.markSupported()) {
                        reader.mark(SUMMARY_SIZE);
                        numChars = reader.read(sumChars);
                        reader.reset();
                    }
                    if (numChars > 0) {
                        summary = new String(sumChars, 0, numChars);
                    }
                    if (log.isDebugEnabled()) {
                        log.debug("Summary extracted from reader: " + summary);
                    }
                    setSummary(getSummaryFromContent(summary));
                } catch (IOException e) {
                    log.warn("Error extracting summary form reader", e);
                }
            }
        }
        // Set the title if there's none yet
        if (!StringUtils.hasLength(fileInfo.getTitle())) {
            fileInfo.setTitle(FileUtils.getBasename(f));
        }
    } catch (Exception e) {
        // here we don't throw any, since we do not want to interrupt the indexing process
        log.warn("Unexpected Error extracting content from " + f.getName(), e);
    } catch (OutOfMemoryError e) {
        // this happens with very, very large Documents
        log.error("Very Serious Error. Out of Memory for very large documents: " + f.getName()
                + ", try increasing your JVM heap size: for example, start your server with option '-Xmx128m'."
                + " Skipping file.", e);
    } catch (Throwable e) {
        log.error("Very Serious Error while extracting contents from: " + f.getName(), e);
    }
    return fileInfo;
}
From source file:com.streamsets.datacollector.json.JsonObjectReaderImpl.java
public JsonObjectReaderImpl(Reader reader, long initialPosition, Mode mode, Class<?> objectClass,
        ObjectMapper objectMapper) throws IOException {
    this.mode = mode;
    this.objectClass = objectClass;
    this.objectMapper = objectMapper;
    starting = true;
    this.reader = reader;
    if (mode == Mode.MULTIPLE_OBJECTS) {
        if (initialPosition > 0) {
            IOUtils.skipFully(reader, initialPosition);
            posCorrection += initialPosition;
        }
        if (reader.markSupported()) {
            reader.mark(MAX_CHARS_TO_READ_FORWARD);
            int count = 0;
            byte firstByte = -1;
            while (count++ < MAX_CHARS_TO_READ_FORWARD && (firstByte = (byte) reader.read()) != -1
                    && firstByte <= ' ') {
                // everything less than a space is whitespace
            }
            if (firstByte > ' ') {
                firstNonSpaceChar = firstByte;
            }
            reader.reset();
        }
    }
    jsonParser = getObjectMapper().getFactory().createParser(reader);
    if (mode == Mode.ARRAY_OBJECTS && initialPosition > 0) {
        fastForwardJsonParser(initialPosition);
    }
}
From source file:net.metanotion.json.StreamingParser.java
private String lexInt(final Reader in, final int firstChar) throws IOException {
    final StringBuilder sb = new StringBuilder();
    int digits = 0;
    if (firstChar == '-') {
        sb.append("-");
    } else if (Character.isDigit(firstChar)) {
        sb.append(Character.toChars(firstChar));
        digits++;
    } else {
        final String found = new String(Character.toChars(firstChar));
        throw new ParserException("Expecting a number, instead found: '" + found + QUOTE);
    }
    while (true) {
        in.mark(MAX_BUFFER);
        final int c = in.read();
        if (Character.isDigit(c)) {
            digits++;
            sb.append(Character.toChars(c));
        } else {
            in.reset();
            if (digits == 0) {
                throw new ParserException(EXPECTED_DIGIT);
            }
            return sb.toString();
        }
    }
}
From source file:de.micromata.genome.gwiki.page.gspt.ExtendedTemplate.java
/**
 * Parses the reader into a list of elements.
 *
 * @param reader the reader
 * @return the list
 * @throws IOException Signals that an I/O exception has occurred.
 */
protected List<ParseElement> parseToElements(Reader reader) throws IOException {
    if (!reader.markSupported()) {
        reader = new BufferedReader(reader);
    }
    // StringWriter sw = new StringWriter();
    List<ParseElement> elements = new ArrayList<ParseElement>();
    startScript(elements);
    // boolean start = false;
    int c;
    while ((c = reader.read()) != -1) {
        if (c == '<') {
            reader.mark(1);
            c = reader.read();
            if (c != '%') {
                // sw.write('<');
                elements.add(new ParseElement(Type.ConstString, "<"));
                reader.reset();
            } else {
                reader.mark(1);
                c = reader.read();
                if (c == '=') {
                    groovyExpression(reader, elements);
                } else if (c == '-') {
                    reader.read();
                    groovyComment(reader, elements);
                } else if (c == '#') {
                    groovySection(Type.GlobalCode, reader, elements);
                } else if (c == '!') {
                    groovySection(Type.ClassCode, reader, elements);
                } else {
                    reader.reset();
                    groovySection(Type.Statement, reader, elements);
                }
            }
            continue; // at least '<' is consumed ... read next chars.
        } else if (c == '-') {
            reader.mark(4);
            if (reader.read() == '-' && reader.read() == '%' && reader.read() == '>') {
                /**
                 * @logging
                 * @reason Within a GSPT file there is a comment end sequence without an opening one
                 * @action Fix the GSPT
                 */
                GWikiLog.warn("In gspt --%> comment without open");
            }
            reader.reset();
        }
        if (elements.size() == 0 || elements.get(elements.size() - 1).type != Type.ConstString) {
            elements.add(new ParseElement(Type.ConstString, ""));
        }
        elements.get(elements.size() - 1).text.append((char) c);
    }
    return elements;
}
From source file:BayesianAnalyzer.java
private String nextToken(Reader reader) throws java.io.IOException {
    StringBuffer token = new StringBuffer();
    int i;
    char ch, ch2;
    boolean previousWasDigit = false;
    boolean tokenCharFound = false;
    if (!reader.ready()) {
        return null;
    }
    while ((i = reader.read()) != -1) {
        ch = (char) i;
        if (ch == ':') {
            String tokenString = token.toString() + ':';
            if (tokenString.equals("From:") || tokenString.equals("Return-Path:")
                    || tokenString.equals("Subject:") || tokenString.equals("To:")) {
                return tokenString;
            }
        }
        if (Character.isLetter(ch) || ch == '-' || ch == '$'
                || ch == '\u20AC' // the EURO symbol
                || ch == '!' || ch == '\'') {
            tokenCharFound = true;
            previousWasDigit = false;
            token.append(ch);
        } else if (Character.isDigit(ch)) {
            tokenCharFound = true;
            previousWasDigit = true;
            token.append(ch);
        } else if (previousWasDigit && (ch == '.' || ch == ',')) {
            reader.mark(1);
            previousWasDigit = false;
            i = reader.read();
            if (i == -1) {
                break;
            }
            ch2 = (char) i;
            if (Character.isDigit(ch2)) {
                tokenCharFound = true;
                previousWasDigit = true;
                token.append(ch);
                token.append(ch2);
            } else {
                reader.reset();
                break;
            }
        } else if (ch == '\r') {
            // cr found, ignore
        } else if (ch == '\n') {
            // eol found
            tokenCharFound = true;
            previousWasDigit = false;
            token.append(ch);
            break;
        } else if (tokenCharFound) {
            break;
        }
    }
    if (tokenCharFound) {
        // System.out.println("Token read: " + token);
        return token.toString();
    } else {
        return null;
    }
}
From source file:com.globalsight.everest.edit.offline.upload.UploadApi.java
/**
 * Loads the upload file into an OfflinePageData object.
 *
 * @param p_reader
 *            a stream opened on the upload file.
 * @param p_keepIssues
 *            when an OfflinePageData object is called *twice* to load data,
 *            this parameter allows to keep issues read in the first run
 *            (the second run normally clears the entire object). This is
 *            necessary for RTF list view which first parses the RTF, then
 *            loads the textual content as list view text file.
 * @return if there are no errors, null is returned. If there are errors, a
 *         fully formed HTML error report page is returned.
 */
public String loadListViewTextFile(Reader p_reader, String p_fileName, boolean p_keepIssues) {
    if (m_uploadPageData == null) {
        m_uploadPageData = new OfflinePageData();
        m_referencePageDatas = new ArrayList<PageData>();
    }
    try {
        p_reader.mark(0);
    } catch (IOException e1) {
        e1.printStackTrace();
    }
    String errPage = null;
    // Set the linefeed normalization sequence.
    if ((errPage = getLFNormalizationSequence()) != null) {
        return errPage;
    }
    // filter some text
    Reader new_reader = null;
    try {
        StringBuffer content = new StringBuffer();
        BufferedReader br = new BufferedReader(p_reader);
        String line = br.readLine();
        String previousLine = null;
        while (line != null) {
            boolean ignoreThisLine = line.startsWith(SEGMENT_PAGE_NAME_KEY)
                    || line.startsWith(SEGMENT_FILE_PATH_KEY) || line.startsWith(HEADER_JOB_NAME)
                    || line.startsWith(HEADER_JOB_ID) || line.startsWith(GS_TOOLKIT_FORMAT)
                    || line.startsWith(SEGMENT_SID_KEY) || line.startsWith(SEGMENT_XLF_TARGET_STATE_KEY)
                    || line.startsWith(SEGMENT_INCONTEXT_MATCH_KEY) || line.startsWith(SEGMENT_TM_PROFILE_KEY)
                    || line.startsWith(SEGMENT_TERMBASE_KEY) || line.startsWith(HEADER_POPULATE_100_SEGMENTS);
            if (!ignoreThisLine) {
                content.append(line).append("\r\n");
            }
            // check if it is omegat
            if (ignoreThisLine && line.startsWith(GS_TOOLKIT_FORMAT)) {
                int index = line.indexOf(":");
                String f = index > 0 ? line.substring(index + 1).trim() : "xliff";
                m_uploadPageData.setIsOmegaT("omegat".equalsIgnoreCase(f));
                m_uploadPageData.setIsXliff("xliff".equalsIgnoreCase(f));
            }
            // (GBS-3711) Store "state" attribute value of XLF target section.
            if (ignoreThisLine && line.startsWith(SEGMENT_XLF_TARGET_STATE_KEY)) {
                int index = line.indexOf(":");
                if (index > 0) {
                    String state = line.substring(index + 1).trim();
                    String tuId = previousLine.substring(2);
                    m_uploadPageData.addXlfTargetState(tuId, state);
                }
            }
            // GBS-3825
            if (ignoreThisLine && line.startsWith(HEADER_POPULATE_100_SEGMENTS)) {
                int index = line.indexOf(":");
                if (index > 0) {
                    String isPopulate100 = line.substring(index + 1).trim();
                    m_uploadPageData.setPopulate100("yes".equalsIgnoreCase(isPopulate100));
                }
            }
            previousLine = line;
            line = br.readLine();
        }
        new_reader = new StringReader(content.toString());
        br.close();
    } catch (Exception e) {
        new_reader = p_reader;
    }
    // Read the upload file into an OfflinePageData object.
    try {
        m_errWriter.setFileName(p_fileName);
        m_uploadPageData.setLoadConversionLineBreak(m_normalizedLB);
        m_uploadPageData.loadOfflineTextFile(new_reader, false);
        Vector<OfflineSegmentData> list = m_uploadPageData.getSegmentList();
        for (OfflineSegmentData object : list) {
            String targetText = object.getDisplayTargetText();
            targetText = StringUtil.replace(targetText, OfflineConstants.PONUD_SIGN, "#");
            object.setDisplayTargetText(targetText);
        }
        // set err writer's page, task and job ids
        m_errWriter.processOfflinePageData(m_uploadPageData);
    } catch (Throwable ex) {
        try {
            p_reader.reset();
        } catch (IOException e) {
            e.printStackTrace();
        }
        String errMsg = null;
        boolean noSegments = false;
        if (ex instanceof ParseException) {
            ParseException pe = (ParseException) ex;
            int[][] expected = pe.expectedTokenSequences;
            if (expected != null && expected.length == 2 && expected[0].length == 1
                    && expected[1].length == 1 && expected[0][0] == 8 && expected[1][0] == 9) {
                Token current = pe.currentToken;
                if (current != null && current.next != null && current.next.kind == 17) {
                    noSegments = true;
                }
            }
        }
        // check if this is empty
        if (noSegments) {
            errMsg = m_messages.getString("NoSegmentsInFile");
        } else {
            String exMsg = ex.getMessage();
            String args[] = { EditUtil.encodeHtmlEntities(exMsg) };
            bindErrMsg(args, p_reader);
            errMsg = MessageFormat.format(m_messages.getString("FormatTwoLoadError"), (Object[]) args);
            CATEGORY.error(errMsg);
        }
        m_errWriter.addFileErrorMsg(errMsg);
        m_errWriter.processOfflinePageData(m_uploadPageData);
        return m_errWriter.buildPage().toString();
    }
    return null;
}
From source file:net.sourceforge.pmd.util.IOUtil.java
public static Reader skipBOM(Reader source) {
    Reader in = new BufferedReader(source);
    try {
        in.mark(1);
        int firstCharacter = in.read();
        if (firstCharacter != '\ufeff') {
            in.reset();
        }
    } catch (IOException e) {
        throw new RuntimeException("Error while trying to skip BOM marker", e);
    }
    return in;
}
From source file:org.apache.tika.parser.csv.CSVSniffer.java
List<CSVResult> sniff(Reader reader) throws IOException {
    if (!reader.markSupported()) {
        reader = new BufferedReader(reader);
    }
    List<CSVResult> ret = new ArrayList<>();
    for (char delimiter : delimiters) {
        reader.mark(markLimit);
        try {
            CSVResult result = new Snifflet(delimiter).sniff(reader);
            ret.add(result);
        } finally {
            reader.reset();
        }
    }
    Collections.sort(ret);
    return ret;
}