Usage examples for java.io.CharArrayWriter.toCharArray()
public char[] toCharArray()
From source file: importer.filters.CceFilter.java
/** * Convert all the files in a directory * @param input the raw text input string * @param name the name of the new version * @param cortext a cortext mvd archive/*from w w w. jav a2 s.c om*/ * @param corcode a corcode mvd archive * @return the log */ public String convert(String input, String name, Archive cortex, Archive corcode) throws ImporterException { try { init(); CharArrayWriter txt = new CharArrayWriter(); String lastWord = ""; String firstWord = ""; String lineEnd = findLineEnding(input); String[] lines = input.split(lineEnd); paraSeen = true; for (int i = 0; i < lines.length; i++) { String str = lines[i].trim(); firstWord = getFirstWord(str); if (str.startsWith(".") && str.length() > 1 && Character.isLetter(str.charAt(1))) { convertDotCommand(str, txt); if (!lastEndsInHyphen && written > 0) writeCurrent(txt, SPACE); // don't reset lastWord continue; } else if (lines[i].startsWith(" ")) { Range r; if (!paraSeen) paraSeen = true; else if (written > paraStart) { // write previous para range r = new Range("p", paraStart, written - paraStart); markup.add(r); } if (written > 0) writeCurrent(txt, CR); paraStart = written; // markup new paragraphs with 4 spaces for readability r = new Range(CSSStyles.PARA_START, written, 4); markup.add(r); writeLineContents(" " + str, txt); } else { if (lastEndsInHyphen) { Range r; if (isHardHyphen(lastWord, firstWord)) { r = new Range(CSSStyles.STRONG, written - 1, 1); markup.add(r); } else { r = new Range(CSSStyles.WEAK, written - 1, 1); markup.add(r); } writeCurrent(txt, CR); r = new Range(CSSStyles.HYPHEN_CR, written - 1, 1); markup.add(r); } else if (written > 0) { writeCurrent(txt, CR); if (written == paraStart + 1) paraStart = written; } writeLineContents(str, txt); } if (!lineCommands.isEmpty()) { for (int j = lineCommands.size() - 1; j >= 0; j--) { Range r = new Range(lineCommands.get(j), paraStart, written - paraStart); markup.add(r); } lineCommands.clear(); paraStart = written; } lastWord = getLastWord(str); } // 
write closing para range if (written > paraStart) { Range r = new Range("p", paraStart, written - paraStart); markup.add(r); } markup.sort(); char[] chars = txt.toCharArray(); cortex.put(name, chars); String json = markup.toSTILDocument().toString(); corcode.put(name, json.toCharArray()); } catch (Exception e) { e.printStackTrace(System.out); } return ""; }
From source file: org.commoncrawl.util.ArcFileWriter.java
private String escapeURI(String uri, String charsetEncoding) throws IOException { boolean needToChange = false; StringBuffer out = new StringBuffer(uri.length()); Charset charset;/*w w w . j a v a 2 s . co m*/ CharArrayWriter charArrayWriter = new CharArrayWriter(); if (charsetEncoding == null) throw new NullPointerException("charsetName"); try { charset = Charset.forName(charsetEncoding); } catch (IllegalCharsetNameException e) { throw new UnsupportedEncodingException(charsetEncoding); } catch (UnsupportedCharsetException e) { throw new UnsupportedEncodingException(charsetEncoding); } for (int i = 0; i < uri.length();) { int c = (int) uri.charAt(i); // System.out.println("Examining character: " + c); if (dontNeedEncoding.get(c)) { out.append((char) c); i++; } else { // convert to external encoding before hex conversion do { charArrayWriter.write(c); /* * If this character represents the start of a Unicode surrogate pair, * then pass in two characters. It's not clear what should be done if * a bytes reserved in the surrogate pairs range occurs outside of a * legal surrogate pair. For now, just treat it as if it were any * other character. */ if (c >= 0xD800 && c <= 0xDBFF) { /* * System.out.println(Integer.toHexString(c) + * " is high surrogate"); */ if ((i + 1) < uri.length()) { int d = (int) uri.charAt(i + 1); /* * System.out.println("\tExamining " + Integer.toHexString(d)); */ if (d >= 0xDC00 && d <= 0xDFFF) { /* * System.out.println("\t" + Integer.toHexString(d) + * " is low surrogate"); */ charArrayWriter.write(d); i++; } } } i++; } while (i < uri.length() && !dontNeedEncoding.get((c = (int) uri.charAt(i)))); charArrayWriter.flush(); String str = new String(charArrayWriter.toCharArray()); byte[] ba = str.getBytes(charsetEncoding); for (int j = 0; j < ba.length; j++) { out.append('%'); char ch = Character.forDigit((ba[j] >> 4) & 0xF, 16); // converting to use uppercase letter as part of // the hex value if ch is a letter. 
if (Character.isLetter(ch)) { ch -= caseDiff; } out.append(ch); ch = Character.forDigit(ba[j] & 0xF, 16); if (Character.isLetter(ch)) { ch -= caseDiff; } out.append(ch); } charArrayWriter.reset(); needToChange = true; } } return (needToChange ? out.toString() : uri); }