org.colombbus.tangara.io.ScriptReader.java Source code

Introduction

Here is the source code for org.colombbus.tangara.io.ScriptReader.java
Source

 /**
  * Tangara is an educational platform to get started with programming.
  * Copyright (C) 2009 Colombbus (http://www.colombbus.org)
  *
  * This program is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation, either version 3 of the License, or
  * (at your option) any later version.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 package org.colombbus.tangara.io;

 import java.io.File;
 import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.UnsupportedEncodingException;
 import java.nio.ByteBuffer;
 import java.nio.CharBuffer;
 import java.nio.charset.Charset;
 import java.nio.charset.CharsetDecoder;
 import java.nio.charset.CoderResult;
 import java.nio.charset.CodingErrorAction;
 import java.util.ArrayList;
 import java.util.List;

 import org.apache.commons.io.IOUtils;
 import org.apache.commons.lang.StringUtils;
 import org.apache.commons.lang.Validate;

 /**
  * Reader of scripts
  */
 public class ScriptReader {

     /** List of supported charset list */
     private static final List<Charset> COMMON_CHARSETS = new ArrayList<Charset>();

     private static final void addCommonCharsetIfSupported(String charsetName) {
         if (Charset.isSupported(charsetName)) {
             Charset cs = Charset.forName(charsetName);
             if (COMMON_CHARSETS.contains(cs) == false) {
                 COMMON_CHARSETS.add(cs);
             }
         }
     }

     static {
         addCommonCharsetIfSupported("UTF-8"); //$NON-NLS-1$
         addCommonCharsetIfSupported("ISO-8859-1"); //$NON-NLS-1$
         addCommonCharsetIfSupported("x-MacRoman"); //$NON-NLS-1$
     }

private static final char[] WITNESS_CHARS = { '', '', '', '', '', '', '', '', '', '', '', '', '', '' };

     private Charset lastScriptCharset = Charset.forName("UTF-8"); //$NON-NLS-1$

     /**
      * Create a script reader
      *
      */
     public ScriptReader() {
     }

     public String readScript(InputStream in) throws IOException, MalformedScriptException {
         Validate.notNull(in, "in argument is null"); //$NON-NLS-1$

         ByteBuffer content = toByteBuffer(in);

         if (ScriptHeaderHelper.containsHeader(content)) {
             return extractScriptWithHeader(content);
         } else {
             return extractScriptWithoutHeader(content);
         }
     }

     public String readScript(File scriptFile) throws IOException, MalformedScriptException {
         InputStream in = null;
         try {
             in = new FileInputStream(scriptFile);
             String script = readScript(in);
             return script;
         } finally {
             IOUtils.closeQuietly(in);
         }
     }

     private static ByteBuffer toByteBuffer(InputStream in) throws IOException {
         byte[] contentByteArray = IOUtils.toByteArray(in);
         ByteBuffer content = ByteBuffer.wrap(contentByteArray);
         return content;
     }

     private String extractScriptWithoutHeader(ByteBuffer content) throws UnsupportedEncodingException {
         for (Charset cs : COMMON_CHARSETS) {
             String decodedScript = tryDecodeBufferWithCharset(content, cs);
             if (containsWitnessChar(decodedScript)) {
                 lastScriptCharset = cs;
                 return decodedScript;
             }
         }

         for (Charset cs : Charset.availableCharsets().values()) {
             String decodedScript = tryDecodeBufferWithCharset(content, cs);
             if (decodedScript != null) {
                 lastScriptCharset = cs;
                 return decodedScript;
             }
         }

         throw new UnsupportedEncodingException();
     }

     private static boolean containsWitnessChar(String script) {
         if (script != null)
             return StringUtils.containsAny(script, WITNESS_CHARS);
         return false;
     }

     /**
      * Try to decode a byte buffer with a charset
      *
      * @param content
      *            the bute buffer
      * @param cs
      *            the charset
      * @return <code>null</code> if the charset is not supported, or the decoded
      *         string
      */
     private static String tryDecodeBufferWithCharset(ByteBuffer content, Charset cs) {
         CharBuffer buffer = CharBuffer.allocate(content.capacity() * 2);
         CharsetDecoder decoder = createDecoder(cs);
         content.rewind();
         CoderResult coderRes = decoder.decode(content, buffer, true);
         if (coderRes.isError() == false) {
             buffer.rewind();
             return buffer.toString().trim();
         }
         return null;
     }

     private static CharsetDecoder createDecoder(Charset cs) {
         CharsetDecoder decoder = cs.newDecoder();
         decoder.onMalformedInput(CodingErrorAction.REPORT);
         decoder.onUnmappableCharacter(CodingErrorAction.REPORT);
         return decoder;
     }

     private String extractScriptWithHeader(ByteBuffer content) throws UnsupportedEncodingException {
         content.rewind();

         ScriptHeader header = ScriptHeaderHelper.extractHeader(content);
         lastScriptCharset = header.getCharset();

         String script = extractScriptWithCharset(content, header.getCharset());
         return script;
     }

     private static String extractScriptWithCharset(ByteBuffer content, Charset charset) {
         CharBuffer contentBuffer = charset.decode(content);
         return contentBuffer.toString().trim();
     }

     /**
      * Get the charset of the last decoded script. If no script has been read,
      * it returns UTF-8 charset
      *
      * @return a charset, never <code>null</code>
      */
     public Charset lastScriptCharset() {
         return lastScriptCharset;
     }

 }