List of usage examples for the java.io.UnsupportedEncodingException constructor UnsupportedEncodingException(String)
public UnsupportedEncodingException(String s)
From source file:org.searsia.engine.Resource.java
private String fillTemplate(String template, String query) throws UnsupportedEncodingException { String url = template;/*from www . ja v a 2 s .c o m*/ for (String param : this.privateParameters.keySet()) { url = url.replaceAll("\\{" + param + "\\??\\}", this.privateParameters.get(param)); } url = url.replaceAll("\\{q\\??\\}", query); url = url.replaceAll("\\{[0-9A-Za-z\\-_]+\\?\\}", ""); // remove optional parameters if (url.matches(".*\\{[0-9A-Za-z\\-_]+\\}.*")) { throw new UnsupportedEncodingException("Missing url parameter"); // TODO: better error } return url; }
From source file:org.eclipse.smila.connectivity.framework.crawler.web.WebCrawler.java
/**
 * Reads the value of one attribute from a crawled document.
 *
 * <p>If the attribute has a field attribute, the value is taken from the matching document
 * field (URL, content, title or MIME type). Otherwise, if it has a meta attribute, the value
 * is built from the document's filtered metadata list in the shape selected by the meta
 * attribute's return type.
 *
 * @param indexDocument
 *          the document to read from
 * @param attribute
 *          the attribute describing which value to read
 * @param forceByteToString
 *          for the CONTENT field: if true the content bytes are decoded and returned as a
 *          String; if false they are returned as bytes (re-encoded to UTF_8 when the declared
 *          charset differs)
 *
 * @return the attribute value
 *
 * @throws UnsupportedEncodingException
 *           if the charset named in the response headers is not supported
 * @throws IllegalArgumentException
 *           if the field type, meta type or meta return type is not handled, or the attribute
 *           has neither a field nor a meta attribute
 */
private Serializable readAttribute(final IndexDocument indexDocument, final Attribute attribute,
    final boolean forceByteToString) throws UnsupportedEncodingException {
  if (attribute.getFieldAttribute() != null) {
    switch (attribute.getFieldAttribute()) {
      case URL:
        return indexDocument.getUrl();
      case CONTENT:
        // search encoding in headers
        String charsetName = indexDocument.extractFromResponseHeaders(_contentTypePattern, 1);
        if (charsetName == null) {
          charsetName = UTF_8;
        }
        if (forceByteToString) {
          return new String(indexDocument.getContent(), charsetName);
        } else {
          if (UTF_8.equalsIgnoreCase(charsetName)) {
            // already in the target encoding, no conversion needed
            return indexDocument.getContent();
          }
          // decode to utf
          try {
            return (new String(indexDocument.getContent(), charsetName)).getBytes(UTF_8);
          } catch (UnsupportedEncodingException uee) {
            // Keep the failing charset and the original exception instead of a placeholder message.
            final UnsupportedEncodingException wrapped =
                new UnsupportedEncodingException("Unsupported content charset: " + charsetName);
            wrapped.initCause(uee);
            throw wrapped;
          }
        }
      case TITLE:
        return indexDocument.getTitle();
      case MIME_TYPE:
        return indexDocument.extractFromResponseHeaders(_mimeTypePattern, 1);
      default:
        throw new IllegalArgumentException("Unknown field attribute type " + attribute.getFieldAttribute());
    }
  } else if (attribute.getMetaAttribute() != null) {
    final MetaType metaType = attribute.getMetaAttribute().getType();
    final List<String> metaNames = attribute.getMetaAttribute().getMetaName();
    List<String> metaData;
    switch (metaType) {
      case META_DATA:
        metaData = getFilteredMetadataList(indexDocument.getHtmlMetaData(), metaNames);
        break;
      case RESPONSE_HEADER:
        metaData = getFilteredMetadataList(indexDocument.getResponseHeaders(), metaNames);
        break;
      case META_DATA_WITH_RESPONSE_HEADER_FALL_BACK:
        metaData = getFilteredMetadataList(indexDocument.getMetaDataWithResponseHeaderFallBack(), metaNames);
        break;
      default:
        // Report the meta type that was actually switched on; the field attribute is null here.
        throw new IllegalArgumentException("Unknown meta attribute type " + metaType);
    }
    final MetaReturnType returnType = attribute.getMetaAttribute().getReturnType();
    switch (returnType) {
      case META_DATA_STRING:
        return metaData.toArray();
      case META_DATA_VALUE:
        // keep only the part after the first separator of each entry
        for (int i = 0; i < metaData.size(); i++) {
          final String metaDataString = metaData.get(i);
          metaData.set(i, metaDataString.substring(metaDataString.indexOf(METADATA_SEPARATOR) + 1).trim());
        }
        return metaData.toArray();
      case META_DATA_M_OBJECT:
        // NOTE(review): an entry without METADATA_SEPARATOR makes substring(0, -1) throw
        // StringIndexOutOfBoundsException - confirm getFilteredMetadataList guarantees the separator.
        final NameValuePair[] metaDataNameValuePairs = new NameValuePair[metaData.size()];
        for (int i = 0; i < metaData.size(); i++) {
          final String metaDataString = metaData.get(i);
          final String metadataName = metaDataString
            .substring(0, metaDataString.indexOf(METADATA_SEPARATOR)).trim();
          final String metaDataValue = metaDataString
            .substring(metaDataString.indexOf(METADATA_SEPARATOR) + 1).trim();
          metaDataNameValuePairs[i] = new NameValuePair(metadataName, metaDataValue);
        }
        return metaDataNameValuePairs;
      default:
        throw new IllegalArgumentException("Unknown meta attribute return type " + returnType);
    }
  } else {
    throw new IllegalArgumentException("Unknown attribute " + attribute.getName());
  }
}
From source file:uk.ac.soton.itinnovation.easyjena.core.impl.JenaOntologyManager.java
/** * Loads ontology from a serialised string * * @param ontRDFString serialized ontology string * @param baseURI model base URI//from ww w . ja v a 2 s. c o m * @param format format of the ontology stream * @param encoding the string's encoding. uses utf-8 by default (if null) * @param location the loading type for import statements * @return the loaded model * @throws java.io.UnsupportedEncodingException if the encoding is not supported * @throws java.io.IOException if the stream could not be read for any reason * * @see JenaOntologyManager.ModelFormat * @see JenaOntologyManager.LoadingLocation */ public Model loadOntologyFromString(String ontRDFString, String baseURI, ModelFormat format, String encoding, LoadingLocation location) throws IOException { logger.info("Loading ontology from serialised string"); Model m = null; InputStream stream = null; //use UTF-8 as standard encoding String enc = encoding; if (enc == null) { enc = "UTF-8"; } try { //start with an empty model m = ModelFactory.createDefaultModel(); stream = new ByteArrayInputStream(ontRDFString.getBytes(enc)); m.read(stream, baseURI, format.getText()); m = loadOntologyModel(m, baseURI, location); } catch (UnsupportedEncodingException e) { logger.error("Encoding error while reading ontology <{}> from serialised string", baseURI, e); throw new UnsupportedEncodingException("Could not load ontology <" + baseURI + "> from serialised string. " + "The encoding was incorrect."); } catch (Exception e) { logger.error("Could not load ontology <{}> from stream", baseURI, e); throw new IOException("Could not load ontology <" + baseURI + "> from serialised string. "); } finally { //close stream in case it has been opened if (stream != null) { try { stream.close(); } catch (IOException ex) { logger.error("Error closing RDF stream", ex); //not throwing here as it would not affect the functionality } } } return loadOntologyModel(m, baseURI, location); }
From source file:org.commoncrawl.util.ArcFileWriter.java
private String escapeURI(String uri, String charsetEncoding) throws IOException { boolean needToChange = false; StringBuffer out = new StringBuffer(uri.length()); Charset charset;//www. ja v a 2 s.c o m CharArrayWriter charArrayWriter = new CharArrayWriter(); if (charsetEncoding == null) throw new NullPointerException("charsetName"); try { charset = Charset.forName(charsetEncoding); } catch (IllegalCharsetNameException e) { throw new UnsupportedEncodingException(charsetEncoding); } catch (UnsupportedCharsetException e) { throw new UnsupportedEncodingException(charsetEncoding); } for (int i = 0; i < uri.length();) { int c = (int) uri.charAt(i); // System.out.println("Examining character: " + c); if (dontNeedEncoding.get(c)) { out.append((char) c); i++; } else { // convert to external encoding before hex conversion do { charArrayWriter.write(c); /* * If this character represents the start of a Unicode surrogate pair, * then pass in two characters. It's not clear what should be done if * a bytes reserved in the surrogate pairs range occurs outside of a * legal surrogate pair. For now, just treat it as if it were any * other character. */ if (c >= 0xD800 && c <= 0xDBFF) { /* * System.out.println(Integer.toHexString(c) + * " is high surrogate"); */ if ((i + 1) < uri.length()) { int d = (int) uri.charAt(i + 1); /* * System.out.println("\tExamining " + Integer.toHexString(d)); */ if (d >= 0xDC00 && d <= 0xDFFF) { /* * System.out.println("\t" + Integer.toHexString(d) + * " is low surrogate"); */ charArrayWriter.write(d); i++; } } } i++; } while (i < uri.length() && !dontNeedEncoding.get((c = (int) uri.charAt(i)))); charArrayWriter.flush(); String str = new String(charArrayWriter.toCharArray()); byte[] ba = str.getBytes(charsetEncoding); for (int j = 0; j < ba.length; j++) { out.append('%'); char ch = Character.forDigit((ba[j] >> 4) & 0xF, 16); // converting to use uppercase letter as part of // the hex value if ch is a letter. 
if (Character.isLetter(ch)) { ch -= caseDiff; } out.append(ch); ch = Character.forDigit(ba[j] & 0xF, 16); if (Character.isLetter(ch)) { ch -= caseDiff; } out.append(ch); } charArrayWriter.reset(); needToChange = true; } } return (needToChange ? out.toString() : uri); }
From source file:dk.statsbiblioteket.util.LineReader.java
/**
 * Not supported by this class: every call fails.
 *
 * @param str ignored
 * @throws IOException always, as an {@link UnsupportedEncodingException} explaining why the
 *         operation is unavailable
 */
@Override
public void writeUTF(String str) throws IOException {
    final String reason = "This is not supported as the " + "necessary util is package "
            + "private in DataOutputStream";
    throw new UnsupportedEncodingException(reason);
}
From source file:org.python.pydev.core.REF.java
/** * @param fileLocation may be null/*from www .j a va 2 s . c o m*/ */ /*package*/static String getValidEncoding(String ret, String fileLocation) { if (ret == null) { return ret; } final String lower = ret.trim().toLowerCase(); if (lower.startsWith("latin")) { if (lower.indexOf("1") != -1) { return "latin1"; //latin1 } } if (lower.equals("iso-latin-1-unix")) { return "latin1"; //handle case from python libraries } try { if (!Charset.isSupported(ret)) { if (LOG_ENCODING_ERROR) { if (fileLocation != null) { if ("uft-8".equals(ret) && fileLocation.endsWith("bad_coding.py")) { return null; //this is an expected error in the python library. } } String msg = "The encoding found: >>" + ret + "<< on " + fileLocation + " is not a valid encoding."; Log.log(IStatus.ERROR, msg, new UnsupportedEncodingException(msg)); } return null; //ok, we've been unable to make it supported (better return null than an unsupported encoding). } return ret; } catch (IllegalCharsetNameException ex) { if (LOG_ENCODING_ERROR) { String msg = "The encoding found: >>" + ret + "<< on " + fileLocation + " is not a valid encoding."; Log.log(IStatus.ERROR, msg, ex); } } return null; }