Example usage for java.util Scanner hasNext

List of usage examples for java.util Scanner hasNext

Introduction

This page collects usage examples of the java.util.Scanner.hasNext() method.

Prototype

public boolean hasNext() 

Source Link

Document

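Returns true if this scanner has another token in its input. This method may block while waiting for input to scan. The scanner does not advance past any input.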

Usage

From source file:it.acubelab.smaph.SmaphAnnotator.java

/**
 * Issue the query to bing, return the json object.
 * /*from  w  w w.  j a va 2 s.  c o m*/
 * @param query
 *            the query.
 * @param retryLeft
 *            how many retry left we have (if zero, will return an empty
 *            object in case of failure).
 * @return the JSON object as returned by the Bing Api.
 * @throws Exception
 *             is the call to the API failed.
 */
private synchronized JSONObject queryBing(String query, int retryLeft) throws Exception {
    boolean forceCacheOverride = retryLeft < BING_RETRY;
    if (forceCacheOverride)
        Thread.sleep(1000);
    String accountKeyAuth = Base64.encode((bingKey + ":" + bingKey).getBytes(), 0);

    URL url = new URL(
            "https://api.datamarket.azure.com/Bing/Search/v1/Composite?Sources=%27web%2Bspell%2BRelatedSearch%27&Query=%27"
                    + URLEncoder.encode(query, "utf8")
                    + "%27&Options=%27EnableHighlighting%27&Market=%27en-US%27&Adult=%27Off%27&$format=Json");

    JSONObject result = null;
    byte[] compressed = url2jsonCache.get(url.toExternalForm());
    if (compressed != null)
        result = new JSONObject(SmaphUtils.decompress(compressed));

    boolean cached = !forceCacheOverride && result != null;
    SmaphAnnotatorDebugger.out.printf("%s%s %s%n", forceCacheOverride ? "<forceCacheOverride>" : "",
            cached ? "<cached>" : "Querying", url);
    if (!cached) {
        HttpURLConnection connection = (HttpURLConnection) url.openConnection();
        connection.setConnectTimeout(0);
        connection.setRequestProperty("Authorization", "Basic " + accountKeyAuth);
        connection.setRequestProperty("Accept", "*/*");
        connection.setRequestProperty("Content-Type", "multipart/form-data");

        connection.setUseCaches(false);

        if (connection.getResponseCode() != 200) {
            Scanner s = new Scanner(connection.getErrorStream()).useDelimiter("\\A");
            System.err.printf("Got HTTP error %d. Message is: %s%n", connection.getResponseCode(), s.next());
            s.close();
            throw new RuntimeException("Got response code:" + connection.getResponseCode());
        }

        Scanner s = new Scanner(connection.getInputStream()).useDelimiter("\\A");
        String resultStr = s.hasNext() ? s.next() : "";
        result = new JSONObject(resultStr);
        url2jsonCache.put(url.toExternalForm(), SmaphUtils.compress(result.toString()));
        increaseFlushCounter();
    }

    if (recacheNeeded(result) && retryLeft > 0)
        return queryBing(query, retryLeft - 1);

    return result;
}

From source file:edu.harvard.iq.dataverse.dataaccess.TabularSubsetGenerator.java

/**
 * Copies the selected columns of the first {@code numCases} rows of a
 * delimited text stream into a tab-separated file.
 * <p>
 * Rows are separated by {@code \n}. Every output row holds the requested
 * columns, in the order given, joined by a tab and terminated by a newline.
 * Any rows that follow the first {@code numCases} rows must be empty.
 * The input stream and the output file are closed before this method
 * returns, whether it succeeds or fails.
 *
 * @param in        the delimited input; closed by this method.
 * @param outfile   path of the file to write (overwritten if it exists).
 * @param columns   zero-based indexes of the columns to keep.
 * @param numCases  the number of rows the input is expected to hold.
 * @param delimiter the column separator; it is handed to
 *                  {@link String#split(String, int)} and is therefore
 *                  interpreted as a regular expression.
 * @throws RuntimeException if the input has fewer rows than
 *                          {@code numCases}, or has non-empty rows after them.
 */
public void subsetFile(InputStream in, String outfile, List<Integer> columns, Long numCases, String delimiter) {
    Scanner scanner = new Scanner(in);
    scanner.useDelimiter("\\n");
    BufferedWriter out = null;
    try {
        out = new BufferedWriter(new FileWriter(outfile));
        for (long caseIndex = 0; caseIndex < numCases; caseIndex++) {
            if (!scanner.hasNext()) {
                throw new RuntimeException("Tab file has fewer rows than the determined number of cases.");
            }
            // limit -1 keeps trailing empty fields
            String[] line = scanner.next().split(delimiter, -1);

            // Assemble the whole row first, so that a bad column index
            // never leaves a partial row in the output.
            StringBuilder row = new StringBuilder();
            boolean firstColumn = true;
            for (Integer i : columns) {
                if (!firstColumn) {
                    row.append('\t');
                }
                row.append(line[i]);
                firstColumn = false;
            }
            row.append('\n');
            out.write(row.toString());
        }

        while (scanner.hasNext()) {
            if (!"".equals(scanner.next())) {
                throw new RuntimeException(
                        "Tab file has extra nonempty rows than the determined number of cases.");
            }
        }
    } catch (IOException e) {
        // also covers FileNotFoundException, which is an IOException
        e.printStackTrace();
    } finally {
        // Release both resources on every path, including the
        // RuntimeExceptions thrown above.
        scanner.close();
        if (out != null) {
            try {
                out.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}

From source file:edu.harvard.iq.dataverse.dataaccess.TabularSubsetGenerator.java

/**
 * Creates the "rotated image" of a tab-delimited file: a binary file in
 * which the values of each column are stored contiguously.
 * <p>
 * The result is written next to the tab file, under the same name with the
 * suffix {@code .90d}. Its layout is:
 * <ul>
 * <li>an offset header of {@code varcount} big-endian 8-byte longs; entry
 * {@code i} holds the header size plus the total byte size of columns
 * {@code 0..i}, i.e. the position at which column {@code i} ends;</li>
 * <li>the column vectors, in column order; every value is followed by a
 * newline.</li>
 * </ul>
 * While reading, each column is buffered in memory; whenever a column buffer
 * overflows, it is appended to a temporary file for that column. The
 * temporary files are deleted once they have been copied into the result.
 *
 * @param tabfile   the tab-delimited file, with rows separated by {@code \n}.
 * @param varcount  the number of columns to rotate.
 * @param casecount the number of rows to read.
 * @return the rotated image file.
 * @throws IOException if the tab file has fewer than {@code casecount} rows,
 *                     or if reading or writing any of the files fails.
 */
private File generateRotatedImage(File tabfile, int varcount, int casecount) throws IOException {
    // TODO: throw exceptions if bad file, zero varcount, etc. ...

    String rotatedImageFileName = tabfile.getAbsolutePath() + ".90d";

    final int MAX_COLUMN_BUFFER = 8 * 1024;

    // TODO: adjust MAX_COLUMN_BUFFER here, so that the total size of all
    // the column buffers is no more than some maximum (10 MB - for now?),
    // but no less than 1024 maybe?

    // Java zero-initializes these arrays.
    int[] bufferedSizes = new int[varcount];
    long[] cachedfileSizes = new long[varcount];
    File[] columnTempFiles = new File[varcount];
    byte[][] bufferedColumns = new byte[varcount][MAX_COLUMN_BUFFER];

    // read the tab-delimited file:

    Scanner scanner = new Scanner(new FileInputStream(tabfile));
    try {
        scanner.useDelimiter("\\n");

        for (int caseindex = 0; caseindex < casecount; caseindex++) {
            if (!scanner.hasNext()) {
                throw new IOException("Tab file has fewer rows than the stored number of cases!");
            }
            String[] line = scanner.next().split("\t", -1);
            // TODO: throw an exception if there are fewer tab-delimited
            // tokens than the number of variables specified.
            for (int varindex = 0; varindex < varcount; varindex++) {
                // TODO: figure out the safest way to convert strings to
                // bytes here. Is it going to be safer to use getBytes("UTF8")?
                // we are already making the assumption that the values
                // in the tab file are in UTF8. -- L.A.
                // (The Scanner above and getBytes() here both use the
                // platform default charset.)
                byte[] tokenBytes = (line[varindex] + "\n").getBytes();
                int tokensize = tokenBytes.length;

                if (bufferedSizes[varindex] + tokensize <= MAX_COLUMN_BUFFER) {
                    // continue buffering
                    System.arraycopy(tokenBytes, 0, bufferedColumns[varindex], bufferedSizes[varindex],
                            tokensize);
                    bufferedSizes[varindex] += tokensize;
                    continue;
                }

                // The token does not fit: fill the buffer up and dump its
                // contents into the temp file. (Do note that there may be
                // *several* MAX_COLUMN_BUFFERs worth of bytes in the token!)
                // If the buffer is already full, zero bytes are copied here.
                int tokenoffset = MAX_COLUMN_BUFFER - bufferedSizes[varindex];
                System.arraycopy(tokenBytes, 0, bufferedColumns[varindex], bufferedSizes[varindex],
                        tokenoffset);

                File bufferTempFile = columnTempFiles[varindex];
                if (bufferTempFile == null) {
                    bufferTempFile = File.createTempFile("columnBufferFile", "bytes");
                    columnTempFiles[varindex] = bufferTempFile;
                }

                // *append* the contents of the buffer to the end of the
                // temp file, if already exists:
                BufferedOutputStream outputStream = new BufferedOutputStream(
                        new FileOutputStream(bufferTempFile, true));
                try {
                    outputStream.write(bufferedColumns[varindex], 0, MAX_COLUMN_BUFFER);
                    cachedfileSizes[varindex] += MAX_COLUMN_BUFFER;

                    // keep writing MAX_COLUMN_BUFFER-size chunks of bytes into
                    // the temp file, for as long as there's more than
                    // MAX_COLUMN_BUFFER bytes left in the token:
                    while (tokensize - tokenoffset > MAX_COLUMN_BUFFER) {
                        outputStream.write(tokenBytes, tokenoffset, MAX_COLUMN_BUFFER);
                        cachedfileSizes[varindex] += MAX_COLUMN_BUFFER;
                        tokenoffset += MAX_COLUMN_BUFFER;
                    }
                } finally {
                    outputStream.close();
                }

                // buffer the remaining bytes and reset the buffered
                // byte counter:
                System.arraycopy(tokenBytes, tokenoffset, bufferedColumns[varindex], 0,
                        tokensize - tokenoffset);
                bufferedSizes[varindex] = tokensize - tokenoffset;
            }
        }
    } finally {
        // closes the underlying FileInputStream as well
        scanner.close();
    }

    // OK, we've created the individual byte vectors of the tab file columns;
    // they may be partially saved in temp files and/or in memory.
    // We now need to go through all these buffers and create the final
    // rotated image file.

    BufferedOutputStream finalOut = new BufferedOutputStream(
            new FileOutputStream(new File(rotatedImageFileName)));
    try {
        // but first we should create the offset header and write it out into
        // the final file; because it should be at the head, doh!
        // The running offset starts at the size of the header itself
        // (varcount * 8 bytes) and is advanced *before* it is stored, so
        // each entry is the end position of its column.
        ByteBuffer offsetHeader = ByteBuffer.allocate(varcount * 8);
        long columnOffset = varcount * 8;
        for (int varindex = 0; varindex < varcount; varindex++) {
            columnOffset += cachedfileSizes[varindex] + bufferedSizes[varindex];
            offsetHeader.putLong(columnOffset);
        }
        finalOut.write(offsetHeader.array(), 0, varcount * 8);

        byte[] cachedBytes = new byte[MAX_COLUMN_BUFFER];
        for (int varindex = 0; varindex < varcount; varindex++) {
            long cachedBytesRead = 0;

            // check if there is a cached temp file:
            File cachedTempFile = columnTempFiles[varindex];
            if (cachedTempFile != null) {
                BufferedInputStream cachedIn = new BufferedInputStream(new FileInputStream(cachedTempFile));
                try {
                    int readlen;
                    while ((readlen = cachedIn.read(cachedBytes)) > -1) {
                        finalOut.write(cachedBytes, 0, readlen);
                        cachedBytesRead += readlen;
                    }
                } finally {
                    cachedIn.close();
                }
                // delete the temp file:
                cachedTempFile.delete();
            }

            if (cachedBytesRead != cachedfileSizes[varindex]) {
                throw new IOException("Could not read the correct number of bytes cached for column " + varindex
                        + "; " + cachedfileSizes[varindex] + " bytes expected, " + cachedBytesRead + " read.");
            }

            // then check if there are any bytes buffered for this column:
            if (bufferedSizes[varindex] > 0) {
                finalOut.write(bufferedColumns[varindex], 0, bufferedSizes[varindex]);
            }
        }
    } finally {
        finalOut.close();
    }

    return new File(rotatedImageFileName);
}

From source file:marytts.tools.dbselection.WikipediaMarkupCleaner.java

/** Section headings at which the processing of a page stops. */
private static final String[] END_OF_TEXT_HEADINGS = { "==References", "== References", "==See also",
        "== See also", "==External links and sources", "==External links", "== External links",
        "== External Links", "== External links and sources", "==Notes", "== Notes", "==Sources",
        "== Sources", "==Foreign", "== Foreign", "==Discussion" };

/**
 * Multi-line markup sections to remove, as { opening marker, closing marker,
 * remover }. The rules are applied in this order; "{|" must come before
 * "{{" because a table can contain {{ }}.
 */
private static final String[][] SECTION_RULES = {
        { "<noinclude", "</noinclude>", "section" },
        { "<includeonly", "</includeonly>", "section" },
        { "<onlyinclude", "</onlyinclude>", "section" },
        { "<table", "</table>", "section" }, // tables
        { "<TABLE", "</TABLE>", "section" },
        { "{{col-begin}}", "{{col-end}}", "section" },
        { "{|", "|}", "table" },
        { "<ref", null, "ref" }, // special: it can be <ref>, <ref, </ref> or />
        { "<REF", "</REF>", "section" },
        { "<Ref", "</Ref>", "section" },
        { "<reF", "</reF>", "section" },
        { "{{start box}}", "{{end box}}", "section" },
        { "{{", "}}", "section" },
        { "<!--", "-->", "section" },
        { "\\mathrel{|", "}", "section" },
        { "<gallery", "</gallery>", "section" }, // gallery might contain several images
        { "[[Image:", "]]", "image" },
        { "<div", "</div>", "section" }, // span and div tags are used to separate images from text
        { "<DIV", "</DIV>", "image" },
        { "<span", "</span>", "section" },
        { "<math>", "</math>", "section" },
        { "<timeline>", "</timeline>", "section" },
        { "<nowiki", "</nowiki>", "section" },
        { "<source", "</source>", "section" },
        { "<code", "</code>", "section" },
        { "<imagemap", "</imagemap>", "section" },
        { "<poem", "</poem>", "section" },
        { "<h1", "</h1>", "section" },
        { "<pre", "</pre>", "section" } };

/** Line prefixes that mark a short line as a bullet, interwiki link or other non-text. */
private static final String[] NON_TEXT_PREFIXES = { "*", "#", ";", ".", ",", "&", "}", "]", "|", "ca:", "cs:",
        "de:", "es:", "fr:", "it:", "hu:", "ja:", "no:", "pt:", "sl:", "fi:", "sv:", "tr:", "zh:",
        "Category:", "!style=", "!  style=", "!align=", "::<code" };

/**
 * HTML fragments deleted from a line after entity unescaping. They are
 * removed one after the other, in exactly this order.
 */
private static final String[] INLINE_TAGS = { "<big>", "</big>", "<blockquote>", "</blockquote>",
        "<BLOCKQUOTE>", "</BLOCKQUOTE>", "<sup>", "</sup>", "<sub>", "</sub>", "<small>", "</small>", "<ul>",
        "</ul>", "<UL>", "</UL>", "<br>", "<br", "<BR>", "<br", "<br/>", "<Center>", "<center>", "</center>",
        "<CENTER>", "</CENTER>", "<cite>", "</cite>", "<li>", "</li>", "<LI>", "</LI>", "<dl>", "</dl>",
        "<dt>", "</dt>", "<dd>", "</dd>", "<b>", "</b>", "<p>", "</p>", "<u>", "</u>", "<tt>", "</tt>", "<i>",
        "</i>", "<I>", "</I>", "<s>", "</s>", "<em>", "</em>", "</br>", "</div>", "</ref>", "/>" };

/**
 * Removes the wiki and HTML markup from a page and returns its plain text.
 * <p>
 * The page is processed line by line until one of the closing headings
 * (references, see also, external links, notes, ...) is found. Markup
 * sections, short bulleted lines, formatting, links and HTML tags are
 * removed; every remaining line is appended, newline-terminated, to the
 * current text chunk. A chunk is closed as soon as it grows longer than
 * {@code maxTextLength}.
 *
 * @param page the wiki markup of one page.
 * @return the cleaned text, split into chunks; empty if no text is left.
 */
public Vector<String> removeMarkup(String page) {
    StringBuffer str = new StringBuffer("");
    Vector<String> textList = new Vector<String>();

    Scanner s = new Scanner(page);
    try {
        while (s.hasNext()) {
            StringBuffer line = new StringBuffer(s.nextLine());

            // process text until it finds any of the closing headings
            if (isEndOfTextHeading(line)) {
                break;
            }

            line = removeMarkupSections(s, line);

            // here filter bulleted and numbered short lines
            if (line.length() == 0 || isShortNonTextLine(line.toString())) {
                continue;
            }

            line = new StringBuffer(cleanInlineMarkup(line));

            // remove this when the text is almost clean
            if (line.indexOf("<font") >= 0) {
                line = removeSection(s, line, "<font", ">");
            }
            line = new StringBuffer(line.toString().replace("</font>", ""));
            if (line.indexOf("<blockquote") >= 0) {
                line = removeSection(s, line, "<blockquote", ">");
            }
            if (line.indexOf("<ol") >= 0) {
                line = removeSection(s, line, "<ol", ">");
            }
            if (line.indexOf("<http:") >= 0) {
                line = removeSection(s, line, "<http:", ">");
            }

            // finally concatenate the line
            str.append(line);
            if (str.length() == 0 || str.charAt(str.length() - 1) != '\n') {
                str.append("\n");
            }

            // check length of the text
            if (str.length() > maxTextLength) {
                textList.add(str.toString());
                str.setLength(0);
            }
        } // while has more lines
    } finally {
        s.close();
    }

    if (str.length() > 0) {
        textList.add(str.toString());
    }
    return textList;
}

/** Tells whether the line contains one of the headings that end the text of a page. */
private static boolean isEndOfTextHeading(StringBuffer line) {
    for (String heading : END_OF_TEXT_HEADINGS) {
        if (line.indexOf(heading) >= 0) {
            return true;
        }
    }
    return false;
}

/**
 * Applies the section rules to the line until none of them matches any more.
 * Removing a section may pull in more lines from the scanner, and these might
 * contain again more markers to remove.
 */
private StringBuffer removeMarkupSections(Scanner s, StringBuffer line) {
    boolean clean = false;
    while (!clean && line.length() > 0) {
        clean = true;
        for (String[] rule : SECTION_RULES) {
            String open = rule[0];
            if (line.indexOf(open) < 0) {
                continue;
            }
            String close = rule[1];
            String remover = rule[2];
            if ("table".equals(remover)) {
                line = removeSectionTable(s, line, open, close);
            } else if ("ref".equals(remover)) {
                line = removeSectionRef(s, line);
            } else if ("image".equals(remover)) {
                line = removeSectionImage(s, line, open, close);
            } else {
                line = removeSection(s, line, open, close);
            }
            clean = false;
        }
    } // while the line/text is not clean (or does not have tags to remove)
    return line;
}

/** Tells whether a line shorter than 200 characters looks like a list item, link or table row. */
private static boolean isShortNonTextLine(String text) {
    if (text.length() >= 200) {
        return false;
    }
    if (text.endsWith("]]")) {
        return true;
    }
    for (String prefix : NON_TEXT_PREFIXES) {
        if (text.startsWith(prefix)) {
            return true;
        }
    }
    return false;
}

/**
 * Removes the in-line markup of one line: bold/italic quotes, internal and
 * external links, HTML entities and tags, quotation marks, heading markers
 * and list prefixes.
 */
private String cleanInlineMarkup(StringBuffer line) {
    // '''''bold & italic''''', '''bold''', ''italic''
    String text = line.toString().replace("'''''", "").replace("'''", "").replace("''", "");

    // [[Name of page]], [[Name of page|Text to display]],
    // [http://www.example.org Text to display], [http://www.example.org]
    text = processInternalAndExternalLinks(new StringBuffer(text)).toString();

    // this will convert HTML &nbsp; &ndash; etc.
    text = StringEscapeUtils.unescapeHtml(text);

    // The previous does not remove all HTML stuff, so here it is done some manually
    for (String tag : INLINE_TAGS) {
        text = text.replace(tag, "");
    }

    // Removing quotation marks
    text = text.replace("\"", "");

    // NOTE(review): the literals of the following replacements were lost to
    // an encoding error (they had become empty patterns and a bare "?", so
    // that replaceAll() threw a PatternSyntaxException or inserted text
    // between all characters). The characters used here are the presumed
    // originals - confirm against the upstream source.
    // curly double quotes (U+201C, U+201D) are removed
    text = text.replace("\u201C", "").replace("\u201D", "");
    // curly apostrophe (U+2019), em dash (U+2014) and en dash (U+2013)
    text = text.replace("\u2019", "'").replace("\u2014", "-").replace("\u2013", "-");
    // no-break space (U+00A0); a second character that was also mapped to
    // a space could not be recovered and is not handled
    text = text.replace("\u00A0", " ");

    // finally sections and lists
    boolean isTitle = text.startsWith("==");
    text = text.replaceAll("\\s*==+$|==+", "");
    if (isTitle) {
        text = text + ".";
    }

    // bulleted list and numbered list; the checks are cumulative
    if (text.startsWith("***") || text.startsWith("*#*")) {
        text = text.substring(3);
    }
    if (text.startsWith("**") || text.startsWith(":*") || text.startsWith("*#") || text.startsWith("##")
            || text.startsWith("::")) {
        text = text.substring(2);
    }
    if (text.startsWith("*") || text.startsWith("#")) {
        text = text.substring(1);
    }
    if (text.startsWith(";")) { // in glossaries definitions start with ;
        text = text.substring(1);
    }
    return text;
}

From source file:org.apache.openaz.xacml.rest.XACMLPapServlet.java

/**
 * Requests from the Admin Console to create new items or update existing ones
 *
 * @param request
 * @param response
 * @param groupId
 * @throws ServletException
 * @throws java.io.IOException
 */
private void doACPut(HttpServletRequest request, HttpServletResponse response, String groupId)
        throws ServletException, IOException {
    try {

        // for PUT operations the group may or may not need to exist before the operation can be done
        PDPGroup group = papEngine.getGroup(groupId);

        // determine the operation needed based on the parameters in the request

        // for remaining operations the group must exist before the operation can be done
        if (group == null) {
            logger.error("Unknown groupId '" + groupId + "'");
            response.sendError(HttpServletResponse.SC_NOT_FOUND, "Unknown groupId '" + groupId + "'");
            return;
        }
        if (request.getParameter("policy") != null) {
            // group=<groupId> policy=<policyId> contents=policy file <= Create new policy file in group
            // dir, or replace it if it already exists (do not touch properties)
            // TODO - currently this is done by the AC, but it should be done here by getting the policy
            // file out of the contents and saving to disk
            logger.error("PARTIALLY IMPLEMENTED!!!  ACTUAL CHANGES SHOULD BE MADE BY PAP SERVLET!!! ");
            response.setStatus(HttpServletResponse.SC_NO_CONTENT);
            return;
        } else if (request.getParameter("pdpId") != null) {
            // ARGS: group=<groupId> pdpId=<pdpId/URL> <= create a new PDP or Update an Existing one

            String pdpId = request.getParameter("pdpId");

            // get the request content into a String
            String json = null;
            // read the inputStream into a buffer (trick found online scans entire input looking for
            // end-of-file)
            Scanner scanner = new Scanner(request.getInputStream());
            scanner.useDelimiter("\\A");
            json = scanner.hasNext() ? scanner.next() : "";
            scanner.close();
            logger.info("JSON request from AC: " + json);

            // convert Object sent as JSON into local object
            ObjectMapper mapper = new ObjectMapper();

            Object objectFromJSON = mapper.readValue(json, StdPDP.class);

            if (pdpId == null || objectFromJSON == null || !(objectFromJSON instanceof StdPDP)
                    || ((StdPDP) objectFromJSON).getId() == null
                    || !((StdPDP) objectFromJSON).getId().equals(pdpId)) {
                logger.error(
                        "PDP new/update had bad input. pdpId=" + pdpId + " objectFromJSON=" + objectFromJSON);
                response.sendError(500, "Bad input, pdpid=" + pdpId + " object=" + objectFromJSON);
            }
            StdPDP pdp = (StdPDP) objectFromJSON;

            if (papEngine.getPDP(pdpId) == null) {
                // this is a request to create a new PDP object
                papEngine.newPDP(pdp.getId(), group, pdp.getName(), pdp.getDescription());
            } else {
                // this is a request to update the pdp
                papEngine.updatePDP(pdp);
            }

            response.setStatus(HttpServletResponse.SC_NO_CONTENT);
            if (logger.isDebugEnabled()) {
                logger.debug("PDP '" + pdpId + "' created/updated");
            }

            // adjust the group's state including the new PDP
            ((StdPDPGroup) group).resetStatus();

            // tell the Admin Consoles there is a change
            notifyAC();
            // this might affect the PDP, so notify it of the change
            pdpChanged(pdp);
            return;
        } else if (request.getParameter("pipId") != null) {
            // group=<groupId> pipId=<pipEngineId> contents=pip properties <= add a PIP to pip config, or
            // replace it if it already exists (lenient operation)
            // TODO
            logger.error("UNIMPLEMENTED ");
            response.sendError(HttpServletResponse.SC_BAD_REQUEST, "UNIMPLEMENTED");
            return;
        } else {
            // Assume that this is an update of an existing PDP Group
            // ARGS: group=<groupId> <= Update an Existing Group

            // get the request content into a String
            String json = null;
            // read the inputStream into a buffer (trick found online scans entire input looking for
            // end-of-file)
            Scanner scanner = new Scanner(request.getInputStream());
            scanner.useDelimiter("\\A");
            json = scanner.hasNext() ? scanner.next() : "";
            scanner.close();
            logger.info("JSON request from AC: " + json);

            // convert Object sent as JSON into local object
            ObjectMapper mapper = new ObjectMapper();

            Object objectFromJSON = mapper.readValue(json, StdPDPGroup.class);

            if (objectFromJSON == null || !(objectFromJSON instanceof StdPDPGroup)
                    || !((StdPDPGroup) objectFromJSON).getId().equals(group.getId())) {
                logger.error("Group update had bad input. id=" + group.getId() + " objectFromJSON="
                        + objectFromJSON);
                response.sendError(500, "Bad input, id=" + group.getId() + " object=" + objectFromJSON);
            }

            // The Path on the PAP side is not carried on the RESTful interface with the AC
            // (because it is local to the PAP)
            // so we need to fill that in before submitting the group for update
            ((StdPDPGroup) objectFromJSON).setDirectory(((StdPDPGroup) group).getDirectory());

            papEngine.updateGroup((StdPDPGroup) objectFromJSON);

            response.setStatus(HttpServletResponse.SC_NO_CONTENT);
            if (logger.isDebugEnabled()) {
                logger.debug("Group '" + group.getId() + "' updated");
            }
            // tell the Admin Consoles there is a change
            notifyAC();
            // Group changed, which might include changing the policies
            groupChanged(group);
            return;
        }
    } catch (PAPException e) {
        logger.error("AC PUT exception: " + e, e);
        response.sendError(500, e.getMessage());
        return;
    }
}

From source file:com.groupon.odo.HttpUtilities.java

/**
 * Copies the body of an incoming servlet request onto the outgoing proxy request and records
 * it in the history entry.
 *
 * <p>The body is handled according to the request content type:
 * <ul>
 * <li>containing {@code STRING_CONTENT_TYPE_FORM_URLENCODED}: the parameters are parsed with
 * {@link HttpUtilities#mapUrlEncodedParameters} and re-joined as {@code name=value} pairs
 * separated by {@code &} (the {@code =value} part is omitted for empty values);</li>
 * <li>containing {@code STRING_CONTENT_TYPE_MESSAGEPACK}: the raw bytes are forwarded
 * unchanged and a line-per-message rendering of the unpacked values is stored in the
 * history;</li>
 * <li>anything else: the body is read as text and forwarded as is.</li>
 * </ul>
 *
 * @param methodProxyRequest the outgoing proxy request whose request entity is set
 * @param httpServletRequest the incoming request that supplies the body and content type
 * @param history the history entry that receives the request post data
 * @throws Exception if reading or decoding the incoming body fails
 */
@SuppressWarnings("unchecked")
public static void handleStandardPost(EntityEnclosingMethod methodProxyRequest,
        HttpServletRequest httpServletRequest, History history) throws Exception {
    InputStream body = httpServletRequest.getInputStream();

    // Look the content type up once instead of re-querying the request for every check.
    final String contentType = httpServletRequest.getContentType();
    final boolean formUrlEncoded = contentType != null
            && contentType.contains(STRING_CONTENT_TYPE_FORM_URLENCODED);
    final boolean messagePack = contentType != null
            && contentType.contains(STRING_CONTENT_TYPE_MESSAGEPACK);

    StringBuilder deserialisedMessages = new StringBuilder();
    byte[] requestByteArray = null;
    StringBuilder requestBody = new StringBuilder();

    if (formUrlEncoded) {
        // Get the client POST data as a Map if content type is: application/x-www-form-urlencoded
        // We do this manually since some data is not properly parseable by the servlet request
        Map<String, String[]> mapPostParameters = HttpUtilities.mapUrlEncodedParameters(httpServletRequest);

        for (Map.Entry<String, String[]> parameter : mapPostParameters.entrySet()) {
            for (String stringParameterValue : parameter.getValue()) {
                // add an & if there is already data
                if (requestBody.length() > 0) {
                    requestBody.append("&");
                }

                requestBody.append(parameter.getKey());

                // not everything has a value so lets check
                if (stringParameterValue.length() > 0) {
                    requestBody.append("=");
                    requestBody.append(stringParameterValue);
                }
            }
        }
    } else if (messagePack) {
        // Buffer the stream so the same bytes can be both unpacked for the history
        // and forwarded untouched to the proxied server.
        MessagePack msgpack = new MessagePack();
        requestByteArray = IOUtils.toByteArray(body);
        ByteArrayInputStream byteArrayIS = new ByteArrayInputStream(requestByteArray);
        Unpacker unpacker = msgpack.createUnpacker(byteArrayIS);

        for (Value message : unpacker) {
            deserialisedMessages.append(message).append("\n");
        }
    } else {
        // Just set the request body to the POST body: with "\\A" as delimiter the scanner
        // returns the whole stream as one token. The scanner is deliberately not closed,
        // as closing it would also close the container-owned request stream.
        java.util.Scanner s = new java.util.Scanner(body).useDelimiter("\\A");
        if (s.hasNext()) {
            requestBody.append(s.next());
        }
    }

    // Set the proxy request data
    StringRequestEntity stringEntity = new StringRequestEntity(requestBody.toString(), null, null);

    // set post body in history object
    history.setRequestPostData(requestBody.toString());

    // set post body in proxy request object
    methodProxyRequest.setRequestEntity(stringEntity);

    // For MessagePack the history shows the decoded messages while the proxied
    // request carries the original bytes, replacing the text entity set above.
    if (messagePack) {
        history.setRequestPostData(deserialisedMessages.toString());
        ByteArrayRequestEntity byteRequestEntity = new ByteArrayRequestEntity(requestByteArray);
        methodProxyRequest.setRequestEntity(byteRequestEntity);
    }
}

From source file:org.yccheok.jstock.gui.Utils.java

/**
 * Downloads the content at the given location and returns it as a UTF-8 decoded string.
 *
 * @param location the location to download from
 * @return the whole response body, or {@code null} when no response stream is available or
 *         the body is empty
 */
public static String downloadAsString(String location) {
    final Utils.InputStreamAndMethod response = Utils
            .getResponseBodyAsStreamBasedOnProxyAuthOption(location);

    if (response.inputStream != null) {
        try {
            // "\\A" matches the beginning of input only, so the first token is the entire body.
            final java.util.Scanner wholeBody = new java.util.Scanner(response.inputStream, "UTF-8");
            wholeBody.useDelimiter("\\A");
            if (wholeBody.hasNext()) {
                return wholeBody.next();
            }
            return null;
        } finally {
            org.yccheok.jstock.file.Utils.close(response.inputStream);
            response.method.releaseConnection();
        }
    }

    // No stream to read from: just give the connection back.
    response.method.releaseConnection();
    return null;
}

From source file:marytts.tools.dbselection.WikipediaMarkupCleaner.java

/**
 * Removes every section that starts with {@code iniTag} (for example {@code [[Image:}) and
 * ends with its matching {@code ]]} from the line.
 *
 * <p>This is a special case because the section may itself contain {@code [[ ... ]]} links,
 * so the first {@code ]]} after the opening tag is not necessarily the closing tag of the
 * section. A {@code ]]} is taken as the closer only when no {@code [[} occurs between the
 * previously examined position and it. When the closer is not on the current line, further
 * lines are read from the scanner and appended until it is found or the input runs out.
 *
 * @param s scanner positioned after the current line; used to read continuation lines
 * @param lineIn the line to clean; it is not modified
 * @param iniTag the opening tag of the section; the code skips its first two characters,
 *        i.e. it is treated as starting with {@code [[}
 * @param endTag the closing tag; only its length is used, the search itself looks for
 *        {@code ]]}
 * @return a new buffer with the sections removed, or an empty buffer when a section could not
 *         be closed
 */
private StringBuffer removeSectionImage(Scanner s, StringBuffer lineIn, String iniTag, String endTag) {
    int index1;
    StringBuffer line = new StringBuffer(lineIn);

    if (debug)
        System.out.println("Removing tag: " + iniTag + "  LINE (BEFORE): " + line);

    while ((index1 = line.indexOf(iniTag)) >= 0) { // in one line can be more than one iniTag

        int numRef = 1; // the section opened at index1 still has to be closed
        int endTagLength = index1; // position from which the next "]]" is searched

        // Always search the text already in hand first, and only then ask the scanner for
        // more. Gating the search on s.hasNext() would miss a closing tag that is on the
        // current line or on the last line of the input.
        while (numRef > 0) {
            int index2;
            while (numRef > 0 && (index2 = line.indexOf("]]", endTagLength)) >= 0) {
                String aux = line.substring(index1 + 2, index2 + 2);
                if (debug)
                    System.out.println("    aux=" + aux);
                if (aux.indexOf("[[") == -1) {
                    endTagLength = endTag.length() + index2;
                    numRef--;
                } else { // The previous was a [[ ]] inside of a [[Image: so it has to be deleted
                    index1 = index2;
                    endTagLength = index2 + 2;
                }
            }
            // so far it has not found the endTag, so get another line
            if (numRef > 0) {
                if (!s.hasNext())
                    break;
                line.append(s.nextLine());
            }
        }

        if (numRef == 0) {
            index1 = line.indexOf(iniTag); // get again this because the position might change
            if (endTagLength > index1) {
                if (debug) {
                    System.out.println("    FINAL LINE: " + line);
                    System.out.print("iniTag: " + iniTag + "  index1=" + index1);
                    System.out.print("  endTagLength=" + endTagLength);
                    System.out.println("  line.length=" + line.length() + "  line: " + line);
                    System.out.println("  line.length=" + line.length());
                }
                line.delete(index1, endTagLength);
            } else {
                if (debug) {
                    System.out.println("removeSection: WARNING endTagLength > length of line: ");
                    System.out.print("iniTag: " + iniTag + "  index1=" + index1);
                    System.out.print("  endTagLength=" + endTagLength);
                    System.out.println("  line.length=" + line.length() + "  line: " + line);
                    System.out.println("removeSection: WARNING endTagLength > length of line: " + line);
                }
                line = new StringBuffer("");
            }

        } else {
            // input exhausted without a closing tag: drop the whole line
            if (debug)
                System.out.println("removeSection: WARNING no " + endTag);
            line = new StringBuffer("");
        }

    } // while this line contains iniTag-s

    if (debug)
        System.out.println("    LINE (AFTER): " + line);
    return line;
}

From source file:marytts.tools.dbselection.WikipediaMarkupCleaner.java

/**
 * Strips {@code <ref ...>} sections from a line.
 *
 * <p>For each {@code <ref} the end is the first {@code </ref>} at or after it or, when there
 * is none, the first {@code />}. If neither is on the line, further lines are read from the
 * scanner and appended until an end is found or the scanner has no more tokens.
 *
 * @param s scanner used to read continuation lines
 * @param lineIn the line to clean; it is not modified
 * @return a new buffer without the reference sections, or an empty buffer when a reference
 *         could not be closed
 */
private StringBuffer removeSectionRef(Scanner s, StringBuffer lineIn) {
    StringBuffer text = new StringBuffer(lineIn);
    int cutEnd = 0; // exclusive end of the region to delete
    int pending = 0; // references opened but not yet closed

    // in one line can be more than one reference
    for (int refStart = text.indexOf("<ref"); refStart >= 0; refStart = text.indexOf("<ref")) {
        pending++;

        int closePos = text.indexOf("</ref>", refStart);
        if (closePos >= 0) {
            // the endTag was found
            cutEnd = closePos + 6;
            pending--;
        } else {
            int selfClosePos = text.indexOf("/>", refStart);
            if (selfClosePos >= 0) {
                // the endTag was found
                cutEnd = selfClosePos + 2;
                pending--;
            } else {
                // the end must be in the following lines, so keep appending lines until it shows up
                while (s.hasNext() && pending != 0) {
                    String continuation = s.nextLine();
                    if (continuation.indexOf("<ref") >= 0) {
                        pending++;
                    }
                    text.append(continuation);

                    closePos = text.indexOf("</ref>", refStart);
                    if (closePos >= 0) {
                        pending--;
                        cutEnd = closePos + 6;
                    } else {
                        selfClosePos = text.indexOf("/>", refStart);
                        if (selfClosePos >= 0) {
                            pending--;
                            cutEnd = selfClosePos + 2;
                        }
                    }
                }
            }
        }

        if (pending != 0) {
            if (debug) {
                System.out.println("removeSectionRef: WARNING no </ref> or /> in " + text);
            }
            text = new StringBuffer("");
            continue;
        }

        int deleteFrom = text.indexOf("<ref"); // looked up again right before deleting
        if (cutEnd > deleteFrom) {
            text.delete(deleteFrom, cutEnd);
        } else {
            if (debug) {
                System.out.print("iniTag: <ref  index1=" + deleteFrom);
                System.out.print("  endTagLength=" + cutEnd);
                System.out.println("  line.length=" + text.length() + "  line: " + text);
                System.out.println("removeSectionRef: WARNING endTagLength > length of line: " + text);
            }
            text = new StringBuffer("");
        }
    }

    return text;

}

From source file:marytts.tools.dbselection.WikipediaMarkupCleaner.java

/**
 * Removes every section delimited by {@code iniTag} and {@code endTag} from the line.
 *
 * <p>When the end tag is not on the same line as the opening tag, further lines are read from
 * the scanner and appended. Opening tags found on those lines increase the number of end tags
 * that must be seen before the section is considered closed.
 *
 * @param s scanner used to read continuation lines
 * @param lineIn the line to clean; it is not modified
 * @param iniTag text that opens a section
 * @param endTag text that closes a section
 * @return a new buffer with the sections removed, or an empty buffer when a section could not
 *         be closed
 */
private StringBuffer removeSection(Scanner s, StringBuffer lineIn, String iniTag, String endTag) {
    int index1;
    int endTagLength = 0;
    StringBuffer line = new StringBuffer(lineIn);

    if (debug)
        System.out.println("Removing tag: " + iniTag + "  LINE (BEFORE): " + line);

    while ((index1 = line.indexOf(iniTag)) >= 0) { // in one line can be more than one iniTag

        int numRef = 1; // sections opened but not yet closed
        int index2 = line.indexOf(endTag, index1);

        if (index2 >= 0) { // the endTag was found on the same line
            endTagLength = endTag.length() + index2;
            numRef--;
        } else { // the endTag must be in the next lines, so get more lines until it is found
            // Only text after the opening tag can close it, so start looking there; an
            // endTag that happens to precede the iniTag must not be counted.
            int lastEndTag = index1;

            while (s.hasNext() && numRef != 0) {
                String nextLine = s.nextLine();

                // every additional iniTag on the new line needs its own endTag
                int lastIniTag = 0;
                int nested;
                while ((nested = nextLine.indexOf(iniTag, lastIniTag)) >= 0) {
                    numRef++;
                    lastIniTag = iniTag.length() + nested;
                }

                line.append(nextLine);

                // Stop at the endTag that balances the count. Counting past zero would make
                // numRef negative and this loop would then swallow the rest of the input.
                while (numRef > 0 && (index2 = line.indexOf(endTag, lastEndTag)) >= 0) {
                    numRef--;
                    lastEndTag = index2 + endTag.length(); // next search continues after this endTag
                    endTagLength = lastEndTag;
                }
            }
        }

        if (numRef == 0) {
            index1 = line.indexOf(iniTag); // get again this because the position might change
            if (endTagLength > index1) {
                if (debug) {
                    System.out.println("    FINAL LINE: " + line);
                    System.out.print("iniTag: " + iniTag + "  index1=" + index1);
                    System.out.print("  endTagLength=" + endTagLength);
                    System.out.println("  line.length=" + line.length() + "  line: " + line);
                    System.out.println("  line.length=" + line.length());
                }
                line.delete(index1, endTagLength);
            } else {
                if (debug) {
                    System.out.println("removeSection: WARNING endTagLength > length of line: ");
                    System.out.print("iniTag: " + iniTag + "  index1=" + index1);
                    System.out.print("  endTagLength=" + endTagLength);
                    System.out.println("  line.length=" + line.length() + "  line: " + line);
                    System.out.println("removeSection: WARNING endTagLength > length of line: " + line);
                }
                line = new StringBuffer("");
            }

        } else {
            // input exhausted without a closing tag: drop the whole line
            if (debug)
                System.out.println("removeSection: WARNING no " + endTag);
            line = new StringBuffer("");
        }

    } // while this line contains iniTag-s

    if (debug)
        System.out.println("    LINE (AFTER): " + line);
    return line;
}