List of usage examples for org.apache.commons.io.IOUtils.lineIterator
public static LineIterator lineIterator(InputStream input, String encoding) throws IOException
Returns an Iterator for the lines in an InputStream, using the character encoding specified (or the default encoding if null).
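Before the longer real-world examples below, here is a minimal, self-contained sketch of the basic pattern. The file name data.txt is illustrative; any InputStream works the same way.

    import java.io.FileInputStream;
    import java.io.IOException;
    import java.io.InputStream;
    import org.apache.commons.io.IOUtils;
    import org.apache.commons.io.LineIterator;

    public class LineIteratorExample {
        public static void main(String[] args) throws IOException {
            // Hypothetical input file; any InputStream can be used here.
            InputStream in = new FileInputStream("data.txt");
            LineIterator it = IOUtils.lineIterator(in, "UTF-8");
            try {
                while (it.hasNext()) {
                    // Lines are returned without their terminators.
                    String line = it.nextLine();
                    System.out.println(line);
                }
            } finally {
                // Closing the iterator also closes the underlying stream.
                LineIterator.closeQuietly(it);
            }
        }
    }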
From source file:de.tudarmstadt.ukp.clarin.webanno.tsv.WebannoTsv2Reader.java

    /**
     * Iterate through lines and create span annotations accordingly. For multiple span
     * annotations, based on the position of the annotation in the line, update only the
     * end position of the annotation.
     */
    private void setAnnotations(JCas aJcas, InputStream aIs, String aEncoding, StringBuilder text)
            throws IOException
    {
        // getting header information
        LineIterator lineIterator = IOUtils.lineIterator(aIs, aEncoding);
        int columns = 1; // token number + token columns (minimum required)
        int tokenStart = 0, sentenceStart = 0;
        Map<Type, Set<Feature>> spanLayers = new LinkedHashMap<Type, Set<Feature>>();
        Map<Type, Type> relationayers = new LinkedHashMap<Type, Type>();

        // an annotation for every feature in a layer
        Map<Type, Map<Integer, AnnotationFS>> annotations =
                new LinkedHashMap<Type, Map<Integer, AnnotationFS>>();

        // store if this is a Begin/Intermediate/End of an annotation
        Map<Type, Map<Integer, String>> beginEndAnno =
                new LinkedHashMap<Type, Map<Integer, String>>();

        // Store annotations of tokens so that they can be used later for relation annotations
        Map<Type, Map<String, List<AnnotationFS>>> tokenAnnotations =
                new LinkedHashMap<Type, Map<String, List<AnnotationFS>>>();

        // store target token ids used for a relation
        Map<Type, Map<String, List<String>>> relationTargets =
                new LinkedHashMap<Type, Map<String, List<String>>>();

        // index tokens by the concatenation of their begin-end offsets so that lemma and
        // POS annotations can be attached later, if they exist
        indexedTokens = new HashMap<String, Token>();

        while (lineIterator.hasNext()) {
            String line = lineIterator.next().trim();
            if (line.trim().equals("") && sentenceStart == tokenStart) {
                continue;
            }
            if (line.trim().equals("")) {
                text.replace(tokenStart - 1, tokenStart, "");
                tokenStart = tokenStart - 1;
                Sentence sentence = new Sentence(aJcas, sentenceStart, tokenStart);
                sentence.addToIndexes();
                tokenStart++;
                sentenceStart = tokenStart;
                text.append("\n");
                continue;
            }
            // sentence
            if (line.startsWith("#text=")) {
                continue;
            }
            if (line.startsWith("#id=")) {
                continue; // it is a comment line
            }
            if (line.startsWith("#")) {
                columns = getLayerAndFeature(aJcas, columns, spanLayers, relationayers, line);
                continue;
            }
            // sometimes the sentence in #text= contains a line break, which breaks this
            // reader, so skip such lines
            if (!Character.isDigit(line.split(" ")[0].charAt(0))) {
                continue;
            }
            // If we are still unlucky, the line starts with a number from the sentence but
            // not a token number; check whether it is in the format NUM-NUM
            if (!Character.isDigit(line.split("-")[1].charAt(0))) {
                continue;
            }

            int count = StringUtils.countMatches(line, "\t");
            if (columns != count) {
                throw new IOException(fileName
                        + " This is not a valid TSV File.\ncheck this line: " + line);
            }

            // adding tokens and sentence
            StringTokenizer lineTk = new StringTokenizer(line, "\t");
            String tokenNumberColumn = lineTk.nextToken();
            String tokenColumn = lineTk.nextToken();
            Token token = new Token(aJcas, tokenStart, tokenStart + tokenColumn.length());
            token.addToIndexes();
            Type posType = JCasUtil.getType(aJcas, POS.class);
            Type lemmaType = JCasUtil.getType(aJcas, Lemma.class);
            if (spanLayers.containsKey(posType) || spanLayers.containsKey(lemmaType)) {
                indexedTokens.put(tokenStart + "-" + tokenStart + tokenColumn.length(), token);
            }

            // adding the annotations
            createSpanAnnotation(aJcas, tokenStart, spanLayers, relationayers, annotations,
                    beginEndAnno, tokenAnnotations, relationTargets, lineTk, tokenColumn,
                    tokenNumberColumn);

            tokenStart = tokenStart + tokenColumn.length() + 1;
            text.append(tokenColumn + " ");
        }
        if (tokenStart > sentenceStart) {
            Sentence sentence = new Sentence(aJcas, sentenceStart, tokenStart);
            sentence.addToIndexes();
            text.append("\n");
        }
        createRelationLayer(aJcas, relationayers, tokenAnnotations, relationTargets);
    }
From source file:de.tudarmstadt.ukp.clarin.webanno.tsv.WebannoCustomTsvReader.java

The setAnnotations method in this reader is identical to the WebannoTsv2Reader example above, apart from comment line wrapping, so it is not repeated here.
From source file:fr.gael.dhus.server.http.webapp.search.controller.SearchController.java
    /**
     * Provides the OpenSearch description file via the /search/description API.
     *
     * @param res response
     * @throws IOException if the description file cannot be accessed
     */
    @PreAuthorize("hasRole('ROLE_SEARCH')")
    @RequestMapping(value = "/description")
    public void search(HttpServletResponse res) throws IOException {
        String url = configurationManager.getServerConfiguration().getExternalUrl();
        if (url != null && url.endsWith("/")) {
            url = url.substring(0, url.length() - 1);
        }
        String long_name = configurationManager.getNameConfiguration().getLongName();
        String short_name = configurationManager.getNameConfiguration().getShortName();
        String contact_mail = configurationManager.getSupportConfiguration().getMail();

        InputStream is = ClassLoader.getSystemResourceAsStream(DESCRIPTION_FILE);
        if (is == null) {
            throw new IOException(
                    "Cannot find \"" + DESCRIPTION_FILE + "\" OpenSearch description file.");
        }

        LineIterator li = IOUtils.lineIterator(is, "UTF-8");
        try (ServletOutputStream os = res.getOutputStream()) {
            while (li.hasNext()) {
                String line = li.next();
                // Last line? -> the iterator eats LF
                if (li.hasNext()) {
                    line = line + "\n";
                }
                line = line.replace("[dhus_server]", url);
                if (long_name != null) {
                    line = line.replace("[dhus_long_name]", long_name);
                }
                if (short_name != null) {
                    line = line.replace("[dhus_short_name]", short_name);
                }
                if (contact_mail != null) {
                    line = line.replace("[dhus_contact_mail]", contact_mail);
                }
                os.write(line.getBytes());
            }
        } finally {
            IOUtils.closeQuietly(is);
            LineIterator.closeQuietly(li);
        }
    }
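Two details in this example are worth noting. LineIterator strips line terminators, which is why the loop re-appends "\n" to every line except the last. Also, line.getBytes() encodes with the platform default charset, so passing an explicit charset (for example line.getBytes(StandardCharsets.UTF_8)) would be a safer match for the declared UTF-8 input.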
From source file:com.norconex.collector.http.crawler.HttpCrawler.java
    private void queueStartURLs(ICrawlDataStore crawlDataStore) {
        // Queue regular start urls
        String[] startURLs = getCrawlerConfig().getStartURLs();
        if (startURLs != null) {
            for (int i = 0; i < startURLs.length; i++) {
                String startURL = startURLs[i];
                executeQueuePipeline(new HttpCrawlData(startURL, 0), crawlDataStore);
            }
        }
        // Queue start urls defined in one or more seed files
        String[] urlsFiles = getCrawlerConfig().getUrlsFiles();
        if (urlsFiles != null) {
            for (int i = 0; i < urlsFiles.length; i++) {
                String urlsFile = urlsFiles[i];
                LineIterator it = null;
                try {
                    it = IOUtils.lineIterator(new FileInputStream(urlsFile), CharEncoding.UTF_8);
                    while (it.hasNext()) {
                        String startURL = it.nextLine();
                        executeQueuePipeline(new HttpCrawlData(startURL, 0), crawlDataStore);
                    }
                } catch (IOException e) {
                    throw new CollectorException("Could not process URLs file: " + urlsFile, e);
                } finally {
                    LineIterator.closeQuietly(it);
                }
            }
        }
    }
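Assuming a Commons IO version in which LineIterator implements Closeable (true of recent releases), the null-check-and-closeQuietly pattern above can be replaced with try-with-resources. A minimal sketch under that assumption; the seed-file parameter and the println stand-in for executeQueuePipeline are illustrative:

    import java.io.FileInputStream;
    import java.io.IOException;
    import java.nio.charset.StandardCharsets;
    import org.apache.commons.io.IOUtils;
    import org.apache.commons.io.LineIterator;

    public class SeedFileReader {
        // Reads one URL per line from a hypothetical seed file.
        static void queueFromSeedFile(String urlsFile) throws IOException {
            // Requires a Commons IO version where LineIterator implements Closeable.
            try (LineIterator it = IOUtils.lineIterator(
                    new FileInputStream(urlsFile), StandardCharsets.UTF_8)) {
                while (it.hasNext()) {
                    String startURL = it.nextLine();
                    System.out.println("queue: " + startURL); // stand-in for the pipeline call
                }
            }
        }
    }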
From source file:de.tudarmstadt.ukp.dkpro.core.api.datasets.DatasetFactory.java
    private Map<String, DatasetDescriptionImpl> loadFromYaml() throws IOException {
        // Scan for locators
        PathMatchingResourcePatternResolver resolver = new PathMatchingResourcePatternResolver();
        Resource[] locators =
                resolver.getResources("classpath:META-INF/org.dkpro.core/datasets.txt");

        // Read locators
        Set<String> patterns = new LinkedHashSet<>();
        for (Resource locator : locators) {
            try (InputStream is = locator.getInputStream()) {
                IOUtils.lineIterator(is, "UTF-8").forEachRemaining(l -> patterns.add(l));
            }
        }

        // Scan for YAML dataset descriptions
        List<Resource> resources = new ArrayList<>();
        for (String pattern : patterns) {
            for (Resource r : resolver.getResources(pattern)) {
                resources.add(r);
            }
        }

        // Configure YAML deserialization
        Constructor datasetConstructor = new Constructor(DatasetDescriptionImpl.class);
        TypeDescription datasetDesc = new TypeDescription(DatasetDescriptionImpl.class);
        datasetDesc.putMapPropertyType("artifacts", String.class, ArtifactDescriptionImpl.class);
        datasetDesc.putListPropertyType("licenses", LicenseDescriptionImpl.class);
        datasetConstructor.addTypeDescription(datasetDesc);
        TypeDescription artifactDesc = new TypeDescription(ArtifactDescriptionImpl.class);
        artifactDesc.putListPropertyType("actions", ActionDescriptionImpl.class);
        datasetConstructor.addTypeDescription(artifactDesc);
        Yaml yaml = new Yaml(datasetConstructor);

        // Ensure that there is a fixed order (at least if toString is correctly implemented)
        Collections.sort(resources, (a, b) -> {
            return a.toString().compareTo(b.toString());
        });

        // Load the YAML descriptions
        Map<String, DatasetDescriptionImpl> sets = new LinkedHashMap<>();
        for (Resource res : resources) {
            LOG.debug("Loading [" + res + "]");
            try (InputStream is = res.getInputStream()) {
                String id = FilenameUtils.getBaseName(res.getFilename());
                DatasetDescriptionImpl ds = yaml.loadAs(is, DatasetDescriptionImpl.class);
                ds.setId(id);
                ds.setOwner(this);
                // Inject artifact names into artifacts
                for (Entry<String, ArtifactDescription> e : ds.getArtifacts().entrySet()) {
                    ((ArtifactDescriptionImpl) e.getValue()).setName(e.getKey());
                }
                sets.put(ds.getId(), ds);
            }
        }
        return sets;
    }
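Since LineIterator implements Iterator<String>, Java 8 default methods such as forEachRemaining are available directly on it, which is how this example collects the locator lines into a set without an explicit while loop.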
From source file:net.sf.sprockets.database.sqlite.DbOpenHelper.java
    /**
     * Execute the statements in the resource script on the database. Each statement must
     * end with a semicolon.
     */
    private void execScript(SQLiteDatabase db, Resources res, int script) throws IOException {
        LineIterator lines = IOUtils.lineIterator(res.openRawResource(script), UTF_8);
        StringBuilder sql = new StringBuilder(2048); // enough capacity for a long statement
        try {
            // read each (potentially multi-line) statement and execute them one at a time
            while (lines.hasNext()) {
                String line = lines.next().trim();
                int length = line.length();
                if (length > 0) {
                    sql.append(line).append("\n");
                    if (line.charAt(length - 1) == ';') { // statement loaded
                        db.execSQL(sql.toString());
                        sql.setLength(0); // reset builder for a new statement
                    }
                }
            }
        } finally {
            lines.close();
        }
    }
From source file:com.daphne.es.showcase.excel.service.ExcelDataService.java
    /**
     * Imports CSV data.
     *
     * @param user the importing user
     * @param is the CSV input stream
     */
    @Async
    public void importCvs(final User user, final InputStream is) {
        ExcelDataService proxy = ((ExcelDataService) AopContext.currentProxy());
        BufferedInputStream bis = null;
        try {
            long beginTime = System.currentTimeMillis();
            bis = new BufferedInputStream(is);
            String encoding = FileCharset.getCharset(bis);

            LineIterator iterator = IOUtils.lineIterator(bis, encoding);
            String separator = ",";
            int totalSize = 0;
            final List<ExcelData> dataList = Lists.newArrayList();

            if (iterator.hasNext()) {
                iterator.nextLine(); // skip the header line
            }

            while (iterator.hasNext()) {
                totalSize++;
                String line = iterator.nextLine();
                String[] dataArray = StringUtils.split(line, separator);

                ExcelData data = new ExcelData();
                data.setId(Long.valueOf(dataArray[0]));
                data.setContent(dataArray[1]);
                dataList.add(data);

                if (totalSize % batchSize == 0) {
                    try {
                        proxy.doBatchSave(dataList);
                    } catch (Exception e) {
                        Long fromId = dataList.get(0).getId();
                        Long endId = dataList.get(dataList.size() - 1).getId();
                        log.error("from " + fromId + " to " + endId + ", error", e);
                    }
                    dataList.clear();
                }
            }
            if (dataList.size() > 0) {
                proxy.doBatchSave(dataList);
            }

            long endTime = System.currentTimeMillis();
            Map<String, Object> context = Maps.newHashMap();
            context.put("seconds", (endTime - beginTime) / 1000);
            notificationApi.notify(user.getId(), "excelImportSuccess", context);
        } catch (Exception e) {
            log.error("excel import error", e);
            Map<String, Object> context = Maps.newHashMap();
            context.put("error", e.getMessage());
            notificationApi.notify(user.getId(), "excelImportError", context);
        } finally {
            IOUtils.closeQuietly(bis);
        }
    }
From source file:edu.smu.tspell.wordnet.impl.file.InflectionData.java
    /**
     * Reads the exceptions from a single file that correspond to the exceptions for a
     * particular synset type.
     *
     * @param fileName Name of the file to read.
     * @param type Syntactic type associated with the file.
     * @throws RetrievalException An error occurred reading the exception data.
     */
    private void loadExceptions(String fileName, SynsetType type) throws IOException {
        StringTokenizer tokenizer;
        String inflection;
        String[] baseForms;

        String dir = PropertyNames.databaseDirectory;
        InputStream file = getClass().getResourceAsStream(dir + fileName);
        LineIterator iterator = IOUtils.lineIterator(file, null);

        // Loop through all lines in the file
        while (iterator.hasNext()) {
            String line = iterator.nextLine();
            // Parse the inflected word
            tokenizer = new StringTokenizer(line, WORD_DELIMITER);
            inflection = TextTranslator.translateToExternalFormat(tokenizer.nextToken());
            // Get the inflected word's base forms
            baseForms = new String[tokenizer.countTokens()];
            for (int i = 0; i < baseForms.length; i++) {
                baseForms[i] = TextTranslator.translateToExternalFormat(tokenizer.nextToken());
            }
            // Add an entry to the list for this word
            putMorphology(inflection, baseForms, type);
        }
        file.close();
    }
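Note that this example passes null as the encoding, which, per the method description above, makes lineIterator fall back to the platform default encoding. That is acceptable for data known to be ASCII, but an explicit charset is the safer choice otherwise.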
From source file:$.MessageLogParser.java
    /**
     * Gets the lines which correspond with the specified correlation ID from the
     * specified log file.
     *
     * @param logFile the log file
     * @param correlationId the correlation ID
     * @return log lines
     * @throws IOException when an error occurred during file reading
     */
    private List<String> getLogLines(File logFile, String correlationId) throws IOException {
        List<String> logLines = new ArrayList<String>();

        Log.debug("Go through the following log file: " + logFile);

        int year = Calendar.getInstance().get(Calendar.YEAR);
        String[] possibleYears = new String[] { String.valueOf(year - 1), String.valueOf(year) };

        InputStream stream = null;
        try {
            if (logFile.getName().endsWith(GZIP_FILE_EXTENSION)) {
                stream = new GZIPInputStream(new BufferedInputStream(new FileInputStream(logFile)));
            } else {
                stream = new BufferedInputStream(new FileInputStream(logFile));
            }

            LineIterator it = IOUtils.lineIterator(stream, Charset.defaultCharset());

            String requestId = null;
            boolean lastCorrectLine = false; // if previous log line belongs to requestId

            while (it.hasNext()) {
                String line = it.nextLine();

                if (requestId == null) {
                    if (StringUtils.contains(line, correlationId)) {
                        logLines.add(formatLogLine(line));

                        // finds requestID
                        requestId = getRequestId(line);
                        if (requestId != null) {
                            Log.debug("correlationId (" + correlationId + ") => requestId ("
                                    + requestId + ")");
                        }
                    }
                } else {
                    // adds lines with requestID and lines that belong to the previous log
                    // record (e.g. an XML request); it's better to also check correlationID
                    // because there is not one request ID for all repeated scheduled jobs
                    // that process partly failed messages
                    //
                    // 2013-05-23 20:22:36,754 [MACHINE_IS_UNDEFINED, ajp-bio-8009-exec-19, /esb/ws/account/v1, ...
                    // <checkCustomerCreditRequest xmlns="cleverbus.org/ws/AccountService-v1">
                    //     <firstName>csd</firstName>
                    //     <lastName>acs</lastName>
                    //     <birthNumber>111111/1111</birthNumber>
                    // </checkCustomerCreditRequest>
                    if (StringUtils.contains(line, requestId)
                            || (StringUtils.contains(line, correlationId))
                            || (lastCorrectLine && !StringUtils.startsWithAny(line, possibleYears))) {
                        logLines.add(formatLogLine(line));
                        lastCorrectLine = true;
                    } else {
                        lastCorrectLine = false;
                    }
                }
            }
        } finally {
            IOUtils.closeQuietly(stream);
        }

        return logLines;
    }
From source file:com.msopentech.odatajclient.engine.communication.request.batch.ODataBatchUtilities.java
    /**
     * Reads headers from the batch starting from the given position.
     * <p>
     * Retrieved headers will be added to the map given by the target parameter.
     *
     * @param iterator batch iterator.
     * @param target destination of the retrieved headers.
     */
    public static void readHeaders(final ODataBatchLineIterator iterator,
            final Map<String, Collection<String>> target) {
        try {
            final ByteArrayOutputStream baos = new ByteArrayOutputStream();
            readBatchPart(new ODataBatchController(iterator, null), baos, true);

            final LineIterator headers = IOUtils.lineIterator(
                    new ByteArrayInputStream(baos.toByteArray()), ODataConstants.UTF8);
            while (headers.hasNext()) {
                final String line = headers.nextLine().trim();
                if (StringUtils.isNotBlank(line)) {
                    addHeaderLine(line, target);
                }
            }
        } catch (Exception e) {
            LOG.error("Error retrieving headers", e);
            throw new IllegalStateException(e);
        }
    }
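Here the batch part is first buffered into a ByteArrayOutputStream, so the example also shows lineIterator reading from an in-memory ByteArrayInputStream rather than a file or network stream; any InputStream source works the same way.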