Example usage for org.hibernate ScrollableResults get

Introduction

This page collects usage examples for org.hibernate.ScrollableResults.get(), drawn from open-source projects.

Prototype

Object[] get();

Document

Get the current row of results.
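
The method returns the current row as an Object[], one element per selected column. Before the project examples, here is a minimal sketch of the typical pattern, assuming an open Hibernate Session named session and a hypothetical Person entity (classic Hibernate API, as used in the examples below):

ScrollableResults results = session.createQuery("select p.id, p.name from Person p")
        .setReadOnly(true).scroll(ScrollMode.FORWARD_ONLY);
try {
    while (results.next()) {
        Object[] row = results.get(); // current row, one element per selected column
        Long id = (Long) row[0];
        String name = (String) row[1];
        // process the row ...
    }
} finally {
    results.close(); // always release the underlying cursor
}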

Usage

From source file:com.mothsoft.alexis.engine.predictive.OpenNLPMaxentModelExecutorTask.java

License:Apache License

private double[] eval(final Model model, final Timestamp topOfPeriod, final Timestamp endOfPeriod,
        final MaxentModel maxentModel) {

    final ScrollableResults scrollableResults = this.documentDao.scrollableSearch(model.getUserId(), null,
            model.getTopic().getSearchExpression(), SortOrder.DATE_ASC, topOfPeriod, endOfPeriod);

    // initialize with an estimated size to prevent a lot of resizing
    final Map<String, Integer> contextMap = new LinkedHashMap<String, Integer>(64 * 1024);

    try {
        while (scrollableResults.next()) {
            final Object[] row = scrollableResults.get();
            final Document document = (Document) row[0];

            if (document == null) {
                // caused by stale index
                continue;
            } else {
                OpenNLPMaxentContextBuilder.append(contextMap, document);
            }
        }
    } finally {
        scrollableResults.close();
    }

    final String[] context = new String[contextMap.size()];
    final float[] values = new float[contextMap.size()];

    // copy map to arrays
    OpenNLPMaxentContextBuilder.buildContextArrays(contextMap, context, values);

    // eval
    return maxentModel.eval(context, values);
}

From source file:com.mothsoft.alexis.engine.textual.TopicDocumentMatcherImpl.java

License:Apache License

private void mapMatches(final Topic topic, final Map<Long, List<TopicScore>> documentTopicMap) {
    final String query = topic.getSearchExpression();

    final ScrollableResults scrollableResults = this.documentDao.scrollableSearch(topic.getUserId(),
            DocumentState.PENDING_TOPIC_MATCHING, query, SortOrder.DATE_ASC, null, null);

    try {
        while (scrollableResults.next()) {
            final Object[] array = scrollableResults.get();

            // allow for stale document index
            if (array[0] == null) {
                continue;
            }

            final DocumentScore documentScore = new DocumentScore((Document) array[0], (Float) array[1]);
            mapMatches(topic, documentScore, documentTopicMap);
        }
    } finally {
        scrollableResults.close();
    }
}

From source file:com.reignite.query.StructuredQuery.java

License:Open Source License

private void join(List<Object> rows) {
    for (Join join : joins) {
        Set<Object> ids = new HashSet<Object>();
        for (Object obj : rows) {
            if (obj instanceof Map) {
                ids.add(((Map<?, ?>) obj).get(join.getJoinId()));
            }
        }
        // prepare the join by setting the order and adding an "in"
        // clause
        join.prepare(ids);

        // if ids is size 1 then we are either doing a per row join or there is only 1 result to join to
        int firstRow = ids.size() == 1 ? join.getStartIndex() : 0;
        ScrollableResults scroll = join.getCriteria().scroll(ScrollMode.FORWARD_ONLY);

        if (scroll.setRowNumber(firstRow)) {
            do {
                Object[] row = scroll.get();
                mergeResult(rows, row, join);
            } while (scroll.next());
        }
        scroll.close();
    }
}

From source file:com.reignite.query.StructuredQuery.java

License:Open Source License

private int runQuery(Criteria criteria, QueryResult result, int maxResults) {
    ScrollableResults scroll = criteria.scroll(ScrollMode.FORWARD_ONLY);
    int count = 0;
    if (scroll.setRowNumber(startIndex)) {
        while (count < maxResults) {
            Object[] row = scroll.get();
            if (fillResult(result, row)) {
                count++;
            }
            if (!scroll.next()) {
                break;
            }
        }
    }
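    // jump to the last row to derive the total count; getRowNumber() is zero-based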
    int totalResultCount = 0;
    if (scroll.last()) {
        totalResultCount = scroll.getRowNumber() + 1;
    }
    result.setTotalResults(totalResultCount);
    scroll.close();
    return count;
}

From source file:com.wci.umls.server.jpa.algo.RrfLoaderAlgorithm.java

License:Open Source License

/**
 * Load MRCONSO.RRF. This is responsible for loading {@link Atom}s and
 * {@link AtomClass}es.
 *
 * @throws Exception the exception
 */
private void loadMrconso() throws Exception {
    logInfo("  Load MRCONSO");
    logInfo("  Insert atoms and concepts ");

    // Set up maps
    String line = null;

    int objectCt = 0;
    final PushBackReader reader = readers.getReader(RrfReaders.Keys.MRCONSO);
    final String fields[] = new String[18];
    String prevCui = null;
    Concept cui = null;
    while ((line = reader.readLine()) != null) {

        line = line.replace("\r", "");
        FieldedStringTokenizer.split(line, "|", 18, fields);

        // Skip non-matching in single mode
        if (singleMode && !fields[11].equals(getTerminology())) {
            continue;
        }

        // Field Description
        // 0 CUI
        // 1 LAT
        // 2 TS
        // 3 LUI
        // 4 STT
        // 5 SUI
        // 6 ISPREF
        // 7 AUI
        // 8 SAUI
        // 9 SCUI
        // 10 SDUI
        // 11 SAB
        // 12 TTY
        // 13 CODE
        // 14 STR
        // 15 SRL
        // 16 SUPPRESS
        // 17 CVF
        //
        // e.g.
        // C0000005|ENG|P|L0000005|PF|S0007492|Y|A7755565||M0019694|D012711|MSH|PEN|D012711|(131)I-Macroaggregated
        // Albumin|0|N|256|

        // set the root terminology language
        loadedRootTerminologies.get(fields[11]).setLanguage(fields[1]);

        final Atom atom = new AtomJpa();
        atom.setLanguage(fields[1]);
        atom.setTimestamp(releaseVersionDate);
        atom.setLastModified(releaseVersionDate);
        atom.setLastModifiedBy(loader);
        atom.setObsolete(fields[16].equals("O"));
        atom.setSuppressible(!fields[16].equals("N"));
        atom.setPublished(true);
        atom.setPublishable(true);
        atom.setName(fields[14]);
        atom.setTerminology(fields[11]);
        if (loadedTerminologies.get(fields[11]) == null) {
            throw new Exception("Atom references terminology that does not exist: " + fields[11]);
        }
        atom.setVersion(loadedTerminologies.get(fields[11]).getVersion());
        // skip in single mode
        if (!singleMode) {
            atom.putAlternateTerminologyId(getTerminology(), fields[7]);
        }
        atom.setTerminologyId(fields[8]);
        atom.setTermType(fields[12]);
        atom.setWorkflowStatus(published);

        atom.setCodeId(fields[13]);
        atom.setDescriptorId(fields[10]);
        atom.setConceptId(fields[9]);

        atom.setStringClassId(fields[5]);
        atom.setLexicalClassId(fields[3]);
        atom.setCodeId(fields[13]);

        // Handle root terminology short name, hierarchical name, and sy names
        if (fields[11].equals("SRC") && fields[12].equals("SSN")) {
            final Terminology t = loadedTerminologies.get(fields[13].substring(2));
            if (t == null || t.getRootTerminology() == null) {
                logError("  Null root " + line);
            } else {
                t.getRootTerminology().setShortName(fields[14]);
            }
        }
        if (fields[11].equals("SRC") && fields[12].equals("RHT")) {
            final Terminology t = loadedTerminologies.get(fields[13].substring(2));
            if (t == null || t.getRootTerminology() == null) {
                logError("  Null root " + line);
            } else {
                t.getRootTerminology().setHierarchicalName(fields[14]);
            }
        }

        if (fields[11].equals("SRC") && fields[12].equals("RPT")) {
            final Terminology t = loadedTerminologies.get(fields[13].substring(2));
            if (t == null || t.getRootTerminology() == null) {
                logError("  Null root " + line);
            } else {
                t.getRootTerminology().setPreferredName(fields[14]);
            }
        }
        if (fields[11].equals("SRC") && fields[12].equals("RSY") && !fields[14].equals("")) {
            final Terminology t = loadedTerminologies.get(fields[13].substring(2));
            if (t == null || t.getRootTerminology() == null) {
                logError("  Null root " + line);
            } else {
                List<String> syNames = t.getRootTerminology().getSynonymousNames();
                syNames.add(fields[14]);
            }
        }

        // Handle terminology sy names
        if (fields[11].equals("SRC") && fields[12].equals("VSY") && !fields[14].equals("")) {
            final Terminology t = loadedTerminologies.get(fields[13].substring(2));
            if (t == null || t.getRootTerminology() == null) {
                logError("  Null root " + line);
            } else {
                List<String> syNames = t.getSynonymousNames();
                syNames.add(fields[14]);
            }
        }

        // Determine organizing class type for terminology
        if (!atom.getDescriptorId().equals("")) {
            termIdTypeMap.put(atom.getTerminology(), IdType.DESCRIPTOR);
        } else if (!atom.getConceptId().equals("")) {
            termIdTypeMap.put(atom.getTerminology(), IdType.CONCEPT);
        } // OTHERWISE it remains "CODE"

        // skip in single mode
        if (!singleMode) {
            atom.putConceptTerminologyId(getTerminology(), fields[0]);
        }

        // Add atoms and commit periodically
        addAtom(atom);
        logAndCommit(++objectCt, RootService.logCt, RootService.commitCt);
        atomIdMap.put(fields[7], atom.getId());
        atomTerminologyMap.put(fields[7], atom.getTerminology().intern());
        atomConceptIdMap.put(fields[7], atom.getConceptId().length() == 0 ? "".intern() : atom.getConceptId());
        atomCodeIdMap.put(fields[7], atom.getCodeId().length() == 0 ? "".intern() : atom.getCodeId());
        atomDescriptorIdMap.put(fields[7],
                atom.getDescriptorId().length() == 0 ? "".intern() : atom.getDescriptorId());

        // CUI - skip in single mode
        if (!singleMode) {
            // Add concept
            if (prevCui == null || !fields[0].equals(prevCui)) {
                if (prevCui != null) {
                    cui.setName(getComputedPreferredName(cui, list));
                    addConcept(cui);
                    conceptIdMap.put(cui.getTerminology() + cui.getTerminologyId(), cui.getId());
                    logAndCommit(++objectCt, RootService.logCt, RootService.commitCt);
                }
                cui = new ConceptJpa();
                cui.setTimestamp(releaseVersionDate);
                cui.setLastModified(releaseVersionDate);
                cui.setLastModifiedBy(loader);
                cui.setPublished(true);
                cui.setPublishable(true);
                cui.setTerminology(getTerminology());
                cui.setTerminologyId(fields[0]);
                cui.setVersion(getVersion());
                cui.setWorkflowStatus(published);
            }
            cui.getAtoms().add(atom);
            prevCui = fields[0];
        }

        // Handle Subset
        // C3539934|ENG|S|L11195730|PF|S13913746|N|A23460885||900000000000538005||SNOMEDCT_US|SB|900000000000538005|Description
        // format|9|N|256|
        if (fields[12].equals("SB")) {

            // Have to handle the type later, when we get to attributes
            final AtomSubset atomSubset = new AtomSubsetJpa();
            setSubsetFields(atomSubset, fields);
            cuiAuiAtomSubsetMap.put(fields[0] + fields[7], atomSubset);
            idTerminologyAtomSubsetMap.put(atomSubset.getTerminologyId() + atomSubset.getTerminology(),
                    atomSubset);
            final ConceptSubset conceptSubset = new ConceptSubsetJpa();
            setSubsetFields(conceptSubset, fields);
            cuiAuiConceptSubsetMap.put(fields[0] + fields[7], conceptSubset);
            idTerminologyConceptSubsetMap.put(conceptSubset.getTerminologyId() + conceptSubset.getTerminology(),
                    conceptSubset);
        }

    }
    // Add last concept
    if (prevCui != null) {
        cui.setName(getComputedPreferredName(cui, list));
        addConcept(cui);
        conceptIdMap.put(cui.getTerminology() + cui.getTerminologyId(), cui.getId());
        logAndCommit(++objectCt, RootService.logCt, RootService.commitCt);
    }

    // Set the terminology organizing class types
    for (final Terminology terminology : loadedTerminologies.values()) {
        final IdType idType = termIdTypeMap.get(terminology.getTerminology());
        if (idType != null && idType != IdType.CODE) {
            terminology.setOrganizingClassType(idType);
            updateTerminology(terminology);
        }
    }

    logInfo("  Add concepts");
    objectCt = 0;
    // NOTE: Hibernate-specific to support iterating
    // Restrict to timestamp used for THESE atoms, in case multiple RRF
    // files are loaded
    final Session session = manager.unwrap(Session.class);
    org.hibernate.Query hQuery = session.createQuery("select a from AtomJpa a " + "where conceptId is not null "
            + "and conceptId != '' and timestamp = :timestamp " + "order by terminology, conceptId");
    hQuery.setParameter("timestamp", releaseVersionDate);
    hQuery.setReadOnly(true).setFetchSize(2000).setCacheable(false);
    ScrollableResults results = hQuery.scroll(ScrollMode.FORWARD_ONLY);
    prevCui = null;
    cui = null;
    while (results.next()) {
        final Atom atom = (Atom) results.get()[0];
        if (atom.getConceptId() == null || atom.getConceptId().isEmpty()) {
            continue;
        }
        if (prevCui == null || !prevCui.equals(atom.getConceptId())) {
            if (cui != null) {
                // compute preferred name
                cui.setName(getComputedPreferredName(cui, list));
                addConcept(cui);
                conceptIdMap.put(cui.getTerminology() + cui.getTerminologyId(), cui.getId());
                logAndCommit(++objectCt, RootService.logCt, RootService.commitCt);
            }
            cui = new ConceptJpa();
            cui.setTimestamp(releaseVersionDate);
            cui.setLastModified(releaseVersionDate);
            cui.setLastModifiedBy(loader);
            cui.setPublished(true);
            cui.setPublishable(true);
            cui.setTerminology(atom.getTerminology());
            cui.setTerminologyId(atom.getConceptId());
            cui.setVersion(atom.getVersion());
            cui.setWorkflowStatus(published);
        }
        cui.getAtoms().add(atom);
        prevCui = atom.getConceptId();
    }
    if (cui != null) {
        cui.setName(getComputedPreferredName(cui, list));
        addConcept(cui);
        conceptIdMap.put(cui.getTerminology() + cui.getTerminologyId(), cui.getId());
        commitClearBegin();
    }
    results.close();
    logInfo("  Add descriptors");
    objectCt = 0;

    // NOTE: Hibernate-specific to support iterating
    hQuery = session.createQuery("select a from AtomJpa a " + "where descriptorId is not null "
            + "and descriptorId != '' and timestamp = :timestamp " + "order by terminology, descriptorId");
    hQuery.setParameter("timestamp", releaseVersionDate);
    hQuery.setReadOnly(true).setFetchSize(2000).setCacheable(false);
    results = hQuery.scroll(ScrollMode.FORWARD_ONLY);
    String prevDui = null;
    Descriptor dui = null;
    while (results.next()) {
        final Atom atom = (Atom) results.get()[0];
        if (atom.getDescriptorId() == null || atom.getDescriptorId().isEmpty()) {
            continue;
        }
        if (prevDui == null || !prevDui.equals(atom.getDescriptorId())) {
            if (dui != null) {
                // compute preferred name
                dui.setName(getComputedPreferredName(dui, list));
                addDescriptor(dui);
                descriptorIdMap.put(dui.getTerminology() + dui.getTerminologyId(), dui.getId());
                logAndCommit(++objectCt, RootService.logCt, RootService.commitCt);
            }
            dui = new DescriptorJpa();
            dui.setTimestamp(releaseVersionDate);
            dui.setLastModified(releaseVersionDate);
            dui.setLastModifiedBy(loader);
            dui.setPublished(true);
            dui.setPublishable(true);
            dui.setTerminology(atom.getTerminology());
            dui.setTerminologyId(atom.getDescriptorId());
            dui.setVersion(atom.getVersion());
            dui.setWorkflowStatus(published);
        }
        dui.getAtoms().add(atom);
        prevDui = atom.getDescriptorId();
    }
    if (dui != null) {
        dui.setName(getComputedPreferredName(dui, list));
        addDescriptor(dui);
        descriptorIdMap.put(dui.getTerminology() + dui.getTerminologyId(), dui.getId());
        commitClearBegin();
    }
    results.close();

    // Use flag to decide whether to handle codes
    if (codesFlag) {
        logInfo("  Add codes");
        objectCt = 0;
        // NOTE: Hibernate-specific to support iterating
        // Skip NOCODE
        // TODO: there is a LNC exception here - for now
        hQuery = session.createQuery("select a from AtomJpa a where codeId is not null "
                + "and codeId != '' and timestamp = :timestamp "
                + "and (terminology = 'LNC' OR (codeId != conceptId and codeId != descriptorId)) "
                + "order by terminology, codeId");
        hQuery.setParameter("timestamp", releaseVersionDate);
        hQuery.setReadOnly(true).setFetchSize(2000).setCacheable(false);
        results = hQuery.scroll(ScrollMode.FORWARD_ONLY);
        String prevCode = null;
        Code code = null;
        while (results.next()) {
            final Atom atom = (Atom) results.get()[0];
            if (atom.getCodeId() == null || atom.getCodeId().isEmpty() || atom.getCodeId().equals("NOCODE")) {
                continue;
            }
            if (prevCode == null || !prevCode.equals(atom.getCodeId())) {
                if (code != null) {
                    // compute preferred name
                    code.setName(getComputedPreferredName(code, list));
                    addCode(code);
                    codeIdMap.put(code.getTerminology() + code.getTerminologyId(), code.getId());
                    logAndCommit(++objectCt, RootService.logCt, 1000);
                }
                code = new CodeJpa();
                code.setTimestamp(releaseVersionDate);
                code.setLastModified(releaseVersionDate);
                code.setLastModifiedBy(loader);
                code.setPublished(true);
                code.setPublishable(true);
                code.setTerminology(atom.getTerminology());
                code.setTerminologyId(atom.getCodeId());
                code.setVersion(atom.getVersion());
                code.setWorkflowStatus(published);
            }
            code.getAtoms().add(atom);
            prevCode = atom.getCodeId();
        }
        if (code != null) {
            code.setName(getComputedPreferredName(code, list));
            addCode(code);
            codeIdMap.put(code.getTerminology() + code.getTerminologyId(), code.getId());
            commitClearBegin();
        }
        results.close();
    }

    // NOTE: for efficiency and lack of use cases, we've temporarily
    // suspended the loading of LexicalClass and StringClass objects

    // // NOTE: atoms are not connected to lexical classes as there are
    // // currently no known uses for this.
    // logInfo(" Add lexical classes");
    // objectCt = 0;
    // query = NEED TO FIX THIS
    // manager
    // .createQuery("select a.id from AtomJpa a order by lexicalClassId");
    // String prevLui = null;
    // LexicalClass lui = null;
    // LexicalClass atoms = null;
    // for (final Long id : (List<Long>) query.getResultList()) {
    // final Atom atom = getAtom(id);
    // if (atom.getLexicalClassId() == null
    // || atom.getLexicalClassId().isEmpty()) {
    // continue;
    // }
    // if (prevLui == null || !prevLui.equals(atom.getLexicalClassId())) {
    // if (lui != null) {
    // // compute preferred name
    // lui.setName(getComputedPreferredName(atoms));
    // addLexicalClass(lui);
    // logAndCommit(++objectCt, RootService.logCt, RootService.commitCt);
    // }
    // // just used to hold atoms, never saved.
    // atoms = new LexicalClassJpa();
    // lui = new LexicalClassJpa();
    // lui.setTimestamp(releaseVersionDate);
    // lui.setLastModified(releaseVersionDate);
    // lui.setLastModifiedBy(loader);
    // lui.setPublished(true);
    // lui.setPublishable(true);
    // lui.setTerminology(terminology);
    // lui.setTerminologyId(atom.getLexicalClassId());
    // lui.setVersion(version);
    // lui.setWorkflowStatus(published);
    // lui.setNormalizedString(getNormalizedString(atom.getName()));
    // }
    // atoms.addAtom(atom);
    // prevLui = atom.getLexicalClassId();
    // }
    // if (lui != null) {
    // lui.setName(getComputedPreferredName(atoms));
    // commitClearBegin();
    // logAndCommit(++objectCt, RootService.logCt, RootService.commitCt);
    // }
    //
    // // NOTE: currently atoms are not loaded for string classes
    // // We simply load the objects themselves ( for SUI maintenance)
    // // There are no known use cases for having the atoms here.
    // logInfo(" Add string classes");
    // objectCt = 0;
    // query = NEED TO FIX THIS
    // manager
    // .createQuery("select distinct stringClassId, name from AtomJpa a");
    // for (final Object[] suiFields : (List<Object[]>) query.getResultList()) {
    // final StringClass sui = new StringClassJpa();
    // sui.setTimestamp(releaseVersionDate);
    // sui.setLastModified(releaseVersionDate);
    // sui.setLastModifiedBy(loader);
    // sui.setPublished(true);
    // sui.setPublishable(true);
    // sui.setTerminology(terminology);
    // sui.setTerminologyId(suiFields[0].toString());
    // sui.setVersion(version);
    // sui.setWorkflowStatus(published);
    // sui.setName(suiFields[1].toString());
    // addStringClass(sui);
    // logAndCommit(++objectCt, RootService.logCt, RootService.commitCt);
    // }

    // commit
    commitClearBegin();

    logInfo("  Update terminologies for languages and names.");

    // Update all root terminologies now that we know languages and names
    for (final RootTerminology root : loadedRootTerminologies.values()) {
        updateRootTerminology(root);
    }

    // Update all root terminologies now that we know languages and names
    for (final Terminology terminology : loadedTerminologies.values()) {
        updateTerminology(terminology);
    }
    commitClearBegin();

}

From source file:de.tudarmstadt.ukp.lmf.api.UbyStatistics.java

License:Apache License

/**
 * Return a {@link Set} of {@link String} instances consisting of <code>lemma+"_"+part-of-speech</code>,
 *       filtered by the given {@link Lexicon} name.<br>
 * The lemma is obtained from the written form of the first {@link FormRepresentation} of the {@link Lemma}
 * instance.
 * @param lexiconName
 *          name of the lexicon whose lemmas should be used
 * 
 * @return a set of strings containing lemma and part-of-speech of the specified lexicon.<br>
 * This method returns an empty set if the lexicon with the specified name does not exist.
 * 
 * @see Lemma#getFormRepresentations()
 * @see FormRepresentation#getWrittenForm()
 * @see EPartOfSpeech
 */
public Set<String> getLemmaPosPerLexicon(String lexiconName) {
    Criteria criteria = session.createCriteria(Lexicon.class, "l");
    criteria = criteria.createCriteria("lexicalEntries", "e");
    if (lexiconName != null) {
        criteria = criteria.add(Restrictions.eq("l.name", lexiconName));
    }
    criteria = criteria.createCriteria("lemma").createCriteria("formRepresentations", "f")
            .setProjection(Projections.projectionList().add(Property.forName("f.writtenForm"))
                    .add(Property.forName("e.partOfSpeech")));
    ScrollableResults res = criteria.scroll();
    ArrayList<String> out = new ArrayList<String>();
    while (res.next()) {
        Object[] r = res.get();
        if (r[1] != null) { // some resources do not have POS
            out.add((String) r[0] + "_" + ((EPartOfSpeech) r[1]).toString());
        } else {
            out.add((String) r[0] + "_null");
        }
    }
    res.close(); // release the underlying cursor
    return new HashSet<String>(out);
}
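
A short caller sketch for the method above; the stats instance and the lexicon name "WordNet" are hypothetical stand-ins:

// Hypothetical caller; assumes an already-constructed UbyStatistics
// instance named "stats" and a lexicon called "WordNet".
Set<String> lemmaPos = stats.getLemmaPosPerLexicon("WordNet");
for (String entry : lemmaPos) {
    // each entry has the form lemma + "_" + part-of-speech
    System.out.println(entry);
}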

From source file:de.tudarmstadt.ukp.lmf.api.UbyStatistics.java

License:Apache License

/**
 * Return a {@link Set} of {@link String} instances consisting of <code>lemma+"_"+part-of-speech</code>,
 *       filtered by the given {@link Lexicon} name, part-of-speech prefix and a language identifier.<br>
 * The lemma is obtained from the written form of the first {@link FormRepresentation} of the {@link Lemma}
 * instance.
 * 
 * @param lexiconName
 *          name of the lexicon whose lemmas should be used
 * 
 * @param prefix the part-of-speech prefix used when filtering {@link LexicalEntry} instances
 * 
 * @param lang the language identifier used when filtering lexical entries
 * 
 * @return a set of strings containing lemma and part-of-speech of the specified lexicon.<br>
 * 
 * This method returns an empty set if the lexicon with the specified name does not exist or
 * the lexicon does not contain any lexical entries with the specified part-of-speech prefix and language
 * identifier.
 * 
 * @see Lemma#getFormRepresentations()
 * @see FormRepresentation#getWrittenForm()
 * @see EPartOfSpeech
 * @see ELanguageIdentifier
 */
public Set<String> getLemmaPosPerLexiconAndPosPrefixAndLanguage(String lexiconName, String prefix,
        String lang) {
    Criteria criteria = session.createCriteria(Lexicon.class, "l");

    criteria = criteria.createCriteria("lexicalEntries", "e");
    if (lexiconName != null) {
        criteria = criteria.add(Restrictions.eq("l.name", lexiconName));
    }
    if (lang != null) {
        criteria = criteria.add(Restrictions.eq("l.languageIdentifier", lang));
    }
    if (prefix != null) {
        criteria = criteria.add(Restrictions.sqlRestriction("partOfSpeech like '" + prefix + "'"));
    }
    criteria = criteria.createCriteria("lemma").createCriteria("formRepresentations", "f")
            .setProjection(Projections.projectionList().add(Property.forName("f.writtenForm"))
                    .add(Property.forName("e.partOfSpeech")));
    ScrollableResults res = criteria.scroll();
    ArrayList<String> out = new ArrayList<String>();
    while (res.next()) {
        Object[] r = res.get();
        if (r[1] != null) {
            out.add((String) r[0] + "_" + ((EPartOfSpeech) r[1]).toString());
        } else {
            out.add((String) r[0] + "_null");
        }
    }
    res.close(); // release the underlying cursor
    return new HashSet<String>(out);
}

From source file:de.tudarmstadt.ukp.lmf.transform.DBToXMLTransformer.java

License:Apache License

protected void doTransform(boolean includeAxes, final Lexicon... includeLexicons) throws SAXException {
    final int bufferSize = 100;
    commitCounter = 1;

    writeStartElement(lexicalResource);

    // Iterate over all lexicons
    if (includeLexicons == null || includeLexicons.length > 0) {
        for (Lexicon lexicon : lexicalResource.getLexicons()) {
            String lexiconName = lexicon.getName();

            // Check if we want to include this lexicon.
            if (includeLexicons != null) {
                boolean found = false;
                for (Lexicon l : includeLexicons) {
                    if (lexiconName.equals(l.getName())) {
                        found = true;
                        break;
                    }
                }
                if (!found) {
                    continue;
                }
            }

            logger.info("Processing lexicon: " + lexiconName);
            writeStartElement(lexicon);

            // Iterate over all possible sub-elements of this Lexicon and
            // write them to the XML
            Class<?>[] lexiconClassesToSave = { LexicalEntry.class, SubcategorizationFrame.class,
                    SubcategorizationFrameSet.class, SemanticPredicate.class, Synset.class,
                    SynSemCorrespondence.class,
                    //ConstraintSet.class
            };

            //  "Unfortunately, MySQL does not treat large offset values efficiently by default and will still read all the rows prior to an offset value. It is common to see a query with an offset above 100,000 take over 20 times longer than an offset of zero!"
            // http://www.numerati.com/2012/06/26/reading-large-result-sets-with-hibernate-and-mysql/
            for (Class<?> clazz : lexiconClassesToSave) {
                /*DetachedCriteria criteria = DetachedCriteria.forClass(clazz)
                      .add(Restrictions.sqlRestriction("lexiconId = '" + lexicon.getId() + "'"));
                CriteriaIterator<Object> iter = new CriteriaIterator<Object>(criteria, sessionFactory, bufferSize);
                while (iter.hasNext()) {
                   Object obj = iter.next();
                   writeElement(obj);
                   session.evict(obj);
                   commitCounter++;
                   if (commitCounter % 1000 == 0)
                      logger.info("progress: " + commitCounter  + " class instances written to file");
                }*/
                Session lookupSession = sessionFactory.openSession();
                Query query = lookupSession.createQuery("FROM " + clazz.getSimpleName() + " WHERE lexiconId = '"
                        + lexicon.getId() + "' ORDER BY id");
                query.setReadOnly(true);
                if (DBConfig.MYSQL.equals(dbConfig.getDBType())) {
                    query.setFetchSize(Integer.MIN_VALUE); // MIN_VALUE gives hint to JDBC driver to stream results
                } else {
                    query.setFetchSize(1000);
                }
                ScrollableResults results = query.scroll(ScrollMode.FORWARD_ONLY);
                while (results.next()) {
                    // For streamed query results, no further queries are allowed (incl. lazy proxy queries!)
                    // Detach the object from the lookup session and reload it using the "official" session.
                    Object[] rows = results.get();
                    Object row = rows[0];
                    lookupSession.evict(row);
                    lookupSession.evict(rows);
                    rows = null;
                    row = session.get(row.getClass(), ((IHasID) row).getId());
                    writeElement(row);
                    session.evict(row);
                    row = null;
                    commitCounter++;
                    if (commitCounter % 1000 == 0) {
                        logger.info("progress: " + commitCounter + " class instances written to file");
                    }
                    if (commitCounter % 10000 == 0) {
                        closeSession();
                        openSession();
                    }
                }
                results.close();
                lookupSession.close();
            }
            writeEndElement(lexicon);
        }
    }

    // Iterate over SenseAxes and write them to XML when not only
    // lexicons should be converted
    if (includeAxes) {
        logger.info("Processing sense axes");
        DetachedCriteria criteria = DetachedCriteria.forClass(SenseAxis.class)
                .add(Restrictions.sqlRestriction("lexicalResourceId = '" + lexicalResource.getName() + "'"));
        CriteriaIterator<Object> iter = new CriteriaIterator<Object>(criteria, sessionFactory, bufferSize);
        while (iter.hasNext()) {
            Object obj = iter.next();
            writeElement(obj);
            session.evict(obj);
            commitCounter++;
            if (commitCounter % 1000 == 0) {
                logger.info("progress: " + commitCounter + " class instances written to file");
            }
        }

        logger.info("Processing predicateargument axes");
        DetachedCriteria criteria2 = DetachedCriteria.forClass(PredicateArgumentAxis.class)
                .add(Restrictions.sqlRestriction("lexicalResourceId = '" + lexicalResource.getName() + "'"));
        CriteriaIterator<Object> iter2 = new CriteriaIterator<Object>(criteria2, sessionFactory, bufferSize);
        while (iter2.hasNext()) {
            Object obj = iter2.next();
            writeElement(obj);
            session.evict(obj);
            commitCounter++;
            if (commitCounter % 1000 == 0) {
                logger.info("progress: " + commitCounter + " class instances written to file");
            }
        }

    }
    writeEndElement(lexicalResource);

    writeEndDocument();
}

From source file:edu.emory.library.tast.util.CSVUtils.java

License:Open Source License

private static DictionaryInfo[] getAllData(Session sess, TastDbQuery query, boolean useSQL,
        ZipOutputStream zipStream, boolean codes, String conditions) throws FileNotFoundException, IOException {

    SimpleDateFormat dateFormatter = new SimpleDateFormat(
            AppConfig.getConfiguration().getString(AppConfig.FORMAT_DATE_CVS));

    //insert the bom - byte order marker
    final byte[] bom = new byte[] { (byte) 0xEF, (byte) 0xBB, (byte) 0xBF };
    zipStream.write(bom);
    CSVWriter writer = new CSVWriter(new OutputStreamWriter(zipStream, encoding), ',');

    //CSVWriter writer = new CSVWriter(new OutputStreamWriter(zipStream), ',');
    ScrollableResults queryResponse = null;

    Map dictionaries = new HashMap();

    try {
        queryResponse = query.executeScrollableQuery(sess, useSQL);

        Attribute[] populatedAttrs = query.getPopulatedAttributes();

        if (conditions != null && !conditions.isEmpty()) {
            String[] con = new String[1];
            con[0] = conditions;
            writer.writeNext(con);
        }

        String[] row = new String[populatedAttrs.length - 1];
        for (int i = 1; i < populatedAttrs.length; i++) {
            row[i - 1] = populatedAttrs[i].getName();
        }

        writer.writeNext(row);

        int cnt = 0;

        while (queryResponse.next()) {

            cnt++;

            Object[] result = queryResponse.get();

            row = new String[populatedAttrs.length - 1];
            for (int j = 1; j < populatedAttrs.length; j++) {
                if (result[j] == null) {
                    row[j - 1] = "";
                } else {
                    if (!codes) {
                        if (result[j] instanceof Date)
                            row[j - 1] = dateFormatter.format(result[j]);
                        else
                            row[j - 1] = result[j].toString();
                        if (result[j] instanceof Dictionary) {
                            if (dictionaries.containsKey(populatedAttrs[j].toString())) {
                                DictionaryInfo info = (DictionaryInfo) dictionaries
                                        .get(populatedAttrs[j].toString());
                                if (!info.attributes.contains(populatedAttrs[j])) {
                                    info.attributes.add(populatedAttrs[j]);
                                }
                            } else {
                                DictionaryInfo info = new DictionaryInfo();
                                info.attributes.add(populatedAttrs[j]);
                                info.dictionary = result[j].getClass();
                                dictionaries.put(populatedAttrs[j].toString(), info);
                            }
                        }
                    } else {
                        if (result[j] instanceof Dictionary) {
                            row[j - 1] = ((Dictionary) result[j]).getId().toString();
                            if (dictionaries.containsKey(populatedAttrs[j].toString())) {
                                DictionaryInfo info = (DictionaryInfo) dictionaries
                                        .get(populatedAttrs[j].toString());
                                if (!info.attributes.contains(populatedAttrs[j])) {
                                    info.attributes.add(populatedAttrs[j]);
                                }
                            } else {
                                DictionaryInfo info = new DictionaryInfo();
                                info.attributes.add(populatedAttrs[j]);
                                info.dictionary = result[j].getClass();
                                dictionaries.put(populatedAttrs[j].toString(), info);
                            }
                        } else {
                            if (result[j] instanceof Date)
                                row[j - 1] = dateFormatter.format(result[j]);
                            else
                                row[j - 1] = result[j].toString();
                        }
                    }
                }
            }
            writer.writeNext(row);
        }

        writer.writeNext(new String[] { "The number of total records: " + cnt });

        writer.flush();
        return (DictionaryInfo[]) dictionaries.values().toArray(new DictionaryInfo[] {});

    } finally {
        if (queryResponse != null) {
            queryResponse.close();
        }
    }
}

From source file:edu.emory.library.tast.util.CSVUtils.java

License:Open Source License

private static void getAllData(Session sess, TastDbQuery query, boolean useSQL, ZipOutputStream zipStream,
        boolean codes) throws FileNotFoundException, IOException {
    SimpleDateFormat dateFormatter = new SimpleDateFormat(
            AppConfig.getConfiguration().getString(AppConfig.FORMAT_DATE_CVS));
    //insert the bom - byte order marker
    final byte[] bom = new byte[] { (byte) 0xEF, (byte) 0xBB, (byte) 0xBF };
    zipStream.write(bom);
    CSVWriter writer = new CSVWriter(new OutputStreamWriter(zipStream, encoding), ',');

    //TODO this snippet below is used for testing purposes only 
    /*File file = new File("c:\\tmp\\voyage.csv");
    FileOutputStream fout = new FileOutputStream(file);
    final byte[] bom = new byte[] { (byte)0xEF, (byte)0xBB, (byte)0xBF };              
     fout.write(bom);       
    CSVWriter writer = new CSVWriter(new OutputStreamWriter(fout, encoding), ',');*/

    ScrollableResults queryResponse = null;

    Map dictionaries = new HashMap();

    try {
        //query to retrieve users for the submissions 
        HashMap users = getUsersForSubmissions(sess);
        boolean usersExist = false;
        if (users != null && users.size() > 0) {
            usersExist = true;
        }
        //query for all the voyages
        queryResponse = query.executeScrollableQuery(sess, useSQL);

        Attribute[] populatedAttrs = query.getPopulatedAttributes();

        String[] row = new String[populatedAttrs.length + 1];
        int i;
        for (i = 0; i < populatedAttrs.length; i++) {
            row[i] = populatedAttrs[i].getName();
        }
        row[i] = "username";
        writer.writeNext(row);

        int cnt = 0;
        String userName = null;
        while (queryResponse.next()) {
            cnt++;
            Object[] result = queryResponse.get();

            row = new String[populatedAttrs.length + 1];
            int j;
            for (j = 0; j < populatedAttrs.length; j++) {
                if (populatedAttrs[j].getName().equals("iid")) {
                    userName = null;
                    if (usersExist) {
                        userName = (String) users.get(result[j]);
                    }
                }
                if (result[j] == null) {
                    row[j] = "";
                } else if (result[j] instanceof Date) {
                    row[j] = dateFormatter.format(result[j]);
                } else if (codes) {
                    if (result[j] instanceof Dictionary) {
                        row[j] = ((Dictionary) result[j]).getId().toString();
                    } else {
                        row[j] = result[j].toString();
                    }
                } else {//labels
                    row[j] = result[j].toString();
                }
            }
            if (userName != null) {
                row[j++] = userName;
            }
            writer.writeNext(row);
        }

        writer.flush();
    } catch (IOException io) {
        io.printStackTrace();
    } finally {
        if (queryResponse != null) {
            queryResponse.close();
        }
    }
}