List of usage examples for the java.util.LinkedList#poll() method.
public E poll()
From source file: org.nuxeo.ecm.core.opencmis.impl.server.CMISQLtoNXQL.java
/** * Gets the NXQL from a CMISQL query./*www .j a v a 2s. c o m*/ */ public String getNXQL(String cmisql, NuxeoCmisService service, Map<String, PropertyDefinition<?>> typeInfo, boolean searchAllVersions) throws QueryParseException { this.typeInfo = typeInfo; boolean searchLatestVersion = !searchAllVersions; TypeManagerImpl typeManager = service.repository.getTypeManager(); coreSession = service.coreSession; query = new QueryObject(typeManager); CmisQueryWalker walker = null; try { walker = QueryUtil.getWalker(cmisql); walker.setDoFullTextParse(false); walker.query(query, new AnalyzingWalker()); } catch (RecognitionException e) { String msg; if (walker == null) { msg = e.getMessage(); } else { msg = "Line " + e.line + ":" + e.charPositionInLine + " " + walker.getErrorMessage(e, walker.getTokenNames()); } throw new QueryParseException(msg, e); } catch (QueryParseException e) { throw e; } catch (Exception e) { throw new QueryParseException(e.getMessage() + " for query: " + cmisql, e); } if (query.getTypes().size() != 1 && query.getJoinedSecondaryTypes() == null) { throw new QueryParseException("JOINs not supported in query: " + cmisql); } fromType = query.getMainFromName(); BaseTypeId fromBaseTypeId = fromType.getBaseTypeId(); // now resolve column selectors to actual database columns for (CmisSelector sel : query.getSelectReferences()) { recordSelectSelector(sel); } for (CmisSelector sel : query.getJoinReferences()) { ColumnReference col = ((ColumnReference) sel); if (col.getTypeDefinition().getBaseTypeId() == BaseTypeId.CMIS_SECONDARY) { // ignore reference to ON FACET.cmis:objectId continue; } recordSelector(sel, JOIN); } for (CmisSelector sel : query.getWhereReferences()) { recordSelector(sel, WHERE); } for (SortSpec spec : query.getOrderBys()) { recordSelector(spec.getSelector(), ORDER_BY); } addSystemColumns(); List<String> whereClauses = new ArrayList<String>(); // what to select (result columns) String what = StringUtils.join(realColumns.values(), ", "); // 
determine relevant primary types String nxqlFrom; if (fromBaseTypeId == CMIS_RELATIONSHIP) { if (fromType.getParentTypeId() == null) { nxqlFrom = NXQL_RELATION; } else { nxqlFrom = fromType.getId(); } } else { nxqlFrom = NXQL_DOCUMENT; List<String> types = new ArrayList<String>(); if (fromType.getParentTypeId() != null) { // don't add abstract root types types.add(fromType.getId()); } LinkedList<TypeDefinitionContainer> typesTodo = new LinkedList<TypeDefinitionContainer>(); typesTodo.addAll(typeManager.getTypeDescendants(fromType.getId(), -1, Boolean.TRUE)); // recurse to get all subtypes TypeDefinitionContainer tc; while ((tc = typesTodo.poll()) != null) { types.add(tc.getTypeDefinition().getId()); typesTodo.addAll(tc.getChildren()); } if (types.isEmpty()) { // shoudn't happen types = Collections.singletonList("__NOSUCHTYPE__"); } // build clause StringBuilder pt = new StringBuilder(); pt.append(NXQL.ECM_PRIMARYTYPE); pt.append(" IN ("); for (Iterator<String> it = types.iterator(); it.hasNext();) { pt.append(QUOTE); pt.append(it.next()); pt.append(QUOTE); if (it.hasNext()) { pt.append(", "); } } pt.append(")"); whereClauses.add(pt.toString()); } // lifecycle not deleted filter if (skipDeleted) { whereClauses .add(String.format("%s <> '%s'", NXQL.ECM_LIFECYCLESTATE, LifeCycleConstants.DELETED_STATE)); } // searchAllVersions filter if (searchLatestVersion && fromBaseTypeId == CMIS_DOCUMENT) { whereClauses.add(String.format("%s = 1", NXQL.ECM_ISLATESTVERSION)); } // no proxies whereClauses.add(String.format("%s = 0", NXQL.ECM_ISPROXY)); // WHERE clause Tree whereNode = walker.getWherePredicateTree(); boolean distinct = false; if (whereNode != null) { GeneratingWalker generator = new GeneratingWalker(); generator.walkPredicate(whereNode); whereClauses.add(generator.buf.toString()); distinct = generator.distinct; } // ORDER BY clause List<String> orderbys = new ArrayList<String>(); for (SortSpec spec : query.getOrderBys()) { String orderby; CmisSelector sel = 
spec.getSelector(); if (sel instanceof ColumnReference) { orderby = (String) sel.getInfo(); } else { orderby = NXQL.ECM_FULLTEXT_SCORE; } if (!spec.ascending) { orderby += " DESC"; } orderbys.add(orderby); } // create the whole select String where = StringUtils.join(whereClauses, " AND "); String nxql = "SELECT " + (distinct ? "DISTINCT " : "") + what + " FROM " + nxqlFrom + " WHERE " + where; if (!orderbys.isEmpty()) { nxql += " ORDER BY " + StringUtils.join(orderbys, ", "); } // System.err.println("CMIS: " + statement); // System.err.println("NXQL: " + nxql); return nxql; }
From source file: CB_Core.Api.SearchForGeocaches_Core.java
String ParseJsonResult(Search search, CB_List<Cache> cacheList, ArrayList<LogEntry> logList, ArrayList<ImageEntry> imageList, long gpxFilenameId, String result, byte apiStatus, boolean isLite) { // Parse JSON Result try {//w w w .jav a 2s.c o m JSONTokener tokener = new JSONTokener(result); JSONObject json = (JSONObject) tokener.nextValue(); JSONObject status = json.getJSONObject("Status"); if (status.getInt("StatusCode") == 0) { result = ""; JSONArray caches = json.getJSONArray("Geocaches"); // Log.debug(log, "got " + caches.length() + " Caches from gc"); for (int i = 0; i < caches.length(); i++) { JSONObject jCache = (JSONObject) caches.get(i); String gcCode = jCache.getString("Code"); // Log.debug(log, "handling " + gcCode); String name = jCache.getString("Name"); result += gcCode + " - " + name + "\n"; Boolean CacheERROR = false; Cache cache = new Cache(true); cache.setArchived(jCache.getBoolean("Archived")); cache.setAttributesPositive(new DLong(0, 0)); cache.setAttributesNegative(new DLong(0, 0)); JSONArray jAttributes = jCache.getJSONArray("Attributes"); for (int j = 0; j < jAttributes.length(); j++) { JSONObject jAttribute = jAttributes.getJSONObject(j); int AttributeTypeId = jAttribute.getInt("AttributeTypeID"); Boolean isOn = jAttribute.getBoolean("IsOn"); Attributes att = Attributes.getAttributeEnumByGcComId(AttributeTypeId); if (isOn) { cache.addAttributePositive(att); } else { cache.addAttributeNegative(att); } } cache.setAvailable(jCache.getBoolean("Available")); cache.setDateHidden(new Date()); try { String dateCreated = jCache.getString("DateCreated"); int date1 = dateCreated.indexOf("/Date("); int date2 = dateCreated.lastIndexOf("-"); String date = (String) dateCreated.subSequence(date1 + 6, date2); cache.setDateHidden(new Date(Long.valueOf(date))); } catch (Exception exc) { Log.err(log, "SearchForGeocaches_ParseDate", exc); } cache.setDifficulty((float) jCache.getDouble("Difficulty")); // Ein evtl. 
in der Datenbank vorhandenen "Found" nicht berschreiben Boolean Favorite = LoadBooleanValueFromDB( "select Favorit from Caches where GcCode = \"" + gcCode + "\""); cache.setFavorite(Favorite); // Ein evtl. in der Datenbank vorhandenen "Found" nicht berschreiben Boolean Found = LoadBooleanValueFromDB( "select found from Caches where GcCode = \"" + gcCode + "\""); if (!Found) { cache.setFound(jCache.getBoolean("HasbeenFoundbyUser")); } else { cache.setFound(true); } cache.setGcCode(jCache.getString("Code")); try { cache.setGcId(jCache.getString("ID")); } catch (Exception e) { // CacheERROR = true; gibt bei jedem Cache ein // Fehler ??? } cache.setGPXFilename_ID(gpxFilenameId); // Ein evtl. in der Datenbank vorhandenen "Found" nicht berschreiben Boolean userData = LoadBooleanValueFromDB( "select HasUserData from Caches where GcCode = \"" + gcCode + "\""); cache.setHasUserData(userData); if (!isLite) { try { cache.setHint(jCache.getString("EncodedHints")); } catch (Exception e1) { cache.setHint(""); } } cache.Id = Cache.GenerateCacheId(cache.getGcCode()); cache.setListingChanged(false); if (!isLite) { try { cache.setLongDescription(jCache.getString("LongDescription")); } catch (Exception e1) { Log.err(log, "SearchForGeocaches_LongDescription:" + cache.getGcCode(), e1); cache.setLongDescription(""); } if (!jCache.getBoolean("LongDescriptionIsHtml")) { cache.setLongDescription( cache.getLongDescription().replaceAll("(\r\n|\n\r|\r|\n)", "<br />")); } } cache.setName(jCache.getString("Name")); cache.setTourName(""); cache.setNoteChecksum(0); cache.NumTravelbugs = jCache.getInt("TrackableCount"); JSONObject jOwner = jCache.getJSONObject("Owner"); cache.setOwner(jOwner.getString("UserName")); cache.setPlacedBy(cache.getOwner()); try { cache.Pos = new CoordinateGPS(jCache.getDouble("Latitude"), jCache.getDouble("Longitude")); } catch (Exception e) { } cache.Rating = 0; if (!isLite) { try { cache.setShortDescription(jCache.getString("ShortDescription")); } catch (Exception e) 
{ Log.err(log, "SearchForGeocaches_shortDescription:" + cache.getGcCode(), e); cache.setShortDescription(""); } if (!jCache.getBoolean("ShortDescriptionIsHtml")) { cache.setShortDescription( cache.getShortDescription().replaceAll("(\r\n|\n\r|\r|\n)", "<br />")); } } JSONObject jContainer = jCache.getJSONObject("ContainerType"); int jSize = jContainer.getInt("ContainerTypeId"); cache.Size = CacheSizes.parseInt(GroundspeakAPI.getCacheSize(jSize)); cache.setSolverChecksum(0); cache.setTerrain((float) jCache.getDouble("Terrain")); cache.Type = CacheTypes.Traditional; try { JSONObject jCacheType = jCache.getJSONObject("CacheType"); cache.Type = GroundspeakAPI.getCacheType(jCacheType.getInt("GeocacheTypeId")); } catch (Exception e) { if (gcCode.equals("GC4K089")) { cache.Type = CacheTypes.Giga; } else { cache.Type = CacheTypes.Undefined; } } cache.setUrl(jCache.getString("Url")); cache.setApiStatus(apiStatus); // Ein evtl. in der Datenbank vorhandenen "Favorit" nicht berschreiben Boolean fav = LoadBooleanValueFromDB( "select favorit from Caches where GcCode = \"" + gcCode + "\""); cache.setFavorite(fav); // Chk if Own or Found Boolean exclude = false; if (search.excludeFounds && cache.isFound()) exclude = true; if (search.excludeHides && cache.getOwner().equalsIgnoreCase(CB_Core_Settings.GcLogin.getValue())) exclude = true; if (search.available && (cache.isArchived() || !cache.isAvailable())) exclude = true; if (!CacheERROR && !exclude) { cacheList.add(cache); // insert Logs JSONArray logs = jCache.getJSONArray("GeocacheLogs"); for (int j = 0; j < logs.length(); j++) { JSONObject jLogs = (JSONObject) logs.get(j); JSONObject jFinder = (JSONObject) jLogs.get("Finder"); JSONObject jLogType = (JSONObject) jLogs.get("LogType"); LogEntry logEntry = new LogEntry(); logEntry.CacheId = cache.Id; logEntry.Comment = jLogs.getString("LogText"); logEntry.Finder = jFinder.getString("UserName"); logEntry.Id = jLogs.getInt("ID"); logEntry.Timestamp = new Date(); try { String dateCreated 
= jLogs.getString("VisitDate"); int date1 = dateCreated.indexOf("/Date("); int date2 = dateCreated.indexOf("-"); String date = (String) dateCreated.subSequence(date1 + 6, date2); logEntry.Timestamp = new Date(Long.valueOf(date)); } catch (Exception exc) { Log.err(log, "API", "SearchForGeocaches_ParseLogDate", exc); } logEntry.Type = LogTypes.GC2CB_LogType(jLogType.getInt("WptLogTypeId")); logList.add(logEntry); } // insert Images int imageListSizeOrg = imageList.size(); JSONArray images = jCache.getJSONArray("Images"); for (int j = 0; j < images.length(); j++) { JSONObject jImage = (JSONObject) images.get(j); ImageEntry image = new ImageEntry(); image.CacheId = cache.Id; image.GcCode = cache.getGcCode(); image.Name = jImage.getString("Name"); image.Description = jImage.getString("Description"); image.ImageUrl = jImage.getString("Url").replace("img.geocaching.com/gc/cache", "img.geocaching.com/cache"); // remove "/gc" to match the url used in the description image.IsCacheImage = true; imageList.add(image); } int imageListSizeGC = images.length(); // insert images from Cache description LinkedList<String> allImages = null; if (!search.isLite) allImages = DescriptionImageGrabber.GetAllImages(cache); int imageListSizeGrabbed = 0; if (allImages != null && allImages.size() > 0) { imageListSizeGrabbed = allImages.size(); } while (allImages != null && allImages.size() > 0) { String url; url = allImages.poll(); boolean found = false; for (ImageEntry im : imageList) { if (im.ImageUrl.equalsIgnoreCase(url)) { found = true; break; } } if (!found) { ImageEntry image = new ImageEntry(); image.CacheId = cache.Id; image.GcCode = cache.getGcCode(); image.Name = url.substring(url.lastIndexOf("/") + 1); image.Description = ""; image.ImageUrl = url; image.IsCacheImage = true; imageList.add(image); } } log.debug("Merged imageList has " + imageList.size() + " Entrys (" + imageListSizeOrg + "/" + imageListSizeGC + "/" + imageListSizeGrabbed + ")"); // insert Waypoints JSONArray waypoints 
= jCache.getJSONArray("AdditionalWaypoints"); for (int j = 0; j < waypoints.length(); j++) { JSONObject jWaypoints = (JSONObject) waypoints.get(j); Waypoint waypoint = new Waypoint(true); waypoint.CacheId = cache.Id; try { waypoint.Pos = new CoordinateGPS(jWaypoints.getDouble("Latitude"), jWaypoints.getDouble("Longitude")); } catch (Exception ex) { // no Coordinates -> Lat/Lon = 0/0 waypoint.Pos = new CoordinateGPS(0, 0); } waypoint.setTitle(jWaypoints.getString("Description")); waypoint.setDescription(jWaypoints.getString("Comment")); waypoint.Type = GroundspeakAPI.getCacheType(jWaypoints.getInt("WptTypeID")); waypoint.setGcCode(jWaypoints.getString("Code")); cache.waypoints.add(waypoint); } // User Waypoints - Corrected Coordinates of the Geocaching.com Website JSONArray userWaypoints = jCache.getJSONArray("UserWaypoints"); for (int j = 0; j < userWaypoints.length(); j++) { JSONObject jUserWaypoint = (JSONObject) userWaypoints.get(j); if (!jUserWaypoint.getString("Description").equals("Coordinate Override")) { continue; // only corrected Coordinate } Waypoint waypoint = new Waypoint(true); waypoint.CacheId = cache.Id; try { waypoint.Pos = new CoordinateGPS(jUserWaypoint.getDouble("Latitude"), jUserWaypoint.getDouble("Longitude")); } catch (Exception ex) { // no Coordinates -> Lat/Lon = 0/0 waypoint.Pos = new CoordinateGPS(0, 0); } waypoint.setTitle(jUserWaypoint.getString("Description")); waypoint.setDescription(jUserWaypoint.getString("Description")); waypoint.Type = CacheTypes.Final; waypoint.setGcCode("CO" + cache.getGcCode().substring(2, cache.getGcCode().length())); cache.waypoints.add(waypoint); } // Spoiler aktualisieren actualizeSpoilerOfActualCache(cache); } // Notes Object note = jCache.get("GeocacheNote"); if ((note != null) && (note instanceof String)) { String s = (String) note; System.out.println(s); cache.setTmpNote(s); } } GroundspeakAPI.checkCacheStatus(json, isLite); } else { result = "StatusCode = " + status.getInt("StatusCode") + "\n"; result 
+= status.getString("StatusMessage") + "\n"; result += status.getString("ExceptionDetails"); } } catch (JSONException e) { Log.err(log, "SearchForGeocaches:ParserException: " + result, e); } catch (ClassCastException e) { Log.err(log, "SearchForGeocaches:ParserException: " + result, e); } return result; }
From source file: bamboo.trove.full.FullReindexWarcManager.java
private void checkPersistence() { // Persist progress back to the database if we can LinkedList<ToIndex> iHopeThisIsDone = (LinkedList<ToIndex>) allBatches.peek(); if (iHopeThisIsDone == null) return;// w w w. ja v a 2 s .co m boolean itIsDone = false; boolean keepGoing = true; long warcId = 0; // Until we find something still active, keep trying while (keepGoing) { ToIndex warcToIndex = iHopeThisIsDone.peek(); if (warcToIndex == null) { itIsDone = true; keepGoing = false; continue; } warcId = warcToIndex.getId(); if (!warcToIndex.hasBeenRetrieved) { // We haven't indexed this far yet! keepGoing = false; continue; } // If it is still being tracked... if (warcTracking.containsKey(warcId)) { WarcProgressManager warc = warcTracking.get(warcId); // It might only be tracked because of errors... which are persisted separately if (warc.finished() && warc.hasErrors()) { iHopeThisIsDone.poll(); } else { // There is work left in this batch. Stop checking keepGoing = false; } // Not tracked. This warc is done } else { iHopeThisIsDone.poll(); } } // All warcs are completed in this batch if (itIsDone) { dao.updateLastId(warcId); persistedWarcId = warcId; log.info("Persisting progress for ID '{}'. Currently monitoring {} batches", warcId, allBatches.size()); // Clear it from the head allBatches.poll(); } }
From source file: azkaban.execapp.FlowRunner.java
private void resetFailedState(final ExecutableFlowBase flow, final List<ExecutableNode> nodesToRetry) { // bottom up//ww w . jav a2s . c o m final LinkedList<ExecutableNode> queue = new LinkedList<>(); for (final String id : flow.getEndNodes()) { final ExecutableNode node = flow.getExecutableNode(id); queue.add(node); } long maxStartTime = -1; while (!queue.isEmpty()) { final ExecutableNode node = queue.poll(); final Status oldStatus = node.getStatus(); maxStartTime = Math.max(node.getStartTime(), maxStartTime); final long currentTime = System.currentTimeMillis(); if (node.getStatus() == Status.SUCCEEDED) { // This is a candidate parent for restart nodesToRetry.add(node); continue; } else if (node.getStatus() == Status.RUNNING) { continue; } else if (node.getStatus() == Status.KILLING) { continue; } else if (node.getStatus() == Status.SKIPPED) { node.setStatus(Status.DISABLED); node.setEndTime(-1); node.setStartTime(-1); node.setUpdateTime(currentTime); } else if (node instanceof ExecutableFlowBase) { final ExecutableFlowBase base = (ExecutableFlowBase) node; switch (base.getStatus()) { case CANCELLED: node.setStatus(Status.READY); node.setEndTime(-1); node.setStartTime(-1); node.setUpdateTime(currentTime); // Break out of the switch. We'll reset the flow just like a normal // node break; case KILLED: case FAILED: case FAILED_FINISHING: resetFailedState(base, nodesToRetry); continue; default: // Continue the while loop. If the job is in a finished state that's // not // a failure, we don't want to reset the job. 
continue; } } else if (node.getStatus() == Status.CANCELLED) { // Not a flow, but killed node.setStatus(Status.READY); node.setStartTime(-1); node.setEndTime(-1); node.setUpdateTime(currentTime); } else if (node.getStatus() == Status.FAILED || node.getStatus() == Status.KILLED) { node.resetForRetry(); nodesToRetry.add(node); } if (!(node instanceof ExecutableFlowBase) && node.getStatus() != oldStatus) { this.logger.info( "Resetting job '" + node.getNestedId() + "' from " + oldStatus + " to " + node.getStatus()); } for (final String inId : node.getInNodes()) { final ExecutableNode nodeUp = flow.getExecutableNode(inId); queue.add(nodeUp); } } // At this point, the following code will reset the flow final Status oldFlowState = flow.getStatus(); if (maxStartTime == -1) { // Nothing has run inside the flow, so we assume the flow hasn't even // started running yet. flow.setStatus(Status.READY); } else { flow.setStatus(Status.RUNNING); // Add any READY start nodes. Usually it means the flow started, but the // start node has not. for (final String id : flow.getStartNodes()) { final ExecutableNode node = flow.getExecutableNode(id); if (node.getStatus() == Status.READY || node.getStatus() == Status.DISABLED) { nodesToRetry.add(node); } } } flow.setUpdateTime(System.currentTimeMillis()); flow.setEndTime(-1); this.logger.info( "Resetting flow '" + flow.getNestedId() + "' from " + oldFlowState + " to " + flow.getStatus()); }
From source file: org.eclipse.ecr.opencmis.impl.server.CMISQLQueryMaker.java
/** * {@inheritDoc}/*from ww w . j av a2 s .c o m*/ * <p> * The optional parameters must be passed: {@code params[0]} is the * {@link NuxeoCmisService}, optional {@code params[1]} is a type info map. */ @Override public Query buildQuery(SQLInfo sqlInfo, Model model, PathResolver pathResolver, String statement, QueryFilter queryFilter, Object... params) throws StorageException { database = sqlInfo.database; dialect = sqlInfo.dialect; this.model = model; NuxeoCmisService service = (NuxeoCmisService) params[0]; typeInfo = params.length > 1 ? (Map<String, PropertyDefinition<?>>) params[1] : null; TypeManagerImpl typeManager = service.repository.getTypeManager(); boolean addSystemColumns = true; // TODO hierTable = database.getTable(Model.HIER_TABLE_NAME); query = new QueryObject(typeManager); QueryUtil queryUtil = new QueryUtil(); CmisQueryWalker walker = null; try { walker = queryUtil.getWalker(statement); walker.query(query, new AnalyzingWalker()); } catch (RecognitionException e) { String msg; if (walker == null) { msg = e.getMessage(); } else { msg = "Line " + e.line + ":" + e.charPositionInLine + " " + walker.getErrorMessage(e, walker.getTokenNames()); } throw new QueryParseException(msg, e); } catch (QueryParseException e) { throw e; } catch (Exception e) { throw new QueryParseException(e.getMessage(), e); } // now resolve column selectors to actual database columns for (CmisSelector sel : query.getSelectReferences()) { recordSelectSelector(sel); } for (CmisSelector sel : query.getJoinReferences()) { recordSelector(sel, JOIN); } for (CmisSelector sel : query.getWhereReferences()) { recordSelector(sel, WHERE); } for (SortSpec spec : query.getOrderBys()) { recordSelector(spec.getSelector(), ORDER_BY); } boolean distinct = false; // TODO extension boolean skipHidden = true; // ignore hidden and trashed documents addSystemColumns(addSystemColumns, distinct, skipHidden); /* * Find info about fragments needed. 
*/ List<String> whereClauses = new LinkedList<String>(); List<Serializable> whereParams = new LinkedList<Serializable>(); /* * Walk joins. */ List<JoinSpec> joins = query.getJoins(); StringBuilder from = new StringBuilder(); List<Serializable> fromParams = new LinkedList<Serializable>(); for (int njoin = -1; njoin < joins.size(); njoin++) { boolean firstTable = njoin == -1; JoinSpec join; String alias; if (firstTable) { join = null; alias = query.getMainTypeAlias(); } else { join = joins.get(njoin); alias = join.alias; } String typeQueryName = query.getTypeQueryName(alias); String qual = alias; if (qual.equals(typeQueryName)) { qual = null; } Table qualHierTable; qualHierTable = getTable(hierTable, qual); // table this join is about Table table; if (firstTable) { table = qualHierTable; } else { // find which table in onLeft/onRight refers to current // qualifier table = null; for (ColumnReference col : Arrays.asList(join.onLeft, join.onRight)) { if (alias.equals(col.getTypeQueryName())) { table = ((Column) col.getInfo()).getTable(); break; } } if (table == null) { throw new QueryParseException("Bad query, qualifier not found: " + qual); } // do requested join if (join.kind.equals("LEFT") || join.kind.equals("RIGHT")) { from.append(" "); from.append(join.kind); } from.append(" JOIN "); } boolean isRelation = table.getKey().equals(REL_FRAGMENT_NAME); // join requested table String name; if (table.isAlias()) { name = table.getRealTable().getQuotedName() + " " + table.getQuotedName(); } else { name = table.getQuotedName(); } from.append(name); if (!firstTable) { // emit actual join requested from.append(" ON "); from.append(((Column) join.onLeft.getInfo()).getFullQuotedName()); from.append(" = "); from.append(((Column) join.onRight.getInfo()).getFullQuotedName()); } // join other fragments for qualifier String tableMainId = table.getColumn(Model.MAIN_KEY).getFullQuotedName(); for (Table t : allTables.get(qual).values()) { if (t.getKey().equals(table.getKey())) { // 
this one was the first, already done continue; } String n; if (t.isAlias()) { n = t.getRealTable().getQuotedName() + " " + t.getQuotedName(); } else { n = t.getQuotedName(); } from.append(" LEFT JOIN "); from.append(n); from.append(" ON "); from.append(t.getColumn(Model.MAIN_KEY).getFullQuotedName()); from.append(" = "); from.append(tableMainId); } // restrict to relevant primary types List<String> types = new ArrayList<String>(); TypeDefinition td = query.getTypeDefinitionFromQueryName(typeQueryName); if (td.getParentTypeId() != null) { // don't add abstract root types types.add(td.getId()); } LinkedList<TypeDefinitionContainer> typesTodo = new LinkedList<TypeDefinitionContainer>(); typesTodo.addAll(typeManager.getTypeDescendants(td.getId(), -1, Boolean.TRUE)); // recurse to get all subtypes TypeDefinitionContainer tc; while ((tc = typesTodo.poll()) != null) { types.add(tc.getTypeDefinition().getId()); typesTodo.addAll(tc.getChildren()); } if (types.isEmpty()) { // shoudn't happen types = Collections.singletonList("__NOSUCHTYPE__"); } StringBuilder qms = new StringBuilder(); for (int i = 0; i < types.size(); i++) { if (i != 0) { qms.append(", "); } qms.append("?"); } whereClauses.add(String.format("%s IN (%s)", qualHierTable.getColumn(model.MAIN_PRIMARY_TYPE_KEY).getFullQuotedName(), qms)); whereParams.addAll(types); // lifecycle not deleted filter if (skipHidden) { Table misc = getTable(database.getTable(model.MISC_TABLE_NAME), qual); Column lscol = misc.getColumn(model.MISC_LIFECYCLE_STATE_KEY); whereClauses.add(String.format("%s <> ?", lscol.getFullQuotedName())); whereParams.add(LifeCycleConstants.DELETED_STATE); } // security check boolean checkSecurity = !isRelation // && queryFilter != null && queryFilter.getPrincipals() != null; if (checkSecurity) { Serializable principals; Serializable permissions; if (dialect.supportsArrays()) { principals = queryFilter.getPrincipals(); permissions = queryFilter.getPermissions(); } else { principals = 
StringUtils.join(queryFilter.getPrincipals(), '|'); permissions = StringUtils.join(queryFilter.getPermissions(), '|'); } if (dialect.supportsReadAcl()) { /* optimized read acl */ String readAclTable; String readAclIdCol; String readAclAclIdCol; if (joins.size() == 0) { readAclTable = model.HIER_READ_ACL_TABLE_NAME; readAclIdCol = model.HIER_READ_ACL_TABLE_NAME + '.' + model.HIER_READ_ACL_ID; readAclAclIdCol = model.HIER_READ_ACL_TABLE_NAME + '.' + model.HIER_READ_ACL_ACL_ID; } else { String al = "nxr" + (njoin + 1); readAclTable = model.HIER_READ_ACL_TABLE_NAME + " " + al; // TODO dialect readAclIdCol = al + '.' + model.HIER_READ_ACL_ID; readAclAclIdCol = al + '.' + model.HIER_READ_ACL_ACL_ID; } whereClauses.add(dialect.getReadAclsCheckSql(readAclAclIdCol)); whereParams.add(principals); from.append(String.format(" JOIN %s ON %s = %s", readAclTable, tableMainId, readAclIdCol)); } else { whereClauses.add(dialect.getSecurityCheckSql(tableMainId)); whereParams.add(principals); whereParams.add(permissions); } } } /* * WHERE clause. */ Tree whereNode = walker.getWherePredicateTree(); if (whereNode != null) { GeneratingWalker generator = new GeneratingWalker(); generator.walkPredicate(whereNode); whereClauses.add(generator.whereBuf.toString()); whereParams.addAll(generator.whereBufParams); // add JOINs for the external fulltext matches Collections.sort(generator.ftJoins); // implicit JOINs last // (PostgreSQL) for (org.eclipse.ecr.core.storage.sql.jdbc.db.Join join : generator.ftJoins) { from.append(join.toString()); if (join.tableParam != null) { fromParams.add(join.tableParam); } } } /* * SELECT clause. 
*/ List<String> selectWhat = new ArrayList<String>(); List<Serializable> selectParams = new ArrayList<Serializable>(1); for (SqlColumn rc : realColumns) { selectWhat.add(rc.sql); } selectParams.addAll(realColumnsParams); CMISQLMapMaker mapMaker = new CMISQLMapMaker(realColumns, virtualColumns, service); String what = StringUtils.join(selectWhat, ", "); if (distinct) { what = "DISTINCT " + what; } /* * ORDER BY clause. */ List<String> orderbys = new LinkedList<String>(); for (SortSpec spec : query.getOrderBys()) { String orderby; CmisSelector sel = spec.getSelector(); if (sel instanceof ColumnReference) { Column column = (Column) sel.getInfo(); orderby = column.getFullQuotedName(); } else { orderby = fulltextMatchInfo.scoreAlias; } if (!spec.ascending) { orderby += " DESC"; } orderbys.add(orderby); } /* * Create the whole select. */ Select select = new Select(null); select.setWhat(what); select.setFrom(from.toString()); // TODO(fromParams); // TODO add before whereParams select.setWhere(StringUtils.join(whereClauses, " AND ")); select.setOrderBy(StringUtils.join(orderbys, ", ")); Query q = new Query(); q.selectInfo = new SQLInfoSelect(select.getStatement(), mapMaker); q.selectParams = selectParams; q.selectParams.addAll(fromParams); q.selectParams.addAll(whereParams); return q; }
From source file: com.baidu.rigel.biplatform.tesseract.isservice.index.service.impl.IndexServiceImpl.java
/**
 * Initializes MiniCube index metadata for the given cubes and, when requested,
 * immediately runs the indexing for each merged metadata entry.
 *
 * <p>Presumably the server-side counterpart of
 * {@code MiniCubeConnection.publishCubes(List<String> cubes, DataSourceInfo dataSourceInfo)}
 * — TODO confirm against the caller.
 *
 * @param cubeList       cubes to build index metadata for
 * @param dataSourceInfo data source the cubes are read from
 * @param indexAsap      when {@code true}, index right away instead of only
 *                       registering the merged metadata
 * @param limited        when {@code true}, use the "limited" init action for new indices
 * @return {@code false} when no index metadata could be created for
 *         {@code cubeList}; {@code true} otherwise
 * @throws IndexAndSearchException when indexing any metadata entry fails
 */
@Override
public boolean initMiniCubeIndex(List<Cube> cubeList, DataSourceInfo dataSourceInfo, boolean indexAsap,
        boolean limited) throws IndexAndSearchException {
    LOGGER.info(String.format(LogInfoConstants.INFO_PATTERN_FUNCTION_BEGIN, "initMiniCubeIndex",
            "[cubeList:" + cubeList + "][dataSourceInfo:" + dataSourceInfo + "][indexAsap:" + indexAsap
                    + "][limited:" + limited + "]"));
    // step 1: process cubeList and fill indexMeta information
    List<IndexMeta> idxMetaList = this.indexMetaService.initMiniCubeIndexMeta(cubeList, dataSourceInfo);
    if (idxMetaList.size() == 0) {
        // nothing to index: report failure to the caller rather than throwing
        LOGGER.info(String.format(LogInfoConstants.INFO_PATTERN_FUNCTION_PROCESS, "initMiniCubeIndex",
                "[cubeList:" + cubeList + "][dataSourceInfo:" + dataSourceInfo + "][indexAsap:" + indexAsap
                        + "][limited:" + limited + "]",
                "Init MiniCube IndexMeta failed"));
        return false;
    } else {
        LOGGER.info(String.format(LogInfoConstants.INFO_PATTERN_FUNCTION_PROCESS_NO_PARAM, "initMiniCubeIndex",
                "Success init " + idxMetaList.size() + " MiniCube"));
    }
    // step 2: merge each new indexMeta with existing indexMetas and update indexMeta;
    // the merged (persisted) version is what gets indexed below
    LOGGER.info(String.format(LogInfoConstants.INFO_PATTERN_FUNCTION_PROCESS_NO_PARAM, "initMiniCubeIndex",
            "Merging IndexMeta with exist indexMetas"));
    LinkedList<IndexMeta> idxMetaListForIndex = new LinkedList<IndexMeta>();
    for (IndexMeta idxMeta : idxMetaList) {
        idxMeta = this.indexMetaService.mergeIndexMeta(idxMeta);
        LOGGER.info(String.format(LogInfoConstants.INFO_PATTERN_FUNCTION_PROCESS_NO_PARAM, "initMiniCubeIndex",
                "Merge indexMeta success. After merge:[" + idxMeta.toString() + "]"));
        idxMetaListForIndex.add(idxMeta);
    }
    // step 3: if (indexAsap) then call doIndex for each queued meta, else return
    if (indexAsap) {
        LOGGER.info(String.format(LogInfoConstants.INFO_PATTERN_FUNCTION_PROCESS_NO_PARAM, "initMiniCubeIndex",
                "index as soon as possible"));
        // if need index as soon as possible
        IndexAction idxAction = IndexAction.INDEX_INIT;
        if (limited) {
            idxAction = IndexAction.INDEX_INIT_LIMITED;
        }
        while (idxMetaListForIndex.size() > 0) {
            IndexMeta idxMeta = idxMetaListForIndex.poll();
            if (idxMeta.getIdxState().equals(IndexState.INDEX_AVAILABLE_MERGE)) {
                // already available after the merge: just persist the state flip,
                // no indexing work needed for this entry
                idxMeta.setIdxState(IndexState.INDEX_AVAILABLE);
                this.indexMetaService.saveOrUpdateIndexMeta(idxMeta);
                continue;
            } else if (idxMeta.getIdxState().equals(IndexState.INDEX_AVAILABLE_NEEDMERGE)) {
                // NOTE(review): idxAction is not reset per iteration, so once a
                // NEEDMERGE meta switches it to INDEX_MERGE it stays INDEX_MERGE
                // for every later meta in the queue — confirm this is intended.
                idxAction = IndexAction.INDEX_MERGE;
            }
            try {
                doIndexByIndexAction(idxMeta, idxAction, null);
            } catch (Exception e) {
                // log with full call context, then rewrap preserving the cause
                LOGGER.error(String.format(LogInfoConstants.INFO_PATTERN_FUNCTION_EXCEPTION,
                        "initMiniCubeIndex", "[cubeList:" + cubeList + "][dataSourceInfo:" + dataSourceInfo
                                + "][indexAsap:" + indexAsap + "][limited:" + limited + "]"),
                        e);
                String message = TesseractExceptionUtils.getExceptionMessage(
                        IndexAndSearchException.INDEXEXCEPTION_MESSAGE,
                        IndexAndSearchExceptionType.INDEX_EXCEPTION);
                throw new IndexAndSearchException(message, e, IndexAndSearchExceptionType.INDEX_EXCEPTION);
            } finally {
                LOGGER.info(String.format(LogInfoConstants.INFO_PATTERN_FUNCTION_PROCESS_NO_PARAM,
                        "initMiniCubeIndex", "[Index indexmeta : " + idxMeta.toString()));
            }
        }
    }
    LOGGER.info(String.format(LogInfoConstants.INFO_PATTERN_FUNCTION_END, "initMiniCubeIndex",
            "[cubeList:" + cubeList + "][dataSourceInfo:" + dataSourceInfo + "][indexAsap:" + indexAsap
                    + "][limited:" + limited + "]"));
    return true;
}
From source file:org.nuxeo.ecm.core.opencmis.impl.server.CMISQLQueryMaker.java
/**
 * {@inheritDoc}
 *
 * <p>Translates an already-parsed CMISQL statement into a native SQL
 * {@code Query} against the repository database: resolves selectors to
 * columns, walks the JOINs, adds primary-type / lifecycle / version /
 * security filters, then assembles the SELECT / WHERE / ORDER BY.
 *
 * <p>The optional parameters must be passed: {@code params[0]} is the
 * {@link NuxeoCmisService}, optional {@code params[1]} is a type info map,
 * optional {@code params[2]} is searchAllVersions (default
 * {@code Boolean.TRUE} for this method).
 */
@Override
public Query buildQuery(SQLInfo sqlInfo, Model model, PathResolver pathResolver, String statement,
        QueryFilter queryFilter, Object... params) throws StorageException {
    database = sqlInfo.database;
    dialect = sqlInfo.dialect;
    this.model = model;
    NuxeoCmisService service = (NuxeoCmisService) params[0];
    if (params.length > 1) {
        typeInfo = (Map<String, PropertyDefinition<?>>) params[1];
    }
    if (params.length > 2) {
        // searchLatestVersion is the negation of searchAllVersions; a null
        // searchAllVersions counts as "search all versions"
        Boolean searchAllVersions = (Boolean) params[2];
        searchLatestVersion = Boolean.FALSE.equals(searchAllVersions);
    }
    TypeManagerImpl typeManager = service.repository.getTypeManager();
    boolean addSystemColumns = true; // TODO
    hierTable = database.getTable(Model.HIER_TABLE_NAME);
    query = new QueryObject(typeManager);
    statement = applySecurityPolicyQueryTransformers(service, queryFilter.getPrincipal(), statement);
    // parse the CMISQL statement; walker failures are rewrapped as QueryParseException
    CmisQueryWalker walker = null;
    try {
        walker = QueryUtil.getWalker(statement);
        walker.setDoFullTextParse(false);
        walker.query(query, new AnalyzingWalker());
    } catch (RecognitionException e) {
        String msg;
        if (walker == null) {
            msg = e.getMessage();
        } else {
            msg = "Line " + e.line + ":" + e.charPositionInLine + " "
                    + walker.getErrorMessage(e, walker.getTokenNames());
        }
        throw new QueryParseException(msg, e);
    } catch (QueryParseException e) {
        throw e;
    } catch (Exception e) {
        throw new QueryParseException(e.getMessage() + " for query: " + statement, e);
    }
    resolveQualifiers();

    // now resolve column selectors to actual database columns
    for (CmisSelector sel : query.getSelectReferences()) {
        recordSelectSelector(sel);
    }
    for (CmisSelector sel : query.getJoinReferences()) {
        recordSelector(sel, JOIN);
    }
    for (CmisSelector sel : query.getWhereReferences()) {
        recordSelector(sel, WHERE);
    }
    for (SortSpec spec : query.getOrderBys()) {
        recordSelector(spec.getSelector(), ORDER_BY);
    }
    findVersionableQualifiers();
    boolean distinct = false; // TODO extension
    addSystemColumns(addSystemColumns, distinct);

    /*
     * Find info about fragments needed.
     */
    List<String> whereClauses = new LinkedList<String>();
    List<Serializable> whereParams = new LinkedList<Serializable>();

    /*
     * Walk joins. The loop starts at -1 so the main FROM type is processed
     * through the same code path as the explicit JOINs.
     */
    List<JoinSpec> joins = query.getJoins();
    StringBuilder from = new StringBuilder();
    List<Serializable> fromParams = new LinkedList<Serializable>();
    for (int njoin = -1; njoin < joins.size(); njoin++) {
        JoinSpec join;
        boolean outerJoin;
        String alias;
        if (njoin == -1) {
            join = null;
            outerJoin = false;
            alias = query.getMainTypeAlias();
        } else {
            join = joins.get(njoin);
            outerJoin = join.kind.equals("LEFT") || join.kind.equals("RIGHT");
            alias = join.alias;
        }
        String typeQueryName = qualifierToType.get(alias);
        String qual = canonicalQualifier.get(alias);
        Table qualHierTable = getTable(hierTable, qual);

        // determine relevant primary types
        List<String> types = new ArrayList<String>();
        TypeDefinition td = query.getTypeDefinitionFromQueryName(typeQueryName);
        if (td.getParentTypeId() != null) {
            // don't add abstract root types
            types.add(td.getId());
        }
        LinkedList<TypeDefinitionContainer> typesTodo = new LinkedList<TypeDefinitionContainer>();
        typesTodo.addAll(typeManager.getTypeDescendants(td.getId(), -1, Boolean.TRUE));
        // recurse to get all subtypes (breadth-first via the work queue)
        TypeDefinitionContainer tc;
        while ((tc = typesTodo.poll()) != null) {
            types.add(tc.getTypeDefinition().getId());
            typesTodo.addAll(tc.getChildren());
        }
        if (types.isEmpty()) {
            // shouldn't happen
            types = Collections.singletonList("__NOSUCHTYPE__");
        }
        // build the "primarytype IN (?, ?, ...)" clause
        StringBuilder qms = new StringBuilder();
        for (int i = 0; i < types.size(); i++) {
            if (i != 0) {
                qms.append(", ");
            }
            qms.append("?");
        }
        String primaryTypeClause = String.format("%s IN (%s)",
                qualHierTable.getColumn(model.MAIN_PRIMARY_TYPE_KEY).getFullQuotedName(), qms);

        // table this join is about
        Table table;
        if (join == null) {
            table = qualHierTable;
        } else {
            // find which table in onLeft/onRight refers to current qualifier
            table = null;
            for (ColumnReference col : Arrays.asList(join.onLeft, join.onRight)) {
                if (alias.equals(col.getQualifier())) {
                    // TODO match with canonical qualifier instead?
                    table = ((Column) col.getInfo()).getTable();
                    break;
                }
            }
            if (table == null) {
                throw new QueryParseException("Bad query, qualifier not found: " + qual);
            }
        }
        String tableName;
        if (table.isAlias()) {
            tableName = table.getRealTable().getQuotedName() + " " + table.getQuotedName();
        } else {
            tableName = table.getQuotedName();
        }
        boolean isRelation = table.getKey().equals(REL_FRAGMENT_NAME);

        // join clause on requested columns
        boolean primaryTypeClauseDone = false;
        if (join == null) {
            from.append(tableName);
        } else {
            if (outerJoin) {
                from.append(" ");
                from.append(join.kind);
            }
            from.append(" JOIN ");
            from.append(tableName);
            from.append(" ON (");
            from.append(((Column) join.onLeft.getInfo()).getFullQuotedName());
            from.append(" = ");
            from.append(((Column) join.onRight.getInfo()).getFullQuotedName());
            if (outerJoin && table.getKey().equals(Model.HIER_TABLE_NAME)) {
                // outer join, type check must be part of JOIN
                from.append(" AND ");
                from.append(primaryTypeClause);
                fromParams.addAll(types);
                primaryTypeClauseDone = true;
            }
            from.append(")");
        }

        // join other fragments for qualifier
        String tableMainId = table.getColumn(Model.MAIN_KEY).getFullQuotedName();
        for (Table t : allTables.get(qual).values()) {
            if (t.getKey().equals(table.getKey())) {
                // already done above
                continue;
            }
            String n;
            if (t.isAlias()) {
                n = t.getRealTable().getQuotedName() + " " + t.getQuotedName();
            } else {
                n = t.getQuotedName();
            }
            from.append(" LEFT JOIN ");
            from.append(n);
            from.append(" ON (");
            from.append(t.getColumn(Model.MAIN_KEY).getFullQuotedName());
            from.append(" = ");
            from.append(tableMainId);
            if (outerJoin && t.getKey().equals(Model.HIER_TABLE_NAME)) {
                // outer join, type check must be part of JOIN
                from.append(" AND ");
                from.append(primaryTypeClause);
                fromParams.addAll(types);
                primaryTypeClauseDone = true;
            }
            from.append(")");
        }

        // primary type clause, if not included in a JOIN
        if (!primaryTypeClauseDone) {
            whereClauses.add(primaryTypeClause);
            whereParams.addAll(types);
        }

        // lifecycle not deleted filter
        if (skipDeleted) {
            ModelProperty propertyInfo = model.getPropertyInfo(model.MISC_LIFECYCLE_STATE_PROP);
            Column lscol = getTable(database.getTable(propertyInfo.fragmentName), qual)
                    .getColumn(propertyInfo.fragmentKey);
            String lscolName = lscol.getFullQuotedName();
            whereClauses.add(String.format("(%s <> ? OR %s IS NULL)", lscolName, lscolName));
            whereParams.add(LifeCycleConstants.DELETED_STATE);
        }

        // searchAllVersions filter
        boolean versionable = versionableQualifiers.contains(qual);
        if (searchLatestVersion && versionable) {
            // add islatestversion = true
            Table ver = getTable(database.getTable(model.VERSION_TABLE_NAME), qual);
            Column latestvercol = ver.getColumn(model.VERSION_IS_LATEST_KEY);
            String latestvercolName = latestvercol.getFullQuotedName();
            whereClauses.add(String.format("(%s = ?)", latestvercolName));
            whereParams.add(Boolean.TRUE);
        }

        // security check (skipped for relation tables)
        boolean checkSecurity = !isRelation //
                && queryFilter != null && queryFilter.getPrincipals() != null;
        if (checkSecurity) {
            Serializable principals;
            Serializable permissions;
            if (dialect.supportsArrays()) {
                principals = queryFilter.getPrincipals();
                permissions = queryFilter.getPermissions();
            } else {
                // flatten to '|'-separated strings for dialects without array support
                principals = StringUtils.join(queryFilter.getPrincipals(), '|');
                permissions = StringUtils.join(queryFilter.getPermissions(), '|');
            }
            if (dialect.supportsReadAcl()) {
                /* optimized read acl */
                String readAclTable;
                String readAclTableAlias;
                String aclrumTable;
                String aclrumTableAlias;
                if (joins.size() == 0) {
                    readAclTable = Model.HIER_READ_ACL_TABLE_NAME;
                    readAclTableAlias = readAclTable;
                    aclrumTable = Model.ACLR_USER_MAP_TABLE_NAME;
                    aclrumTableAlias = aclrumTable;
                } else {
                    // per-join aliases keep multiple ACL joins distinct
                    readAclTableAlias = "nxr" + (njoin + 1);
                    readAclTable = Model.HIER_READ_ACL_TABLE_NAME + ' ' + readAclTableAlias; // TODO dialect
                    aclrumTableAlias = "aclrum" + (njoin + 1);
                    aclrumTable = Model.ACLR_USER_MAP_TABLE_NAME + ' ' + aclrumTableAlias; // TODO dialect
                }
                String readAclIdCol = readAclTableAlias + '.' + Model.HIER_READ_ACL_ID;
                String readAclAclIdCol = readAclTableAlias + '.' + Model.HIER_READ_ACL_ACL_ID;
                String aclrumAclIdCol = aclrumTableAlias + '.' + Model.ACLR_USER_MAP_ACL_ID;
                String aclrumUserIdCol = aclrumTableAlias + '.' + Model.ACLR_USER_MAP_USER_ID;
                // first join with hierarchy_read_acl
                if (outerJoin) {
                    from.append(" ");
                    from.append(join.kind);
                }
                from.append(String.format(" JOIN %s ON (%s = %s)", readAclTable, tableMainId, readAclIdCol));
                // second join with aclr_user_map
                String securityCheck = dialect.getReadAclsCheckSql(aclrumUserIdCol);
                String joinOn = String.format("%s = %s", readAclAclIdCol, aclrumAclIdCol);
                if (outerJoin) {
                    from.append(" ");
                    from.append(join.kind);
                    // outer join, security check must be part of JOIN
                    joinOn = String.format("%s AND %s", joinOn, securityCheck);
                    fromParams.add(principals);
                } else {
                    // inner join, security check can go in WHERE clause
                    whereClauses.add(securityCheck);
                    whereParams.add(principals);
                }
                from.append(String.format(" JOIN %s ON (%s)", aclrumTable, joinOn));
            } else {
                String securityCheck = dialect.getSecurityCheckSql(tableMainId);
                if (outerJoin) {
                    // outer join: allow rows with no match at all
                    securityCheck = String.format("(%s OR %s IS NULL)", securityCheck, tableMainId);
                }
                whereClauses.add(securityCheck);
                whereParams.add(principals);
                whereParams.add(permissions);
            }
        }
    }

    /*
     * WHERE clause.
     */
    Tree whereNode = walker.getWherePredicateTree();
    if (whereNode != null) {
        GeneratingWalker generator = new GeneratingWalker();
        generator.walkPredicate(whereNode);
        whereClauses.add(generator.whereBuf.toString());
        whereParams.addAll(generator.whereBufParams);
        // add JOINs for the external fulltext matches
        Collections.sort(generator.ftJoins); // implicit JOINs last (PostgreSQL)
        for (org.nuxeo.ecm.core.storage.sql.jdbc.db.Join join : generator.ftJoins) {
            from.append(join.toSql(dialect));
            if (join.tableParam != null) {
                fromParams.add(join.tableParam);
            }
        }
    }

    /*
     * SELECT clause.
     */
    List<String> selectWhat = new ArrayList<String>();
    List<Serializable> selectParams = new ArrayList<Serializable>(1);
    for (SqlColumn rc : realColumns) {
        selectWhat.add(rc.sql);
    }
    selectParams.addAll(realColumnsParams);
    CMISQLMapMaker mapMaker = new CMISQLMapMaker(realColumns, virtualColumns, service);
    String what = StringUtils.join(selectWhat, ", ");
    if (distinct) {
        what = "DISTINCT " + what;
    }

    /*
     * ORDER BY clause.
     */
    List<String> orderbys = new LinkedList<String>();
    for (SortSpec spec : query.getOrderBys()) {
        String orderby;
        CmisSelector sel = spec.getSelector();
        if (sel instanceof ColumnReference) {
            Column column = (Column) sel.getInfo();
            orderby = column.getFullQuotedName();
        } else {
            // not a column reference: order by the fulltext score alias
            orderby = fulltextMatchInfo.scoreAlias;
        }
        if (!spec.ascending) {
            orderby += " DESC";
        }
        orderbys.add(orderby);
    }

    /*
     * Create the whole select. Parameter order in q.selectParams must stay
     * select, from, where — it mirrors the placeholder order in the SQL text.
     */
    Select select = new Select(null);
    select.setWhat(what);
    select.setFrom(from.toString());
    // TODO(fromParams); // TODO add before whereParams
    select.setWhere(StringUtils.join(whereClauses, " AND "));
    select.setOrderBy(StringUtils.join(orderbys, ", "));
    Query q = new Query();
    q.selectInfo = new SQLInfoSelect(select.getStatement(), mapMaker);
    q.selectParams = selectParams;
    q.selectParams.addAll(fromParams);
    q.selectParams.addAll(whereParams);
    return q;
}
From source file:com.joliciel.talismane.TalismaneImpl.java
/**
 * Runs the full analysis pipeline (sentence detection, tokenising,
 * pos-tagging, parsing) as configured, streaming text from this instance's
 * reader and writing results through the configured processors/writer.
 *
 * <p>When starting at the SentenceDetector or Tokeniser module, input is read
 * character by character and processed through a rolling window of three text
 * segments; otherwise sentences come pre-analysed from the configured corpus
 * readers. The reader and writer are always closed in the finally block.
 *
 * @param config pipeline configuration (modules, models, readers, filters)
 */
public void analyse(TalismaneConfig config) {
    try {
        // validate that every module the configuration needs has been supplied
        if (config.needsSentenceDetector()) {
            if (config.getSentenceDetector() == null) {
                throw new TalismaneException("Sentence detector not provided.");
            }
        }
        if (config.needsTokeniser()) {
            if (config.getTokeniser() == null) {
                throw new TalismaneException("Tokeniser not provided.");
            }
        }
        if (config.needsPosTagger()) {
            if (config.getPosTagger() == null) {
                throw new TalismaneException("Pos-tagger not provided.");
            }
        }
        if (config.needsParser()) {
            if (config.getParser() == null) {
                throw new TalismaneException("Parser not provided.");
            }
        }
        // validate that the end module has a processor to emit its output
        if (config.getEndModule().equals(Module.SentenceDetector)) {
            if (this.getSentenceProcessor() == null) {
                throw new TalismaneException(
                        "No sentence processor provided with sentence detector end module, cannot generate output.");
            }
        }
        if (config.getEndModule().equals(Module.Tokeniser)) {
            if (this.getTokenSequenceProcessor() == null) {
                throw new TalismaneException(
                        "No token sequence processor provided with tokeniser end module, cannot generate output.");
            }
        }
        if (config.getEndModule().equals(Module.PosTagger)) {
            if (this.getPosTagSequenceProcessor() == null) {
                throw new TalismaneException(
                        "No postag sequence processor provided with pos-tagger end module, cannot generate output.");
            }
        }
        if (config.getEndModule().equals(Module.Parser)) {
            if (this.getParseConfigurationProcessor() == null) {
                throw new TalismaneException(
                        "No parse configuration processor provided with parser end module, cannot generate output.");
            }
        }

        // pipeline state carried across the main read loop
        LinkedList<String> textSegments = new LinkedList<String>();
        LinkedList<Sentence> sentences = new LinkedList<Sentence>();
        TokenSequence tokenSequence = null;
        PosTagSequence posTagSequence = null;
        RollingSentenceProcessor rollingSentenceProcessor = this.getFilterService()
                .getRollingSentenceProcessor(config.getFileName(), config.isProcessByDefault());
        Sentence leftover = null;
        if (config.getStartModule().equals(Module.SentenceDetector)
                || config.getStartModule().equals(Module.Tokeniser)) {
            // prime the sentence detector with two text segments, to ensure everything gets processed
            textSegments.addLast("");
            textSegments.addLast("");
        }

        StringBuilder stringBuilder = new StringBuilder();
        boolean finished = false;
        int sentenceCount = 0;

        // rolling window of processed text: previous / current / next
        String prevProcessedText = "";
        String processedText = "";
        String nextProcessedText = "";
        SentenceHolder prevSentenceHolder = null;

        int endBlockCharacterCount = 0;

        while (!finished) {
            if (config.getStartModule().equals(Module.SentenceDetector)
                    || config.getStartModule().equals(Module.Tokeniser)) {
                // Note SentenceDetector and Tokeniser start modules treated identically,
                // except that for SentenceDetector we apply a probabilistic sentence detector
                // whereas for Tokeniser we assume all sentence breaks are marked by filters

                // read characters from the reader, one at a time
                char c;
                int r = -1;
                try {
                    r = this.getReader().read();
                } catch (IOException e) {
                    LogUtils.logError(LOG, e);
                }
                if (r == -1) {
                    // end of input: treat as a final newline and wind down
                    finished = true;
                    c = '\n';
                } else {
                    c = (char) r;
                }

                // Jump out if we have 3 consecutive end-block characters.
                if (c == config.getEndBlockCharacter()) {
                    endBlockCharacterCount++;
                    if (endBlockCharacterCount == 3) {
                        LOG.info("Three consecutive end-block characters. Exiting.");
                        finished = true;
                    }
                } else {
                    endBlockCharacterCount = 0;
                }

                // flush the accumulated block at whitespace past the block size,
                // at an end-block character, or at end of input
                if (finished || (Character.isWhitespace(c) && stringBuilder.length() > config.getBlockSize())
                        || c == config.getEndBlockCharacter()) {
                    if (c == config.getEndBlockCharacter())
                        stringBuilder.append(c);
                    if (stringBuilder.length() > 0) {
                        // is the current block > 0 characters?
                        String textSegment = stringBuilder.toString();
                        stringBuilder = new StringBuilder();
                        textSegments.add(textSegment);
                    }
                    if (c == config.getEndBlockCharacter()) {
                        textSegments.addLast("");
                    }
                }
                if (finished) {
                    // pad with empty segments so the 3-wide window drains completely
                    if (stringBuilder.length() > 0) {
                        textSegments.addLast(stringBuilder.toString());
                        stringBuilder = new StringBuilder();
                    }
                    textSegments.addLast("");
                    textSegments.addLast("");
                    textSegments.addLast("");
                }

                if (c != config.getEndBlockCharacter())
                    stringBuilder.append(c);

                // process the rolling window whenever 3 segments are available
                while (textSegments.size() >= 3) {
                    String prevText = textSegments.removeFirst();
                    String text = textSegments.removeFirst();
                    String nextText = textSegments.removeFirst();
                    if (LOG.isTraceEnabled()) {
                        LOG.trace("prevText: " + prevText);
                        LOG.trace("text: " + text);
                        LOG.trace("nextText: " + nextText);
                    }

                    // gather text markers from all filters over the 3-segment context
                    Set<TextMarker> textMarkers = new TreeSet<TextMarker>();
                    for (TextMarkerFilter textMarkerFilter : config.getTextMarkerFilters()) {
                        Set<TextMarker> result = textMarkerFilter.apply(prevText, text, nextText);
                        textMarkers.addAll(result);
                    }

                    // push the text segments back onto the beginning of Deque
                    textSegments.addFirst(nextText);
                    textSegments.addFirst(text);

                    SentenceHolder sentenceHolder = rollingSentenceProcessor.addNextSegment(text, textMarkers);

                    // advance the processed-text window
                    prevProcessedText = processedText;
                    processedText = nextProcessedText;
                    nextProcessedText = sentenceHolder.getText();

                    if (LOG.isTraceEnabled()) {
                        LOG.trace("prevProcessedText: " + prevProcessedText);
                        LOG.trace("processedText: " + processedText);
                        LOG.trace("nextProcessedText: " + nextProcessedText);
                    }

                    boolean reallyFinished = finished && textSegments.size() == 3;

                    if (prevSentenceHolder != null) {
                        if (config.getStartModule().equals(Module.SentenceDetector)) {
                            List<Integer> sentenceBreaks = config.getSentenceDetector()
                                    .detectSentences(prevProcessedText, processedText, nextProcessedText);
                            for (int sentenceBreak : sentenceBreaks) {
                                prevSentenceHolder.addSentenceBoundary(sentenceBreak);
                            }
                        }

                        // complete sentences are queued; an incomplete trailing
                        // sentence is carried over to the next holder
                        List<Sentence> theSentences = prevSentenceHolder.getDetectedSentences(leftover);
                        leftover = null;
                        for (Sentence sentence : theSentences) {
                            if (sentence.isComplete() || reallyFinished) {
                                sentences.add(sentence);
                                sentenceCount++;
                            } else {
                                LOG.debug("Setting leftover to: " + sentence.getText());
                                leftover = sentence;
                            }
                        }
                        if (config.getMaxSentenceCount() > 0
                                && sentenceCount >= config.getMaxSentenceCount()) {
                            finished = true;
                        }
                    }
                    prevSentenceHolder = sentenceHolder;
                } // we have at least 3 text segments (should always be the case once we get started)
            } else if (config.getStartModule().equals(Module.PosTagger)) {
                // starting from pre-tokenised input
                if (config.getTokenCorpusReader().hasNextTokenSequence()) {
                    tokenSequence = config.getTokenCorpusReader().nextTokenSequence();
                } else {
                    tokenSequence = null;
                    finished = true;
                }
            } else if (config.getStartModule().equals(Module.Parser)) {
                // starting from pre-tagged input
                if (config.getPosTagCorpusReader().hasNextPosTagSequence()) {
                    posTagSequence = config.getPosTagCorpusReader().nextPosTagSequence();
                } else {
                    posTagSequence = null;
                    finished = true;
                }
            }

            // which start module?
            boolean needToProcess = false;
            if (config.getStartModule().equals(Module.SentenceDetector)
                    || config.getStartModule().equals(Module.Tokeniser))
                needToProcess = !sentences.isEmpty();
            else if (config.getStartModule().equals(Module.PosTagger))
                needToProcess = tokenSequence != null;
            else if (config.getStartModule().equals(Module.Parser))
                needToProcess = posTagSequence != null;

            while (needToProcess) {
                Sentence sentence = null;
                if (config.getStartModule().compareTo(Module.Tokeniser) <= 0
                        && config.getEndModule().compareTo(Module.SentenceDetector) >= 0) {
                    sentence = sentences.poll();
                    LOG.debug("Sentence: " + sentence);
                    if (this.getSentenceProcessor() != null)
                        this.getSentenceProcessor().onNextSentence(sentence.getText(), this.getWriter());
                } // need to read next sentence

                List<TokenSequence> tokenSequences = null;
                if (config.needsTokeniser()) {
                    tokenSequences = config.getTokeniser().tokenise(sentence);
                    tokenSequence = tokenSequences.get(0);
                    if (this.getTokenSequenceProcessor() != null) {
                        this.getTokenSequenceProcessor().onNextTokenSequence(tokenSequence, this.getWriter());
                    }
                } // need to tokenise ?

                List<PosTagSequence> posTagSequences = null;
                if (config.needsPosTagger()) {
                    posTagSequence = null;
                    if (tokenSequences == null || !config.isPropagateTokeniserBeam()) {
                        tokenSequences = new ArrayList<TokenSequence>();
                        tokenSequences.add(tokenSequence);
                    }

                    if (config.getPosTagger() instanceof NonDeterministicPosTagger) {
                        // beam search: keep the full n-best list, use the best as primary
                        NonDeterministicPosTagger nonDeterministicPosTagger = (NonDeterministicPosTagger) config
                                .getPosTagger();
                        posTagSequences = nonDeterministicPosTagger.tagSentence(tokenSequences);
                        posTagSequence = posTagSequences.get(0);
                    } else {
                        posTagSequence = config.getPosTagger().tagSentence(tokenSequence);
                    }

                    if (posTagSequenceProcessor != null) {
                        posTagSequenceProcessor.onNextPosTagSequence(posTagSequence, this.getWriter());
                    }

                    tokenSequence = null;
                } // need to postag

                if (config.needsParser()) {
                    if (posTagSequences == null || !config.isPropagatePosTaggerBeam()) {
                        posTagSequences = new ArrayList<PosTagSequence>();
                        posTagSequences.add(posTagSequence);
                    }

                    ParseConfiguration parseConfiguration = null;
                    List<ParseConfiguration> parseConfigurations = null;
                    try {
                        if (config.getParser() instanceof NonDeterministicParser) {
                            NonDeterministicParser nonDeterministicParser = (NonDeterministicParser) config
                                    .getParser();
                            parseConfigurations = nonDeterministicParser.parseSentence(posTagSequences);
                            parseConfiguration = parseConfigurations.get(0);
                        } else {
                            parseConfiguration = config.getParser().parseSentence(posTagSequence);
                        }

                        if (this.getParseConfigurationProcessor() != null) {
                            this.getParseConfigurationProcessor().onNextParseConfiguration(parseConfiguration,
                                    this.getWriter());
                        }
                    } catch (Exception e) {
                        // parse failures are logged; fatal only when stopOnError is set
                        LOG.error(e);
                        if (stopOnError)
                            throw new RuntimeException(e);
                    }
                    posTagSequence = null;
                } // need to parse

                // re-evaluate whether another item is ready for this start module
                if (config.getStartModule().equals(Module.SentenceDetector)
                        || config.getStartModule().equals(Module.Tokeniser))
                    needToProcess = !sentences.isEmpty();
                else if (config.getStartModule().equals(Module.PosTagger))
                    needToProcess = tokenSequence != null;
                else if (config.getStartModule().equals(Module.Parser))
                    needToProcess = posTagSequence != null;
            } // next sentence
        } // next character
    } finally {
        // always flush processors and close I/O, even on error
        if (this.getParseConfigurationProcessor() != null) {
            this.getParseConfigurationProcessor().onCompleteParse();
        }

        try {
            this.getReader().close();
            this.getWriter().flush();
            this.getWriter().close();
        } catch (IOException ioe2) {
            LOG.error(ioe2);
            throw new RuntimeException(ioe2);
        }
    }
}