List of usage examples for java.util.LinkedList.isEmpty(), collected from open-source projects.
boolean isEmpty();
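isEmpty() is inherited from java.util.Collection: it returns true exactly when the list holds no elements. A minimal self-contained sketch of the guard-before-access idiom that recurs throughout the examples below (class name is illustrative):

import java.util.LinkedList;

public class IsEmptyDemo {
    public static void main(String[] args) {
        LinkedList<String> queue = new LinkedList<>();
        System.out.println(queue.isEmpty()); // true

        queue.add("first");
        queue.add("second");

        // guard removeFirst(), which throws NoSuchElementException on an empty list
        while (!queue.isEmpty()) {
            System.out.println(queue.removeFirst());
        }
        System.out.println(queue.isEmpty()); // true again
    }
}

getFirst() and removeFirst() throw NoSuchElementException on an empty list, which is why most of the examples below test isEmpty() before touching the head or tail.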
From source file:com.datatorrent.stram.StreamingContainerManager.java
/** * Compute checkpoints required for a given operator instance to be recovered. * This is done by looking at checkpoints available for downstream dependencies first, * and then selecting the most recent available checkpoint that is smaller than downstream. * * @param operator Operator instance for which to find recovery checkpoint * @param ctx Context into which to collect traversal info */// ww w . j a v a2 s . c o m public void updateRecoveryCheckpoints(PTOperator operator, UpdateCheckpointsContext ctx) { if (operator.getRecoveryCheckpoint().windowId < ctx.committedWindowId.longValue()) { ctx.committedWindowId.setValue(operator.getRecoveryCheckpoint().windowId); } if (operator.getState() == PTOperator.State.ACTIVE && (ctx.currentTms - operator.stats.lastWindowIdChangeTms) > operator.stats.windowProcessingTimeoutMillis) { // if the checkpoint is ahead, then it is not blocked but waiting for activation (state-less recovery, at-most-once) if (ctx.committedWindowId.longValue() >= operator.getRecoveryCheckpoint().windowId) { LOG.debug("Marking operator {} blocked committed window {}, recovery window {}", operator, Codec.getStringWindowId(ctx.committedWindowId.longValue()), Codec.getStringWindowId(operator.getRecoveryCheckpoint().windowId)); ctx.blocked.add(operator); } } // the most recent checkpoint eligible for recovery based on downstream state Checkpoint maxCheckpoint = Checkpoint.INITIAL_CHECKPOINT; Set<OperatorMeta> checkpointGroup = ctx.checkpointGroups.get(operator.getOperatorMeta()); if (checkpointGroup == null) { checkpointGroup = Collections.singleton(operator.getOperatorMeta()); } // find intersection of checkpoints that group can collectively move to TreeSet<Checkpoint> commonCheckpoints = new TreeSet<>(new Checkpoint.CheckpointComparator()); synchronized (operator.checkpoints) { commonCheckpoints.addAll(operator.checkpoints); } Set<PTOperator> groupOpers = new HashSet<>(checkpointGroup.size()); boolean pendingDeploy = operator.getState() == PTOperator.State.PENDING_DEPLOY; if (checkpointGroup.size() > 1) { for (OperatorMeta om : checkpointGroup) { Collection<PTOperator> operators = plan.getAllOperators(om); for (PTOperator groupOper : operators) { synchronized (groupOper.checkpoints) { commonCheckpoints.retainAll(groupOper.checkpoints); } // visit all downstream operators of the group ctx.visited.add(groupOper); groupOpers.add(groupOper); pendingDeploy |= operator.getState() == PTOperator.State.PENDING_DEPLOY; } } // highest common checkpoint if (!commonCheckpoints.isEmpty()) { maxCheckpoint = commonCheckpoints.last(); } } else { // without logical grouping, treat partitions as independent // this is especially important for parallel partitioning ctx.visited.add(operator); groupOpers.add(operator); maxCheckpoint = operator.getRecentCheckpoint(); if (ctx.recovery && maxCheckpoint.windowId == Stateless.WINDOW_ID && operator.isOperatorStateLess()) { long currentWindowId = WindowGenerator.getWindowId(ctx.currentTms, this.vars.windowStartMillis, this.getLogicalPlan().getValue(LogicalPlan.STREAMING_WINDOW_SIZE_MILLIS)); maxCheckpoint = new Checkpoint(currentWindowId, 0, 0); } } // DFS downstream operators for (PTOperator groupOper : groupOpers) { for (PTOperator.PTOutput out : groupOper.getOutputs()) { for (PTOperator.PTInput sink : out.sinks) { PTOperator sinkOperator = sink.target; if (groupOpers.contains(sinkOperator)) { continue; // downstream operator within group } if (!ctx.visited.contains(sinkOperator)) { // downstream traversal updateRecoveryCheckpoints(sinkOperator, ctx); 
} // recovery window id cannot move backwards // when dynamically adding new operators if (sinkOperator.getRecoveryCheckpoint().windowId >= operator .getRecoveryCheckpoint().windowId) { maxCheckpoint = Checkpoint.min(maxCheckpoint, sinkOperator.getRecoveryCheckpoint()); } if (ctx.blocked.contains(sinkOperator)) { if (sinkOperator.stats.getCurrentWindowId() == operator.stats.getCurrentWindowId()) { // downstream operator is blocked by this operator ctx.blocked.remove(sinkOperator); } } } } } // find the common checkpoint that is <= downstream recovery checkpoint if (!commonCheckpoints.contains(maxCheckpoint)) { if (!commonCheckpoints.isEmpty()) { maxCheckpoint = Objects.firstNonNull(commonCheckpoints.floor(maxCheckpoint), maxCheckpoint); } } for (PTOperator groupOper : groupOpers) { // checkpoint frozen during deployment if (!pendingDeploy || ctx.recovery) { // remove previous checkpoints Checkpoint c1 = Checkpoint.INITIAL_CHECKPOINT; LinkedList<Checkpoint> checkpoints = groupOper.checkpoints; synchronized (checkpoints) { if (!checkpoints.isEmpty() && (checkpoints.getFirst()).windowId <= maxCheckpoint.windowId) { c1 = checkpoints.getFirst(); Checkpoint c2; while (checkpoints.size() > 1 && ((c2 = checkpoints.get(1)).windowId) <= maxCheckpoint.windowId) { checkpoints.removeFirst(); //LOG.debug("Checkpoint to delete: operator={} windowId={}", operator.getName(), c1); this.purgeCheckpoints.add(new Pair<PTOperator, Long>(groupOper, c1.windowId)); c1 = c2; } } else { if (ctx.recovery && checkpoints.isEmpty() && groupOper.isOperatorStateLess()) { LOG.debug("Adding checkpoint for stateless operator {} {}", groupOper, Codec.getStringWindowId(maxCheckpoint.windowId)); c1 = groupOper.addCheckpoint(maxCheckpoint.windowId, this.vars.windowStartMillis); } } } //LOG.debug("Operator {} checkpoints: commit {} recent {}", new Object[] {operator.getName(), c1, operator.checkpoints}); groupOper.setRecoveryCheckpoint(c1); } else { LOG.debug("Skipping checkpoint update {} during {}", groupOper, groupOper.getState()); } } }
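In the method above, isEmpty() guards access to a shared checkpoint list: getFirst() is only called on a non-empty list, and older checkpoints are pruned while a newer entry still satisfies the recovery window bound. A simplified sketch of that pattern, with a stand-in Checkpoint class rather than the DataTorrent one:

import java.util.LinkedList;

class CheckpointPruner {
    static final class Checkpoint {
        final long windowId;
        Checkpoint(long windowId) { this.windowId = windowId; }
    }

    // drop checkpoints superseded by maxWindowId, always keeping at least the newest eligible one
    static void prune(LinkedList<Checkpoint> checkpoints, long maxWindowId) {
        synchronized (checkpoints) {
            if (!checkpoints.isEmpty() && checkpoints.getFirst().windowId <= maxWindowId) {
                while (checkpoints.size() > 1 && checkpoints.get(1).windowId <= maxWindowId) {
                    checkpoints.removeFirst(); // the next entry is still within the bound, so the head can go
                }
            }
        }
    }
}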
From source file:com.erudika.para.validation.ValidationUtils.java
/** * Validates objects.//from w w w . java 2 s . c o m * @param content an object to be validated * @param app the current app * @return a list of error messages or empty if object is valid */ public static String[] validateObject(App app, ParaObject content) { if (content == null || app == null) { return new String[] { "Object cannot be null." }; } try { String type = content.getType(); boolean isCustomType = (content instanceof Sysprop) && !type.equals(Utils.type(Sysprop.class)); // Validate custom types and user-defined properties if (!app.getValidationConstraints().isEmpty() && isCustomType) { Map<String, Map<String, Map<String, ?>>> fieldsMap = app.getValidationConstraints().get(type); if (fieldsMap != null && !fieldsMap.isEmpty()) { LinkedList<String> errors = new LinkedList<String>(); for (Map.Entry<String, Map<String, Map<String, ?>>> e : fieldsMap.entrySet()) { String field = e.getKey(); Object actualValue = ((Sysprop) content).getProperty(field); // overriding core property validation rules is allowed if (actualValue == null && PropertyUtils.isReadable(content, field)) { actualValue = PropertyUtils.getProperty(content, field); } Map<String, Map<String, ?>> consMap = e.getValue(); for (Map.Entry<String, Map<String, ?>> constraint : consMap.entrySet()) { String consName = constraint.getKey(); Map<String, ?> vals = constraint.getValue(); if (vals == null) { vals = Collections.emptyMap(); } Object val = vals.get("value"); Object min = vals.get("min"); Object max = vals.get("max"); Object in = vals.get("integer"); Object fr = vals.get("fraction"); if ("required".equals(consName) && !required().isValid(actualValue)) { errors.add(Utils.formatMessage("{0} is required.", field)); } else if (matches(Min.class, consName) && !min(val).isValid(actualValue)) { errors.add( Utils.formatMessage("{0} must be a number larger than {1}.", field, val)); } else if (matches(Max.class, consName) && !max(val).isValid(actualValue)) { errors.add( Utils.formatMessage("{0} must be a number smaller than {1}.", field, val)); } else if (matches(Size.class, consName) && !size(min, max).isValid(actualValue)) { errors.add( Utils.formatMessage("{0} must be between {1} and {2}.", field, min, max)); } else if (matches(Email.class, consName) && !email().isValid(actualValue)) { errors.add(Utils.formatMessage("{0} is not a valid email.", field)); } else if (matches(Digits.class, consName) && !digits(in, fr).isValid(actualValue)) { errors.add( Utils.formatMessage("{0} is not a valid number or within range.", field)); } else if (matches(Pattern.class, consName) && !pattern(val).isValid(actualValue)) { errors.add(Utils.formatMessage("{0} doesn't match the pattern {1}.", field, val)); } else if (matches(AssertFalse.class, consName) && !falsy().isValid(actualValue)) { errors.add(Utils.formatMessage("{0} must be false.", field)); } else if (matches(AssertTrue.class, consName) && !truthy().isValid(actualValue)) { errors.add(Utils.formatMessage("{0} must be true.", field)); } else if (matches(Future.class, consName) && !future().isValid(actualValue)) { errors.add(Utils.formatMessage("{0} must be in the future.", field)); } else if (matches(Past.class, consName) && !past().isValid(actualValue)) { errors.add(Utils.formatMessage("{0} must be in the past.", field)); } else if (matches(URL.class, consName) && !url().isValid(actualValue)) { errors.add(Utils.formatMessage("{0} is not a valid URL.", field)); } } } if (!errors.isEmpty()) { return errors.toArray(new String[0]); } } } } catch (Exception ex) { logger.error(null, ex); } 
return validateObject(content); }
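Here the LinkedList acts as an error accumulator: constraint violations are appended as they are found, and the list is converted to an array only when isEmpty() reports that at least one message was collected. A reduced sketch under that assumption (the constraint checks are placeholders):

import java.util.LinkedList;

class Validator {
    static String[] validate(Object value) {
        LinkedList<String> errors = new LinkedList<>();
        if (value == null) {
            errors.add("value is required.");
        }
        // ... further constraint checks would append more messages here ...
        if (!errors.isEmpty()) {
            return errors.toArray(new String[0]);
        }
        return new String[0]; // valid: nothing was accumulated
    }
}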
From source file:org.apache.sling.resourceresolver.impl.ResourceResolverImpl.java
/** * full implementation - apply sling:alias from the resource path - apply * /etc/map mappings (inkl. config backwards compat) - return absolute uri * if possible//from w w w.j av a 2 s.co m * * @see org.apache.sling.api.resource.ResourceResolver#map(javax.servlet.http.HttpServletRequest, * java.lang.String) */ @Override public String map(final HttpServletRequest request, final String resourcePath) { checkClosed(); // find a fragment or query int fragmentQueryMark = resourcePath.indexOf('#'); if (fragmentQueryMark < 0) { fragmentQueryMark = resourcePath.indexOf('?'); } // cut fragment or query off the resource path String mappedPath; final String fragmentQuery; if (fragmentQueryMark >= 0) { fragmentQuery = resourcePath.substring(fragmentQueryMark); mappedPath = resourcePath.substring(0, fragmentQueryMark); logger.debug("map: Splitting resource path '{}' into '{}' and '{}'", new Object[] { resourcePath, mappedPath, fragmentQuery }); } else { fragmentQuery = null; mappedPath = resourcePath; } // cut off scheme and host, if the same as requested final String schemehostport; final String schemePrefix; if (request != null) { schemehostport = MapEntry.getURI(request.getScheme(), request.getServerName(), request.getServerPort(), "/"); schemePrefix = request.getScheme().concat("://"); logger.debug("map: Mapping path {} for {} (at least with scheme prefix {})", new Object[] { resourcePath, schemehostport, schemePrefix }); } else { schemehostport = null; schemePrefix = null; logger.debug("map: Mapping path {} for default", resourcePath); } ParsedParameters parsed = new ParsedParameters(mappedPath); final Resource res = resolveInternal(parsed.getRawPath(), parsed.getParameters()); if (res != null) { // keep, what we might have cut off in internal resolution final String resolutionPathInfo = res.getResourceMetadata().getResolutionPathInfo(); logger.debug("map: Path maps to resource {} with path info {}", res, resolutionPathInfo); // find aliases for segments. 
we can't walk the parent chain // since the request session might not have permissions to // read all parents SLING-2093 final LinkedList<String> names = new LinkedList<String>(); Resource current = res; String path = res.getPath(); while (path != null) { String alias = null; if (current != null && !path.endsWith(JCR_CONTENT_LEAF)) { if (factory.getMapEntries().isOptimizeAliasResolutionEnabled()) { logger.debug("map: Optimize Alias Resolution is Enabled"); String parentPath = ResourceUtil.getParent(path); if (parentPath != null) { final Map<String, String> aliases = factory.getMapEntries().getAliasMap(parentPath); if (aliases != null && aliases.containsValue(current.getName())) { for (String key : aliases.keySet()) { if (current.getName().equals(aliases.get(key))) { alias = key; break; } } } } } else { logger.debug("map: Optimize Alias Resolution is Disabled"); alias = ResourceResolverControl.getProperty(current, PROP_ALIAS); } } if (alias == null || alias.length() == 0) { alias = ResourceUtil.getName(path); } names.add(alias); path = ResourceUtil.getParent(path); if ("/".equals(path)) { path = null; } else if (path != null) { current = res.getResourceResolver().resolve(path); } } // build path from segment names final StringBuilder buf = new StringBuilder(); // construct the path from the segments (or root if none) if (names.isEmpty()) { buf.append('/'); } else { while (!names.isEmpty()) { buf.append('/'); buf.append(names.removeLast()); } } // reappend the resolutionPathInfo if (resolutionPathInfo != null) { buf.append(resolutionPathInfo); } // and then we have the mapped path to work on mappedPath = buf.toString(); logger.debug("map: Alias mapping resolves to path {}", mappedPath); } boolean mappedPathIsUrl = false; for (final MapEntry mapEntry : this.factory.getMapEntries().getMapMaps()) { final String[] mappedPaths = mapEntry.replace(mappedPath); if (mappedPaths != null) { logger.debug("map: Match for Entry {}", mapEntry); mappedPathIsUrl = !mapEntry.isInternal(); if (mappedPathIsUrl && schemehostport != null) { mappedPath = null; for (final String candidate : mappedPaths) { if (candidate.startsWith(schemehostport)) { mappedPath = candidate.substring(schemehostport.length() - 1); mappedPathIsUrl = false; logger.debug("map: Found host specific mapping {} resolving to {}", candidate, mappedPath); break; } else if (candidate.startsWith(schemePrefix) && mappedPath == null) { mappedPath = candidate; } } if (mappedPath == null) { mappedPath = mappedPaths[0]; } } else { // we can only go with assumptions selecting the first entry mappedPath = mappedPaths[0]; } logger.debug("resolve: MapEntry {} matches, mapped path is {}", mapEntry, mappedPath); break; } } // this should not be the case, since mappedPath is primed if (mappedPath == null) { mappedPath = resourcePath; } // [scheme:][//authority][path][?query][#fragment] try { // use commons-httpclient's URI instead of java.net.URI, as it can // actually accept *unescaped* URIs, such as the "mappedPath" and // return them in proper escaped form, including the path, via // toString() final URI uri = new URI(mappedPath, false); // 1. mangle the namespaces in the path String path = mangleNamespaces(uri.getPath()); // 2. 
prepend servlet context path if we have a request if (request != null && request.getContextPath() != null && request.getContextPath().length() > 0) { path = request.getContextPath().concat(path); } // update the path part of the URI uri.setPath(path); mappedPath = uri.toString(); } catch (final URIException e) { logger.warn("map: Unable to mangle namespaces for " + mappedPath + " returning unmangled", e); } logger.debug("map: Returning URL {} as mapping for path {}", mappedPath, resourcePath); // reappend fragment and/or query if (fragmentQuery != null) { mappedPath = mappedPath.concat(fragmentQuery); } return mappedPath; }
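The alias-mapping block above collects path segments into a LinkedList leaf-first and then rebuilds the path with removeLast(), falling back to "/" when no segments were gathered. A compact sketch of that reconstruction step (names and types are illustrative):

import java.util.LinkedList;

class PathBuilder {
    // segments are collected leaf-first; rebuild the absolute path root-first
    static String build(LinkedList<String> names) {
        StringBuilder buf = new StringBuilder();
        if (names.isEmpty()) {
            buf.append('/');                    // no segments: the root path
        } else {
            while (!names.isEmpty()) {
                buf.append('/');
                buf.append(names.removeLast()); // last element is the segment closest to the root
            }
        }
        return buf.toString();
    }
}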
From source file:it.unimi.dsi.sux4j.mph.CHDMinimalPerfectHashFunction.java
/** * Creates a new CHD minimal perfect hash function for the given keys. * /*from w w w . ja v a2s .c o m*/ * @param keys the keys to hash, or {@code null}. * @param transform a transformation strategy for the keys. * @param lambda the average bucket size. * @param loadFactor the load factor. * @param signatureWidth a signature width, or 0 for no signature. * @param tempDir a temporary directory for the store files, or {@code null} for the standard temporary directory. * @param chunkedHashStore a chunked hash store containing the keys, or {@code null}; the store * can be unchecked, but in this case <code>keys</code> and <code>transform</code> must be non-{@code null}. */ protected CHDMinimalPerfectHashFunction(final Iterable<? extends T> keys, final TransformationStrategy<? super T> transform, final int lambda, double loadFactor, final int signatureWidth, final File tempDir, ChunkedHashStore<T> chunkedHashStore) throws IOException { this.transform = transform; final ProgressLogger pl = new ProgressLogger(LOGGER); pl.displayLocalSpeed = true; pl.displayFreeMemory = true; final RandomGenerator r = new XorShift1024StarRandomGenerator(); pl.itemsName = "keys"; final boolean givenChunkedHashStore = chunkedHashStore != null; if (!givenChunkedHashStore) { chunkedHashStore = new ChunkedHashStore<T>(transform, tempDir, pl); chunkedHashStore.reset(r.nextLong()); chunkedHashStore.addAll(keys.iterator()); } n = chunkedHashStore.size(); defRetValue = -1; // For the very few cases in which we can decide int log2NumChunks = Math.max(0, Fast.mostSignificantBit(n >> LOG2_CHUNK_SIZE)); chunkShift = chunkedHashStore.log2Chunks(log2NumChunks); final int numChunks = 1 << log2NumChunks; LOGGER.debug("Number of chunks: " + numChunks); LOGGER.debug("Average chunk size: " + (double) n / numChunks); offsetNumBucketsSeed = new long[(numChunks + 1) * 3 + 2]; int duplicates = 0; final LongArrayList holes = new LongArrayList(); @SuppressWarnings("resource") final OfflineIterable<MutableLong, MutableLong> coefficients = new OfflineIterable<MutableLong, MutableLong>( new Serializer<MutableLong, MutableLong>() { @Override public void write(final MutableLong a, final DataOutput dos) throws IOException { long x = a.longValue(); while ((x & ~0x7FL) != 0) { dos.writeByte((int) (x | 0x80)); x >>>= 7; } dos.writeByte((int) x); } @Override public void read(final DataInput dis, final MutableLong x) throws IOException { byte b = dis.readByte(); long t = b & 0x7F; for (int shift = 7; (b & 0x80) != 0; shift += 7) { b = dis.readByte(); t |= (b & 0x7FL) << shift; } x.setValue(t); } }, new MutableLong()); for (;;) { LOGGER.debug("Generating minimal perfect hash function..."); holes.clear(); coefficients.clear(); pl.expectedUpdates = numChunks; pl.itemsName = "chunks"; pl.start("Analysing chunks... "); try { int chunkNumber = 0; for (ChunkedHashStore.Chunk chunk : chunkedHashStore) { /* We treat a chunk as a single hash function. The number of bins is thus * the first prime larger than the chunk size divided by the load factor. 
*/ final int p = Primes.nextPrime((int) Math.ceil(chunk.size() / loadFactor) + 1); final boolean used[] = new boolean[p]; final int numBuckets = (chunk.size() + lambda - 1) / lambda; numBuckets(chunkNumber + 1, numBuckets(chunkNumber) + numBuckets); final int[] cc0 = new int[numBuckets]; final int[] cc1 = new int[numBuckets]; @SuppressWarnings("unchecked") final ArrayList<long[]>[] bucket = new ArrayList[numBuckets]; for (int i = bucket.length; i-- != 0;) bucket[i] = new ArrayList<long[]>(); tryChunk: for (;;) { for (ArrayList<long[]> b : bucket) b.clear(); Arrays.fill(used, false); /* At each try, the allocation to keys to bucket is randomized differently. */ final long seed = r.nextLong(); // System.err.println( "Number of keys: " + chunk.size() + " Number of bins: " + p + " seed: " + seed ); /* We distribute the keys in this chunks in the buckets. */ for (Iterator<long[]> iterator = chunk.iterator(); iterator.hasNext();) { final long[] triple = iterator.next(); final long[] h = new long[3]; Hashes.spooky4(triple, seed, h); final ArrayList<long[]> b = bucket[(int) ((h[0] >>> 1) % numBuckets)]; h[1] = (int) ((h[1] >>> 1) % p); h[2] = (int) ((h[2] >>> 1) % (p - 1)) + 1; // All elements in a bucket must have either different h[ 1 ] or different h[ 2 ] for (long[] t : b) if (t[1] == h[1] && t[2] == h[2]) { LOGGER.info("Duplicate index" + Arrays.toString(t)); continue tryChunk; } b.add(h); } final int[] perm = Util.identity(bucket.length); IntArrays.quickSort(perm, new AbstractIntComparator() { private static final long serialVersionUID = 1L; @Override public int compare(int a0, int a1) { return Integer.compare(bucket[a1].size(), bucket[a0].size()); } }); for (int i = 0; i < perm.length;) { final LinkedList<Integer> bucketsToDo = new LinkedList<Integer>(); final int size = bucket[perm[i]].size(); //System.err.println( "Bucket size: " + size ); int j; // Gather indices of all buckets with the same size for (j = i; j < perm.length && bucket[perm[j]].size() == size; j++) bucketsToDo.add(Integer.valueOf(perm[j])); // Examine for each pair (c0,c1) the buckets still to do ext: for (int c1 = 0; c1 < p; c1++) for (int c0 = 0; c0 < p; c0++) { //System.err.println( "Testing " + c0 + ", " + c1 + " (to do: " + bucketsToDo.size() + ")" ); for (Iterator<Integer> iterator = bucketsToDo.iterator(); iterator.hasNext();) { final int k = iterator.next().intValue(); final ArrayList<long[]> b = bucket[k]; boolean completed = true; final IntArrayList done = new IntArrayList(); // Try to see whether the necessary entries are not used for (long[] h : b) { //assert k == h[ 0 ]; int pos = (int) ((h[1] + c0 * h[2] + c1) % p); //System.err.println( "Testing pos " + pos + " for " + Arrays.toString( e )); if (used[pos]) { completed = false; break; } else { used[pos] = true; done.add(pos); } } if (completed) { // All positions were free cc0[k] = c0; cc1[k] = c1; iterator.remove(); } else for (int d : done) used[d] = false; } if (bucketsToDo.isEmpty()) break ext; } if (!bucketsToDo.isEmpty()) continue tryChunk; seed(chunkNumber, seed); i = j; } break; } // System.err.println("DONE!"); if (ASSERTS) { final IntOpenHashSet pos = new IntOpenHashSet(); final long h[] = new long[3]; for (Iterator<long[]> iterator = chunk.iterator(); iterator.hasNext();) { final long[] triple = iterator.next(); Hashes.spooky4(triple, seed(chunkNumber), h); h[0] = (h[0] >>> 1) % numBuckets; h[1] = (int) ((h[1] >>> 1) % p); h[2] = (int) ((h[2] >>> 1) % (p - 1)) + 1; //System.err.println( Arrays.toString( e ) ); assert pos.add((int) ((h[1] + 
cc0[(int) (h[0])] * h[2] + cc1[(int) (h[0])]) % p)); } } final MutableLong l = new MutableLong(); for (int i = 0; i < numBuckets; i++) { l.setValue(cc0[i] + cc1[i] * p); coefficients.add(l); } for (int i = 0; i < p; i++) if (!used[i]) holes.add(offset(chunkNumber) + i); offset(chunkNumber + 1, offset(chunkNumber) + p); chunkNumber++; pl.update(); } pl.done(); break; } catch (ChunkedHashStore.DuplicateException e) { if (keys == null) throw new IllegalStateException( "You provided no keys, but the chunked hash store was not checked"); if (duplicates++ > 3) throw new IllegalArgumentException("The input list contains duplicates"); LOGGER.warn("Found duplicate. Recomputing triples..."); chunkedHashStore.reset(r.nextLong()); chunkedHashStore.addAll(keys.iterator()); } } rank = new SparseRank(offset(offsetNumBucketsSeed.length / 3 - 1), holes.size(), holes.iterator()); globalSeed = chunkedHashStore.seed(); this.coefficients = new EliasFanoLongBigList(new AbstractLongIterator() { final OfflineIterator<MutableLong, MutableLong> iterator = coefficients.iterator(); @Override public boolean hasNext() { return iterator.hasNext(); } public long nextLong() { return iterator.next().longValue(); } }, 0, true); coefficients.close(); LOGGER.info("Completed."); LOGGER.info("Actual bit cost per key: " + (double) numBits() / n); if (signatureWidth != 0) { signatureMask = -1L >>> Long.SIZE - signatureWidth; (signatures = LongArrayBitVector.getInstance().asLongBigList(signatureWidth)).size(n); pl.expectedUpdates = n; pl.itemsName = "signatures"; pl.start("Signing..."); for (ChunkedHashStore.Chunk chunk : chunkedHashStore) { Iterator<long[]> iterator = chunk.iterator(); for (int i = chunk.size(); i-- != 0;) { final long[] triple = iterator.next(); long t = getLongByTripleNoCheck(triple); signatures.set(t, signatureMask & triple[0]); pl.lightUpdate(); } } pl.done(); } else { signatureMask = 0; signatures = null; } if (!givenChunkedHashStore) chunkedHashStore.close(); }
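Within the chunk analysis, isEmpty() terminates the coefficient search: buckets of equal size are queued in a LinkedList, removed as soon as a (c0, c1) pair places all their keys, and the search stops when the work list is drained. A hypothetical, much-reduced rendering of that control flow:

import java.util.Iterator;
import java.util.LinkedList;

class WorkListSearch {
    interface Assignment { boolean tryAssign(int bucket, int c0, int c1); }

    // returns true when every queued bucket received a coefficient pair
    static boolean assignAll(LinkedList<Integer> bucketsToDo, int p, Assignment a) {
        search:
        for (int c1 = 0; c1 < p; c1++) {
            for (int c0 = 0; c0 < p; c0++) {
                for (Iterator<Integer> it = bucketsToDo.iterator(); it.hasNext();) {
                    if (a.tryAssign(it.next(), c0, c1)) {
                        it.remove(); // this bucket is done
                    }
                }
                if (bucketsToDo.isEmpty()) {
                    break search;    // all buckets placed
                }
            }
        }
        return bucketsToDo.isEmpty(); // non-empty means the caller must retry
    }
}

A non-empty work list after the full scan signals that the chunk must be retried with a different seed, which is what the surrounding continue tryChunk does in the original.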
From source file:org.opencb.opencga.storage.hadoop.variant.HadoopVariantStorageEngine.java
@Override public List<StoragePipelineResult> index(List<URI> inputFiles, URI outdirUri, boolean doExtract, boolean doTransform, boolean doLoad) throws StorageEngineException { if (inputFiles.size() == 1 || !doLoad) { return super.index(inputFiles, outdirUri, doExtract, doTransform, doLoad); }/*from w ww .j a v a 2 s . c o m*/ final boolean doArchive; final boolean doMerge; if (!getOptions().containsKey(HADOOP_LOAD_ARCHIVE) && !getOptions().containsKey(HADOOP_LOAD_VARIANT)) { doArchive = true; doMerge = true; } else { doArchive = getOptions().getBoolean(HADOOP_LOAD_ARCHIVE, false); doMerge = getOptions().getBoolean(HADOOP_LOAD_VARIANT, false); } if (!doArchive && !doMerge) { return Collections.emptyList(); } final int nThreadArchive = getOptions().getInt(HADOOP_LOAD_ARCHIVE_BATCH_SIZE, 2); ObjectMap extraOptions = new ObjectMap().append(HADOOP_LOAD_ARCHIVE, true).append(HADOOP_LOAD_VARIANT, false); final List<StoragePipelineResult> concurrResult = new CopyOnWriteArrayList<>(); List<VariantStoragePipeline> etlList = new ArrayList<>(); ExecutorService executorService = Executors.newFixedThreadPool(nThreadArchive, r -> { Thread t = new Thread(r); t.setDaemon(true); return t; }); // Set Daemon for quick shutdown !!! LinkedList<Future<StoragePipelineResult>> futures = new LinkedList<>(); List<Integer> indexedFiles = new CopyOnWriteArrayList<>(); for (URI inputFile : inputFiles) { //Provide a connected storageETL if load is required. VariantStoragePipeline storageETL = newStorageETL(doLoad, new ObjectMap(extraOptions)); futures.add(executorService.submit(() -> { try { Thread.currentThread().setName(Paths.get(inputFile).getFileName().toString()); StoragePipelineResult storagePipelineResult = new StoragePipelineResult(inputFile); URI nextUri = inputFile; boolean error = false; if (doTransform) { try { nextUri = transformFile(storageETL, storagePipelineResult, concurrResult, nextUri, outdirUri); } catch (StoragePipelineException ignore) { //Ignore here. Errors are stored in the ETLResult error = true; } } if (doLoad && doArchive && !error) { try { loadFile(storageETL, storagePipelineResult, concurrResult, nextUri, outdirUri); } catch (StoragePipelineException ignore) { //Ignore here. Errors are stored in the ETLResult error = true; } } if (doLoad && !error) { // Read the VariantSource to get the original fileName (it may be different from the // nextUri.getFileName if this is the transformed file) String fileName = storageETL.readVariantSource(nextUri, null).getFileName(); // Get latest study configuration from DB, might have been changed since StudyConfiguration studyConfiguration = storageETL.getStudyConfiguration(); // Get file ID for the provided file name Integer fileId = studyConfiguration.getFileIds().get(fileName); indexedFiles.add(fileId); } return storagePipelineResult; } finally { try { storageETL.close(); } catch (StorageEngineException e) { logger.error("Issue closing DB connection ", e); } } })); } executorService.shutdown(); int errors = 0; try { while (!futures.isEmpty()) { executorService.awaitTermination(1, TimeUnit.MINUTES); // Check values if (futures.peek().isDone() || futures.peek().isCancelled()) { Future<StoragePipelineResult> first = futures.pop(); StoragePipelineResult result = first.get(1, TimeUnit.MINUTES); if (result.getTransformError() != null) { //TODO: Handle errors. Retry? errors++; result.getTransformError().printStackTrace(); } else if (result.getLoadError() != null) { //TODO: Handle errors. Retry? 
errors++; result.getLoadError().printStackTrace(); } concurrResult.add(result); } } if (errors > 0) { throw new StoragePipelineException("Errors found", concurrResult); } if (doLoad && doMerge) { int batchMergeSize = getOptions().getInt(HADOOP_LOAD_VARIANT_BATCH_SIZE, 10); // Overwrite default ID list with user provided IDs List<Integer> pendingFiles = indexedFiles; if (getOptions().containsKey(HADOOP_LOAD_VARIANT_PENDING_FILES)) { List<Integer> idList = getOptions().getAsIntegerList(HADOOP_LOAD_VARIANT_PENDING_FILES); if (!idList.isEmpty()) { // only if the list is not empty pendingFiles = idList; } } List<Integer> filesToMerge = new ArrayList<>(batchMergeSize); int i = 0; for (Iterator<Integer> iterator = pendingFiles.iterator(); iterator.hasNext(); i++) { Integer indexedFile = iterator.next(); filesToMerge.add(indexedFile); if (filesToMerge.size() == batchMergeSize || !iterator.hasNext()) { extraOptions = new ObjectMap().append(HADOOP_LOAD_ARCHIVE, false) .append(HADOOP_LOAD_VARIANT, true) .append(HADOOP_LOAD_VARIANT_PENDING_FILES, filesToMerge); AbstractHadoopVariantStoragePipeline localEtl = newStorageETL(doLoad, extraOptions); int studyId = getOptions().getInt(Options.STUDY_ID.key()); localEtl.preLoad(inputFiles.get(i), outdirUri); localEtl.merge(studyId, filesToMerge); localEtl.postLoad(inputFiles.get(i), outdirUri); filesToMerge.clear(); } } annotateLoadedFiles(outdirUri, inputFiles, concurrResult, getOptions()); calculateStatsForLoadedFiles(outdirUri, inputFiles, concurrResult, getOptions()); } } catch (InterruptedException e) { Thread.interrupted(); throw new StoragePipelineException("Interrupted!", e, concurrResult); } catch (ExecutionException e) { throw new StoragePipelineException("Execution exception!", e, concurrResult); } catch (TimeoutException e) { throw new StoragePipelineException("Timeout Exception", e, concurrResult); } finally { if (!executorService.isShutdown()) { try { executorService.shutdownNow(); } catch (Exception e) { logger.error("Problems shutting executer service down", e); } } } return concurrResult; }
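The indexing loop treats a LinkedList of Futures as a FIFO queue: it keeps polling while the queue is non-empty, peeks at the head, and pops it once the task has finished. A minimal sketch of that drain pattern (error handling and retries omitted):

import java.util.LinkedList;
import java.util.concurrent.*;

class FutureQueueDrain {
    static <T> void drain(LinkedList<Future<T>> futures, ExecutorService pool) throws Exception {
        while (!futures.isEmpty()) {
            pool.awaitTermination(1, TimeUnit.MINUTES);          // wait a little for progress
            if (futures.peek().isDone() || futures.peek().isCancelled()) {
                Future<T> first = futures.pop();                 // head of the queue
                T result = first.get(1, TimeUnit.MINUTES);
                System.out.println("completed: " + result);
            }
        }
    }
}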
From source file:com.ikanow.aleph2.management_db.services.DataBucketCrudService.java
/** Validates whether the new or updated bucket is valid: both in terms of authorization and in terms of format * @param bucket/*from w ww . j a v a2 s . c o m*/ * @return * @throws ExecutionException * @throws InterruptedException */ protected Tuple2<DataBucketBean, Collection<BasicMessageBean>> validateBucket(final DataBucketBean bucket, final Optional<DataBucketBean> old_version, boolean do_full_checks, final boolean allow_system_names) throws InterruptedException, ExecutionException { // (will live with this being mutable) final LinkedList<BasicMessageBean> errors = new LinkedList<BasicMessageBean>(); final JsonNode bucket_json = BeanTemplateUtils.toJson(bucket); ///////////////// // PHASE 1 // Check for missing fields ManagementDbErrorUtils.NEW_BUCKET_ERROR_MAP.keySet().stream() .filter(s -> !bucket_json.has(s) || (bucket_json.get(s).isTextual() && bucket_json.get(s).asText().isEmpty())) .forEach(s -> errors.add(MgmtCrudUtils .createValidationError(ErrorUtils.get(ManagementDbErrorUtils.NEW_BUCKET_ERROR_MAP.get(s), Optional.ofNullable(bucket.full_name()).orElse("(unknown)"))))); // We have a full name if we're here, so no check for uniqueness // Check for some bucket path restrictions if (null != bucket.full_name()) { if (!BucketValidationUtils.bucketPathFormatValidationCheck(bucket.full_name())) { errors.add(MgmtCrudUtils .createValidationError(ErrorUtils.get(ManagementDbErrorUtils.BUCKET_FULL_NAME_FORMAT_ERROR, Optional.ofNullable(bucket.full_name()).orElse("(unknown)")))); return Tuples._2T(bucket, errors); // (this is catastrophic obviously) } if (!old_version.isPresent()) { // (create not update) if (do_full_checks) { if (this._underlying_data_bucket_db.get().countObjectsBySpec(CrudUtils .allOf(DataBucketBean.class).when(DataBucketBean::full_name, bucket.full_name())) .get() > 0) { errors.add(MgmtCrudUtils.createValidationError( ErrorUtils.get(ManagementDbErrorUtils.BUCKET_FULL_NAME_UNIQUENESS, Optional.ofNullable(bucket.full_name()).orElse("(unknown)")))); return Tuples._2T(bucket, errors); // (this is catastrophic obviously) } } } } else return Tuples._2T(bucket, errors); // (this is catastrophic obviously) // Some static validation moved into a separate function for testability errors.addAll(BucketValidationUtils.staticValidation(bucket, allow_system_names)); // OK before I do any more stateful checking, going to stop if we have logic errors first if (!errors.isEmpty()) { return Tuples._2T(bucket, errors); } ///////////////// // PHASE 2 //TODO (ALEPH-19): multi buckets - authorization; other - authorization if (do_full_checks) { final CompletableFuture<Collection<BasicMessageBean>> bucket_path_errors_future = validateOtherBucketsInPathChain( bucket); errors.addAll(bucket_path_errors_future.join()); // OK before I do any more stateful checking, going to stop if we have logic errors first if (!errors.isEmpty()) { return Tuples._2T(bucket, errors); } } ///////////////// // PHASE 3 // Finally Check whether I am allowed to update the various fields if old_version.isPresent() if (old_version.isPresent()) { final DataBucketBean old_bucket = old_version.get(); if (!bucket.full_name().equals(old_bucket.full_name())) { errors.add(MgmtCrudUtils .createValidationError(ErrorUtils.get(ManagementDbErrorUtils.BUCKET_UPDATE_FULLNAME_CHANGED, bucket.full_name(), old_bucket.full_name()))); } if (!bucket.owner_id().equals(old_bucket.owner_id())) { errors.add(MgmtCrudUtils .createValidationError(ErrorUtils.get(ManagementDbErrorUtils.BUCKET_UPDATE_OWNERID_CHANGED, bucket.full_name(), 
old_bucket.owner_id()))); } } ///////////////// // PHASE 4 - DATA SCHEMA NOT MESSAGES AT THIS POINT CAN BE INFO, YOU NEED TO CHECK THE SUCCESS() Tuple2<Map<String, String>, List<BasicMessageBean>> schema_validation = BucketValidationUtils .validateSchema(bucket, _service_context); errors.addAll(schema_validation._2()); return Tuples._2T( BeanTemplateUtils.clone(bucket).with(DataBucketBean::data_locations, schema_validation._1()).done(), errors); }
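validateBucket() uses the same accumulator idiom with an early return: each validation phase appends to the LinkedList of errors, and the method stops before the more expensive stateful checks whenever isEmpty() is already false. A stripped-down sketch of that fail-fast structure (names are invented):

import java.util.LinkedList;
import java.util.List;

class PhasedValidator {
    // run cheap checks first and stop before the expensive phase if errors were already collected
    static List<String> validate(String bucketName) {
        LinkedList<String> errors = new LinkedList<>();
        if (bucketName == null || bucketName.isEmpty()) {
            errors.add("bucket full_name is missing");
        }
        if (!errors.isEmpty()) {
            return errors;  // phase 1 failed: skip the stateful phase 2 checks
        }
        // ... phase 2: checks that hit the database would go here ...
        return errors;
    }
}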
From source file:org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.JobControlCompiler.java
/** * The method that creates the Job corresponding to a MapReduceOper. * The assumption is that/*from w ww .j a v a 2 s . co m*/ * every MapReduceOper will have a load and a store. The JobConf removes * the load operator and serializes the input filespec so that PigInputFormat can * take over the creation of splits. It also removes the store operator * and serializes the output filespec so that PigOutputFormat can take over * record writing. The remaining portion of the map plan and reduce plans are * serialized and stored for the PigMapReduce or PigMapOnly objects to take over * the actual running of the plans. * The Mapper & Reducer classes and the required key value formats are set. * Checks if this is a map only job and uses PigMapOnly class as the mapper * and uses PigMapReduce otherwise. * If it is a Map Reduce job, it is bound to have a package operator. Remove it from * the reduce plan and serializes it so that the PigMapReduce class can use it to package * the indexed tuples received by the reducer. * @param mro - The MapReduceOper for which the JobConf is required * @param config - the Configuration object from which JobConf is built * @param pigContext - The PigContext passed on from execution engine * @return Job corresponding to mro * @throws JobCreationException */ @SuppressWarnings({ "unchecked" }) private Job getJob(MROperPlan plan, MapReduceOper mro, Configuration config, PigContext pigContext) throws JobCreationException { org.apache.hadoop.mapreduce.Job nwJob = null; try { nwJob = new org.apache.hadoop.mapreduce.Job(config); } catch (Exception e) { throw new JobCreationException(e); } Configuration conf = nwJob.getConfiguration(); ArrayList<FileSpec> inp = new ArrayList<FileSpec>(); ArrayList<List<OperatorKey>> inpTargets = new ArrayList<List<OperatorKey>>(); ArrayList<String> inpSignatureLists = new ArrayList<String>(); ArrayList<Long> inpLimits = new ArrayList<Long>(); ArrayList<POStore> storeLocations = new ArrayList<POStore>(); Path tmpLocation = null; // add settings for pig statistics String setScriptProp = conf.get(PigConfiguration.PIG_SCRIPT_INFO_ENABLED, "true"); if (setScriptProp.equalsIgnoreCase("true")) { MRScriptState ss = MRScriptState.get(); ss.addSettingsToConf(mro, conf); } conf.set(MRConfiguration.MAPPER_NEW_API, "true"); conf.set(MRConfiguration.REDUCER_NEW_API, "true"); String buffPercent = conf.get(MRConfiguration.JOB_REDUCE_MARKRESET_BUFFER_PERCENT); if (buffPercent == null || Double.parseDouble(buffPercent) <= 0) { log.info(MRConfiguration.JOB_REDUCE_MARKRESET_BUFFER_PERCENT + " is not set, set to default 0.3"); conf.set(MRConfiguration.JOB_REDUCE_MARKRESET_BUFFER_PERCENT, "0.3"); } else { log.info(MRConfiguration.JOB_REDUCE_MARKRESET_BUFFER_PERCENT + " is set to " + conf.get(MRConfiguration.JOB_REDUCE_MARKRESET_BUFFER_PERCENT)); } configureCompression(conf); try { //Process the POLoads List<POLoad> lds = PlanHelper.getPhysicalOperators(mro.mapPlan, POLoad.class); if (lds != null && lds.size() > 0) { for (POLoad ld : lds) { LoadFunc lf = ld.getLoadFunc(); lf.setLocation(ld.getLFile().getFileName(), nwJob); //Store the inp filespecs inp.add(ld.getLFile()); } } if (!mro.reducePlan.isEmpty()) { log.info("Reduce phase detected, estimating # of required reducers."); adjustNumReducers(plan, mro, nwJob); } else { nwJob.setNumReduceTasks(0); } if (!pigContext.inIllustrator && !pigContext.getExecType().isLocal()) { if (okToRunLocal(nwJob, mro, lds)) { log.info(SMALL_JOB_LOG_MSG); // override with the default conf to run in local mode for (Entry<String, 
String> entry : defaultConf) { String key = entry.getKey(); if (key.equals(MRConfiguration.REDUCE_TASKS) || key.equals(MRConfiguration.JOB_REDUCES)) { // this must not be set back to the default in case it has been set to 0 for example. continue; } if (key.startsWith("fs.")) { // we don't want to change fs settings back continue; } if (key.startsWith("io.")) { // we don't want to change io settings back continue; } String value = entry.getValue(); if (conf.get(key) == null || !conf.get(key).equals(value)) { conf.set(key, value); } } conf.setBoolean(PigImplConstants.CONVERTED_TO_LOCAL, true); } else { log.info(BIG_JOB_LOG_MSG); // Search to see if we have any UDF/LoadFunc/StoreFunc that need to pack things into the // distributed cache. List<String> cacheFiles = new ArrayList<String>(); List<String> shipFiles = new ArrayList<String>(); UdfCacheShipFilesVisitor mapUdfCacheFileVisitor = new UdfCacheShipFilesVisitor(mro.mapPlan); mapUdfCacheFileVisitor.visit(); cacheFiles.addAll(mapUdfCacheFileVisitor.getCacheFiles()); shipFiles.addAll(mapUdfCacheFileVisitor.getShipFiles()); UdfCacheShipFilesVisitor reduceUdfCacheFileVisitor = new UdfCacheShipFilesVisitor( mro.reducePlan); reduceUdfCacheFileVisitor.visit(); cacheFiles.addAll(reduceUdfCacheFileVisitor.getCacheFiles()); shipFiles.addAll(reduceUdfCacheFileVisitor.getShipFiles()); setupDistributedCache(pigContext, conf, cacheFiles.toArray(new String[] {}), false); // Setup the DistributedCache for this job List<URL> allJars = new ArrayList<URL>(); for (URL extraJar : pigContext.extraJars) { if (!allJars.contains(extraJar)) { allJars.add(extraJar); } } for (String udf : mro.UDFs) { Class clazz = pigContext.getClassForAlias(udf); if (clazz != null) { String jar = JarManager.findContainingJar(clazz); if (jar != null) { URL jarURL = new File(jar).toURI().toURL(); if (!allJars.contains(jarURL)) { allJars.add(jarURL); } } } } for (String scriptJar : pigContext.scriptJars) { URL jar = new File(scriptJar).toURI().toURL(); if (!allJars.contains(jar)) { allJars.add(jar); } } for (String shipFile : shipFiles) { URL jar = new File(shipFile).toURI().toURL(); if (!allJars.contains(jar)) { allJars.add(jar); } } for (String defaultJar : JarManager.getDefaultJars()) { URL jar = new File(defaultJar).toURI().toURL(); if (!allJars.contains(jar)) { allJars.add(jar); } } for (URL jar : allJars) { boolean predeployed = false; for (String predeployedJar : pigContext.predeployedJars) { if (predeployedJar.contains(new File(jar.toURI()).getName())) { predeployed = true; } } if (!predeployed) { if (jar.getFile().toLowerCase().endsWith(".jar")) { putJarOnClassPathThroughDistributedCache(pigContext, conf, jar); } else { setupDistributedCache(pigContext, conf, new String[] { jar.getPath() }, true); } } } File scriptUDFJarFile = JarManager.createPigScriptUDFJar(pigContext); if (scriptUDFJarFile != null) { putJarOnClassPathThroughDistributedCache(pigContext, conf, scriptUDFJarFile.toURI().toURL()); } } } for (String udf : mro.UDFs) { if (udf.contains("GFCross")) { Object func = PigContext.instantiateFuncFromSpec(new FuncSpec(udf)); if (func instanceof GFCross) { String crossKey = ((GFCross) func).getCrossKey(); conf.set(PigImplConstants.PIG_CROSS_PARALLELISM + "." 
+ crossKey, Integer.toString(mro.getRequestedParallelism())); } } } if (lds != null && lds.size() > 0) { for (POLoad ld : lds) { //Store the target operators for tuples read //from this input List<PhysicalOperator> ldSucs = mro.mapPlan.getSuccessors(ld); List<OperatorKey> ldSucKeys = new ArrayList<OperatorKey>(); if (ldSucs != null) { for (PhysicalOperator operator2 : ldSucs) { ldSucKeys.add(operator2.getOperatorKey()); } } inpTargets.add(ldSucKeys); inpSignatureLists.add(ld.getSignature()); inpLimits.add(ld.getLimit()); //Remove the POLoad from the plan if (!pigContext.inIllustrator) mro.mapPlan.remove(ld); } } if (Utils.isLocal(pigContext, conf)) { ConfigurationUtil.replaceConfigForLocalMode(conf); } conf.set(PigInputFormat.PIG_INPUTS, ObjectSerializer.serialize(inp)); conf.set(PigInputFormat.PIG_INPUT_TARGETS, ObjectSerializer.serialize(inpTargets)); conf.set(PigInputFormat.PIG_INPUT_SIGNATURES, ObjectSerializer.serialize(inpSignatureLists)); conf.set(PigInputFormat.PIG_INPUT_LIMITS, ObjectSerializer.serialize(inpLimits)); // Removing job credential entry before serializing pigcontext into jobconf // since this path would be invalid for the new job being created pigContext.getProperties().remove("mapreduce.job.credentials.binary"); conf.setBoolean(PigImplConstants.PIG_EXECTYPE_MODE_LOCAL, pigContext.getExecType().isLocal()); conf.set(PigImplConstants.PIG_LOG4J_PROPERTIES, ObjectSerializer.serialize(pigContext.getLog4jProperties())); conf.set("udf.import.list", ObjectSerializer.serialize(PigContext.getPackageImportList())); // this is for unit tests since some don't create PigServer // if user specified the job name using -D switch, Pig won't reset the name then. if (System.getProperty(MRConfiguration.JOB_NAME) == null && pigContext.getProperties().getProperty(PigContext.JOB_NAME) != null) { nwJob.setJobName(pigContext.getProperties().getProperty(PigContext.JOB_NAME)); } if (pigContext.getProperties().getProperty(PigContext.JOB_PRIORITY) != null) { // If the job priority was set, attempt to get the corresponding enum value // and set the hadoop job priority. String jobPriority = pigContext.getProperties().getProperty(PigContext.JOB_PRIORITY).toUpperCase(); try { // Allow arbitrary case; the Hadoop job priorities are all upper case. conf.set(MRConfiguration.JOB_PRIORITY, JobPriority.valueOf(jobPriority).toString()); } catch (IllegalArgumentException e) { StringBuffer sb = new StringBuffer("The job priority must be one of ["); JobPriority[] priorities = JobPriority.values(); for (int i = 0; i < priorities.length; ++i) { if (i > 0) sb.append(", "); sb.append(priorities[i]); } sb.append("]. 
You specified [" + jobPriority + "]"); throw new JobCreationException(sb.toString()); } } setupDistributedCache(pigContext, conf, pigContext.getProperties(), "pig.streaming.ship.files", true); setupDistributedCache(pigContext, conf, pigContext.getProperties(), "pig.streaming.cache.files", false); nwJob.setInputFormatClass(PigInputFormat.class); // tmp file compression setups // PIG-3741 This must be done before setStoreLocation on POStores Utils.setTmpFileCompressionOnConf(pigContext, conf); //Process POStore and remove it from the plan LinkedList<POStore> mapStores = PlanHelper.getPhysicalOperators(mro.mapPlan, POStore.class); LinkedList<POStore> reduceStores = PlanHelper.getPhysicalOperators(mro.reducePlan, POStore.class); for (POStore st : mapStores) { storeLocations.add(st); StoreFuncInterface sFunc = st.getStoreFunc(); sFunc.setStoreLocation(st.getSFile().getFileName(), nwJob); if (sFunc instanceof OverwritableStoreFunc) { OverwritableStoreFunc osf = (OverwritableStoreFunc) sFunc; if (osf.shouldOverwrite()) { osf.cleanupOutput(st, nwJob); } } } for (POStore st : reduceStores) { storeLocations.add(st); StoreFuncInterface sFunc = st.getStoreFunc(); sFunc.setStoreLocation(st.getSFile().getFileName(), nwJob); if (sFunc instanceof OverwritableStoreFunc) { OverwritableStoreFunc osf = (OverwritableStoreFunc) sFunc; if (osf.shouldOverwrite()) { osf.cleanupOutput(st, nwJob); } } } setOutputFormat(nwJob); if (mapStores.size() + reduceStores.size() == 1) { // single store case log.info("Setting up single store job"); POStore st; if (reduceStores.isEmpty()) { st = mapStores.get(0); if (!pigContext.inIllustrator) mro.mapPlan.remove(st); } else { st = reduceStores.get(0); if (!pigContext.inIllustrator) mro.reducePlan.remove(st); } MapRedUtil.setupStreamingDirsConfSingle(st, pigContext, conf); } else if (mapStores.size() + reduceStores.size() > 0) { // multi store case log.info("Setting up multi store job"); MapRedUtil.setupStreamingDirsConfMulti(pigContext, conf); boolean disableCounter = conf.getBoolean("pig.disable.counter", false); if (disableCounter) { log.info("Disable Pig custom output counters"); } int idx = 0; for (POStore sto : storeLocations) { sto.setDisableCounter(disableCounter); sto.setMultiStore(true); sto.setIndex(idx++); } } // store map key type // this is needed when the key is null to create // an appropriate NullableXXXWritable object conf.set("pig.map.keytype", ObjectSerializer.serialize(new byte[] { mro.mapKeyType })); // set parent plan in all operators in map and reduce plans // currently the parent plan is really used only when POStream is present in the plan new PhyPlanSetter(mro.mapPlan).visit(); new PhyPlanSetter(mro.combinePlan).visit(); new PhyPlanSetter(mro.reducePlan).visit(); // this call modifies the ReplFiles names of POFRJoin operators // within the MR plans, must be called before the plans are // serialized setupDistributedCacheForJoin(mro, pigContext, conf); SchemaTupleFrontend.copyAllGeneratedToDistributedCache(pigContext, conf); POPackage pack = null; if (mro.reducePlan.isEmpty()) { //MapOnly Job nwJob.setMapperClass(PigMapOnly.Map.class); if (!pigContext.inIllustrator) conf.set("pig.mapPlan", ObjectSerializer.serialize(mro.mapPlan)); if (mro.isEndOfAllInputSetInMap()) { // this is used in Map.close() to decide whether the // pipeline needs to be rerun one more time in the close() // The pipeline is rerun if there either was a stream or POMergeJoin conf.set(END_OF_INP_IN_MAP, "true"); } } else { //Map Reduce Job //Process the POPackage operator and remove it 
from the reduce plan if (!mro.combinePlan.isEmpty()) { POPackage combPack = (POPackage) mro.combinePlan.getRoots().get(0); mro.combinePlan.remove(combPack); nwJob.setCombinerClass(PigCombiner.Combine.class); conf.set("pig.combinePlan", ObjectSerializer.serialize(mro.combinePlan)); conf.set("pig.combine.package", ObjectSerializer.serialize(combPack)); } else if (mro.needsDistinctCombiner()) { nwJob.setCombinerClass(DistinctCombiner.Combine.class); log.info("Setting identity combiner class."); } pack = (POPackage) mro.reducePlan.getRoots().get(0); if (!pigContext.inIllustrator) { mro.reducePlan.remove(pack); } nwJob.setMapperClass(PigMapReduce.Map.class); nwJob.setReducerClass(PigMapReduce.Reduce.class); if (mro.customPartitioner != null) nwJob.setPartitionerClass(PigContext.resolveClassName(mro.customPartitioner)); if (!pigContext.inIllustrator) conf.set("pig.mapPlan", ObjectSerializer.serialize(mro.mapPlan)); if (mro.isEndOfAllInputSetInMap()) { // this is used in Map.close() to decide whether the // pipeline needs to be rerun one more time in the close() // The pipeline is rerun only if there was a stream or merge-join. conf.set(END_OF_INP_IN_MAP, "true"); } if (!pigContext.inIllustrator) conf.set("pig.reducePlan", ObjectSerializer.serialize(mro.reducePlan)); if (mro.isEndOfAllInputSetInReduce()) { // this is used in Map.close() to decide whether the // pipeline needs to be rerun one more time in the close() // The pipeline is rerun only if there was a stream conf.set("pig.stream.in.reduce", "true"); } if (!pigContext.inIllustrator) conf.set("pig.reduce.package", ObjectSerializer.serialize(pack)); conf.set("pig.reduce.key.type", Byte.toString(pack.getPkgr().getKeyType())); if (mro.getUseSecondaryKey()) { nwJob.setGroupingComparatorClass(PigSecondaryKeyGroupComparator.class); nwJob.setPartitionerClass(SecondaryKeyPartitioner.class); nwJob.setSortComparatorClass(PigSecondaryKeyComparator.class); nwJob.setOutputKeyClass(NullableTuple.class); conf.set("pig.secondarySortOrder", ObjectSerializer.serialize(mro.getSecondarySortOrder())); } else { Class<? extends WritableComparable> keyClass = HDataType .getWritableComparableTypes(pack.getPkgr().getKeyType()).getClass(); nwJob.setOutputKeyClass(keyClass); selectComparator(mro, pack.getPkgr().getKeyType(), nwJob); } nwJob.setOutputValueClass(NullableTuple.class); } if (mro.isGlobalSort() || mro.isLimitAfterSort()) { if (mro.isGlobalSort()) { String symlink = addSingleFileToDistributedCache(pigContext, conf, mro.getQuantFile(), "pigsample"); conf.set("pig.quantilesFile", symlink); nwJob.setPartitionerClass(WeightedRangePartitioner.class); } if (mro.isUDFComparatorUsed) { boolean usercomparator = false; for (String compFuncSpec : mro.UDFs) { Class comparator = PigContext.resolveClassName(compFuncSpec); if (ComparisonFunc.class.isAssignableFrom(comparator)) { nwJob.setMapperClass(PigMapReduce.MapWithComparator.class); nwJob.setReducerClass(PigMapReduce.ReduceWithComparator.class); conf.set("pig.reduce.package", ObjectSerializer.serialize(pack)); conf.set("pig.usercomparator", "true"); nwJob.setOutputKeyClass(NullableTuple.class); nwJob.setSortComparatorClass(comparator); usercomparator = true; break; } } if (!usercomparator) { String msg = "Internal error. 
Can't find the UDF comparator"; throw new IOException(msg); } } else { conf.set("pig.sortOrder", ObjectSerializer.serialize(mro.getSortOrder())); } } if (mro.isSkewedJoin()) { String symlink = addSingleFileToDistributedCache(pigContext, conf, mro.getSkewedJoinPartitionFile(), "pigdistkey"); conf.set("pig.keyDistFile", symlink); nwJob.setPartitionerClass(SkewedPartitioner.class); nwJob.setMapperClass(PigMapReduce.MapWithPartitionIndex.class); nwJob.setMapOutputKeyClass(NullablePartitionWritable.class); nwJob.setGroupingComparatorClass(PigGroupingPartitionWritableComparator.class); } if (mro.isCounterOperation()) { if (mro.isRowNumber()) { nwJob.setMapperClass(PigMapReduceCounter.PigMapCounter.class); } else { nwJob.setReducerClass(PigMapReduceCounter.PigReduceCounter.class); } } if (mro.isRankOperation()) { Iterator<String> operationIDs = mro.getRankOperationId().iterator(); while (operationIDs.hasNext()) { String operationID = operationIDs.next(); Iterator<Pair<String, Long>> itPairs = globalCounters.get(operationID).iterator(); Pair<String, Long> pair = null; while (itPairs.hasNext()) { pair = itPairs.next(); conf.setLong(pair.first, pair.second); } } } if (!pigContext.inIllustrator) { // unset inputs for POStore, otherwise, map/reduce plan will be unnecessarily deserialized for (POStore st : mapStores) { st.setInputs(null); st.setParentPlan(null); } for (POStore st : reduceStores) { st.setInputs(null); st.setParentPlan(null); } conf.set(PIG_MAP_STORES, ObjectSerializer.serialize(mapStores)); conf.set(PIG_REDUCE_STORES, ObjectSerializer.serialize(reduceStores)); } String tmp; long maxCombinedSplitSize = 0; if (!mro.combineSmallSplits() || pigContext.getProperties().getProperty("pig.splitCombination", "true").equals("false")) conf.setBoolean("pig.noSplitCombination", true); else if ((tmp = pigContext.getProperties().getProperty("pig.maxCombinedSplitSize", null)) != null) { try { maxCombinedSplitSize = Long.parseLong(tmp); } catch (NumberFormatException e) { log.warn( "Invalid numeric format for pig.maxCombinedSplitSize; use the default maximum combined split size"); } } if (maxCombinedSplitSize > 0) conf.setLong("pig.maxCombinedSplitSize", maxCombinedSplitSize); // It's a hack to set distributed cache file for hadoop 23. Once MiniMRCluster do not require local // jar on fixed location, this can be removed if (pigContext.getExecType() == ExecType.MAPREDUCE) { String newfiles = conf.get("alternative.mapreduce.job.cache.files"); if (newfiles != null) { String files = conf.get(MRConfiguration.JOB_CACHE_FILES); conf.set(MRConfiguration.JOB_CACHE_FILES, files == null ? newfiles.toString() : files + "," + newfiles); } } // Serialize the UDF specific context info. UDFContext.getUDFContext().serialize(conf); Job cjob = new Job(new JobConf(conf), new ArrayList<Job>()); jobStoreMap.put(cjob, new Pair<List<POStore>, Path>(storeLocations, tmpLocation)); return cjob; } catch (JobCreationException jce) { throw jce; } catch (Exception e) { int errCode = 2017; String msg = "Internal error creating job configuration."; throw new JobCreationException(msg, errCode, PigException.BUG, e); } }
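In this job builder, isEmpty() selects the execution shape: an empty reduce plan (mro.reducePlan.isEmpty()) means a map-only job, and the single-store branch picks the map-side store only when the reduce-side store list is empty. A small sketch of the store-selection decision, using plain strings in place of POStore:

import java.util.LinkedList;

class StoreSelection {
    // mirrors the single-store branch: prefer the reduce-side store, else fall back to the map side
    static String pickSingleStore(LinkedList<String> mapStores, LinkedList<String> reduceStores) {
        if (mapStores.size() + reduceStores.size() == 1) {
            return reduceStores.isEmpty() ? mapStores.get(0) : reduceStores.get(0);
        }
        return null; // multi-store (or no store) case is configured differently
    }
}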
From source file:Commands.AddShoesCommand.java
@Override public String executeCommand(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { String forwardToJsp = ""; HttpSession session = request.getSession(true); ShoesDao sd = new ShoesDao(); boolean check = true; ArrayList<String> addList = new ArrayList<>(); ArrayList<Shoes> list = new ArrayList<>(); HashMap<Integer, LinkedList<String>> status = new HashMap<>(); HashMap<Integer, LinkedList<Integer>> color = new HashMap<>(); LinkedList<String> s = null; LinkedList<Integer> c = null; if (session.getAttribute("userLogin") != null && ((User) session.getAttribute("userLogin")).is_Admin()) { if (request.getParameter("number") != null) { int num = Integer.parseInt(request.getParameter("number")); for (int n = 1; n < num; n++) { s = new LinkedList<>(); c = new LinkedList<>(); boolean check1 = true; if (request.getParameter("name-" + n) != null && request.getParameter("brand-" + n) != null && request.getParameter("sport-" + n) != null) { if (request.getParameter("name-" + n).isEmpty()) { session.setAttribute("errorMsg", "The name cannot be empty"); return forwardToJsp = "AddShoes.jsp"; }//w w w .jav a2 s. com String name = (String) request.getParameter("name-" + n).substring(0, 1).toUpperCase() + request.getParameter("name-" + n).substring(1).toLowerCase(); int brand = Integer.parseInt(request.getParameter("brand-" + n)); int sport = Integer.parseInt(request.getParameter("sport-" + n)); if (request.getParameter("price-" + n).isEmpty()) { s.add("price cannot be empty"); } double price = Double.parseDouble(request.getParameter("price-" + n)); if (price < 1 || price > 200) { s.add("price range is between 1 to 200"); } if (!sd.findShoes(name).isEmpty()) { s.add("The name is repeated"); } boolean repeat = false; for (int i = 1; i < 4; i++) { if (request.getParameter("color" + i + "-" + n) != null) { int id = Integer.parseInt(request.getParameter("color" + i + "-" + n)); if (c.contains(id)) { repeat = true; } c.add(id); } } if (repeat) { s.add("The color is repeated"); } // String[] files1 = request.getParameterValues("file1-" + n); // String[] files2 = request.getParameterValues("file2-" + n); // String[] files3 = request.getParameterValues("file3-" + n); // long a=Arrays.stream(files1).filter((String st) -> !st.isEmpty()).count(); // long b=Arrays.stream(files1).filter((String st) -> !st.isEmpty()).count(); // long d=Arrays.stream(files1).filter((String st) -> !st.isEmpty()).count(); // if(a==0 || b==0 || d==0){ // s.add("Images is not uploaded"); // } // p.add(files1); // p.add(files2); // p.add(files3); if (!s.isEmpty()) { status.put(n, s); } color.put(n, c); list.add(new Shoes(n, brand, 0, sport, name, price, "")); } else { check = false; break; } } ColorDao cd = new ColorDao(); response.setContentType("text/html"); session.setAttribute("list", list); session.setAttribute("status", status); session.setAttribute("allcolor", color); if (status.isEmpty() && check) { for (int i = 0; i < list.size(); i++) { c = color.get(i + 1); Iterator<Integer> iter = c.iterator(); int count = 1; while (iter.hasNext()) { String name = list.get(i).getName(); int colorId = iter.next(); String colorName = cd.findColorById(colorId).getColor_Name(); String pic = name + "-" + colorName + "-"; sd.addShoes(list.get(i).getBrandID(), colorId, list.get(i).getTypeID(), name, list.get(i).getPrice(), pic); String colo = request.getParameter("cr" + count + "-" + (i + 1)); String[] col = colo.split(","); String UPLOAD_DIRECTORY = request.getServletContext().getRealPath("") + 
File.separator + "img" + File.separator; int count1 = 1; for (String str : col) { File file = new File(UPLOAD_DIRECTORY + str.substring(4)); File f = new File(UPLOAD_DIRECTORY + pic + count1 + ".jpg"); try { boolean check1 = file.renameTo(f); if (check1 == false) { session.setAttribute("errorMsg", str.substring(4) + " " + UPLOAD_DIRECTORY + pic); return "AddShoes.jsp"; } } catch (SecurityException | NullPointerException se) { session.setAttribute("errorMsg", Arrays.toString(se.getStackTrace())); return "AddShoes.jsp"; } count1++; } count++; } } session.setAttribute("errorMsg", "Shoes is successful added"); // session.removeAttribute("list"); // session.removeAttribute("allcolor"); // session.removeAttribute("status"); } else { session.setAttribute("errorMsg", "Please fill the form with correct information"); forwardToJsp = "AddShoes.jsp"; } } else { session.setAttribute("errorMsg", "Fail to save changes, please refresh the page and try again"); forwardToJsp = "shoes.jsp"; } } else { session.setAttribute("errorMsg", "You are not allowed to access this page"); forwardToJsp = "index.jsp"; } return forwardToJsp; }
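Each form row gets its own LinkedList of problem messages, and the row is recorded in the status map only when that list is non-empty; the command proceeds only if the whole map stays empty. A condensed sketch of that per-row validation (request parsing omitted):

import java.util.HashMap;
import java.util.LinkedList;

class FormValidation {
    // collect per-row problems; a row appears in the map only when it actually has errors
    static HashMap<Integer, LinkedList<String>> validateRows(String[] prices) {
        HashMap<Integer, LinkedList<String>> status = new HashMap<>();
        for (int row = 0; row < prices.length; row++) {
            LinkedList<String> problems = new LinkedList<>();
            if (prices[row] == null || prices[row].isEmpty()) {
                problems.add("price cannot be empty");
            }
            if (!problems.isEmpty()) {
                status.put(row, problems);
            }
        }
        return status; // caller proceeds only when status.isEmpty()
    }
}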
From source file:elh.eus.absa.Features.java
/**
 * Lemma ngram extraction from a kaf document
 *
 * @param int length : which 'n' use for 'n-grams'
 * @param KAFDocument kafDoc : postagged kaf document to extract ngrams from.
 * @param boolean save : save ngrams to file or not.
 * @return TreeSet<String> return lemma ngrams of length length
 */
private int extractLemmaNgrams(int length, KAFDocument kafDoc, List<String> discardPos, boolean save) {
    //System.err.println("lemma ngram extraction: _"+length+"_");
    if (length == 0) {
        return 0;
    }

    int sentNum = kafDoc.getSentences().size();
    for (int s = 0; s < sentNum; s++) {
        LinkedList<String> ngrams = new LinkedList<String>();
        for (Term term : kafDoc.getTermsBySent(s)) {
            if (ngrams.size() >= length) {
                ngrams.removeFirst();
            }
            //if no alphanumeric char is present discard the element as invalid ngram. Or if it has a PoS tag that
            //should be discarded
            String lCurrent = term.getLemma();
            if ((!discardPos.contains(term.getPos()))
                    && (!lCurrent.matches("[^\\p{L}\\p{M}\\p{Nd}\\p{InEmoticons}]+"))
                    && (lCurrent.length() > 1)) {
                ngrams.add(lCurrent);
                //ngrams.add(normalize(term.getLemma(), params.getProperty("normalization", "none")));
            }
            //certain punctuation marks and emoticons are allowed as lemmas
            else if ((lCurrent.length() <= 2) && (lCurrent.matches("[,;.?!]"))) {
                ngrams.add(lCurrent);
            }
            // add ngrams to the feature list
            for (int i = 0; i < ngrams.size(); i++) {
                String ng = featureFromArray(ngrams.subList(0, i + 1), "lemma");
                addNgram("lemma", ng);
            }
        }
        //empty ngram list and add remaining ngrams to the feature list
        while (!ngrams.isEmpty()) {
            String ng = featureFromArray(ngrams, "lemma");
            addNgram("lemma", ng);
            ngrams.removeFirst();
        }
    }
    return 1;
}
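The method above maintains a sliding window of lemmas in a LinkedList, trimming the head once the window reaches n, and drains the remaining shorter ngrams with a while (!ngrams.isEmpty()) loop at the end of each sentence. A self-contained sketch of that windowing pattern, with a hypothetical emit() in place of featureFromArray/addNgram:

import java.util.LinkedList;
import java.util.List;

class NgramWindow {
    // emits every prefix of a sliding window of at most n lemmas, then drains the tail
    static void extract(List<String> lemmas, int n) {
        LinkedList<String> window = new LinkedList<>();
        for (String lemma : lemmas) {
            if (window.size() >= n) {
                window.removeFirst();                 // slide the window
            }
            window.add(lemma);
            for (int i = 0; i < window.size(); i++) {
                emit(window.subList(0, i + 1));       // prefixes of the current window: 1-gram, 2-gram, ...
            }
        }
        while (!window.isEmpty()) {                   // flush the remaining shorter ngrams
            emit(window);
            window.removeFirst();
        }
    }

    static void emit(List<String> ngram) {
        System.out.println(String.join(" ", ngram));
    }
}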
From source file:de.betterform.agent.web.event.EventQueue.java
public List<XMLEvent> aggregateEventList() { // Stack is used to "navigate" through the event list LinkedList<XMLEvent> aggregatedFocusList = new LinkedList<XMLEvent>(); Stack<XMLEvent> aggregatedInsertEventsStack = new Stack(); Stack<XMLEvent> aggregatedEmbedEventsStack = new Stack(); ArrayList<XMLEvent> aggregatedEventList = new ArrayList<XMLEvent>(eventList.size()); for (XMLEvent xmlEvent : this.loadEmbedEventList) { aggregatedEventList.add(xmlEvent); }// ww w .j ava2s . c o m this.loadEmbedEventList.clear(); for (int i = 0; i < eventList.size(); i++) { XercesXMLEvent xmlEvent = (XercesXMLEvent) eventList.get(i); XercesXMLEvent xmlEventToAdd = new XercesXMLEvent(); // Map PROTOTYPE_CLONED event to betterform-insert-repeatitem or betterform-insert-itemset event // and copy event properties to new created XMLEvent if (xmlEvent.getType().equals(BetterFormEventNames.PROTOTYPE_CLONED)) { if (xmlEvent.getContextInfo("targetName").equals(XFormsConstants.ITEMSET)) { xmlEventToAdd.initXMLEvent("betterform-insert-itemset", xmlEvent.getBubbles(), xmlEvent.getCancelable(), xmlEvent.getContextInfo()); } else { xmlEventToAdd.initXMLEvent("betterform-insert-repeatitem", xmlEvent.getBubbles(), xmlEvent.getCancelable(), xmlEvent.getContextInfo()); } xmlEventToAdd.target = xmlEvent.target; xmlEvent.addProperty("generatedIds", new HashMap()); aggregatedEventList.add(xmlEventToAdd); // push XMLEvent to Stack for further processing aggregatedInsertEventsStack.push(xmlEventToAdd); } // add all generated ids to surrounding betterform-insert-repeatitem or betterform-insert-itemset event else if (xmlEvent.getType().equals(BetterFormEventNames.ID_GENERATED) && aggregatedInsertEventsStack.size() > 0) { XMLEvent aggregatingInsertEvent = aggregatedInsertEventsStack.peek(); ((HashMap) aggregatingInsertEvent.getContextInfo("generatedIds")) .put(xmlEvent.getContextInfo("originalId"), xmlEvent.getContextInfo("targetId")); } // add insert position to surrounding betterform-insert-repeatitem or betterform-insert-itemset event else if (xmlEvent.getType().equals(BetterFormEventNames.ITEM_INSERTED)) { XMLEvent tmpEvent = aggregatedInsertEventsStack.pop(); tmpEvent.addProperty("position", xmlEvent.getContextInfo("position")); tmpEvent.addProperty("label", xmlEvent.getContextInfo("label")); tmpEvent.addProperty("value", xmlEvent.getContextInfo("value")); } else if (xmlEvent.getType().equals(BetterFormEventNames.EMBED)) { aggregatedEventList.add(xmlEvent); aggregatedEmbedEventsStack.push(xmlEvent); } else if (xmlEvent.getType().equals(BetterFormEventNames.EMBED_DONE)) { aggregatedEmbedEventsStack.pop().addProperty("targetElement", xmlEvent.getContextInfo("targetElement")); aggregatedEventList.add(xmlEvent); } else if (xmlEvent.getType().equals(XFormsEventNames.FOCUS)) { aggregatedFocusList.push(xmlEvent); } /* else if(xmlEvent.getType().equals(BetterFormEventNames.INDEX_CHANGED)){ aggregatedFocusList.push(xmlEvent); }*/ // all other events within eventList are simply copied to the new eventlist else { aggregatedEventList.add(xmlEvent); } } while (!aggregatedFocusList.isEmpty()) { aggregatedEventList.add(aggregatedFocusList.pollLast()); } return aggregatedEventList; }
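Focus events are pushed onto a LinkedList used as a stack and then appended to the aggregated list by draining it with pollLast() until isEmpty() returns true, which preserves their original order. A minimal sketch of that drain, using strings instead of XMLEvent:

import java.util.LinkedList;
import java.util.List;

class FocusEventOrdering {
    // focus events are pushed onto a LinkedList used as a stack, then appended oldest-first
    static List<String> appendFocusEvents(List<String> aggregated, List<String> focusEvents) {
        LinkedList<String> focusStack = new LinkedList<>();
        for (String event : focusEvents) {
            focusStack.push(event);                // most recent focus event ends up at the head
        }
        while (!focusStack.isEmpty()) {
            aggregated.add(focusStack.pollLast()); // oldest focus event first
        }
        return aggregated;
    }
}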