From source file:com.datatorrent.stram.StreamingContainerManager.java

 * Compute checkpoints required for a given operator instance to be recovered.
 * This is done by looking at checkpoints available for downstream dependencies first,
 * and then selecting the most recent available checkpoint that is smaller than downstream.
 * @param operator Operator instance for which to find recovery checkpoint
 * @param ctx      Context into which to collect traversal info
 */
// Derives the checkpoint the given operator (and its checkpoint group) may recover from:
// it recurses into downstream sinks not yet in ctx.visited, caps the candidate by the sinks'
// recovery checkpoints, and queues older checkpoints for purging.
// NOTE(review): this excerpt appears to have lost lines during extraction (several blocks and
// calls are never closed, e.g. the LOG.debug call below) — compare with the upstream file.
public void updateRecoveryCheckpoints(PTOperator operator, UpdateCheckpointsContext ctx) {
    // guard: the operator's recovery checkpoint is older than the committed window
    // (the guarded statements are not visible in this excerpt)
    if (operator.getRecoveryCheckpoint().windowId < ctx.committedWindowId.longValue()) {

    // an ACTIVE operator whose window id has not changed for longer than its processing
    // timeout is a candidate for being reported as blocked
    if (operator.getState() == PTOperator.State.ACTIVE && (ctx.currentTms
            - operator.stats.lastWindowIdChangeTms) > operator.stats.windowProcessingTimeoutMillis) {
        // if the checkpoint is ahead, then it is not blocked but waiting for activation (state-less recovery, at-most-once)
        if (ctx.committedWindowId.longValue() >= operator.getRecoveryCheckpoint().windowId) {
            LOG.debug("Marking operator {} blocked committed window {}, recovery window {}", operator,

    // the most recent checkpoint eligible for recovery based on downstream state
    Checkpoint maxCheckpoint = Checkpoint.INITIAL_CHECKPOINT;

    // operators without an entry in ctx.checkpointGroups form a group of their own
    Set<OperatorMeta> checkpointGroup = ctx.checkpointGroups.get(operator.getOperatorMeta());
    if (checkpointGroup == null) {
        checkpointGroup = Collections.singleton(operator.getOperatorMeta());
    // find intersection of checkpoints that group can collectively move to
    TreeSet<Checkpoint> commonCheckpoints = new TreeSet<>(new Checkpoint.CheckpointComparator());
    synchronized (operator.checkpoints) {
    Set<PTOperator> groupOpers = new HashSet<>(checkpointGroup.size());
    boolean pendingDeploy = operator.getState() == PTOperator.State.PENDING_DEPLOY;
    if (checkpointGroup.size() > 1) {
        for (OperatorMeta om : checkpointGroup) {
            Collection<PTOperator> operators = plan.getAllOperators(om);
            for (PTOperator groupOper : operators) {
                synchronized (groupOper.checkpoints) {
                // visit all downstream operators of the group
                // NOTE(review): this tests operator's state, not groupOper's — confirm that is intended
                pendingDeploy |= operator.getState() == PTOperator.State.PENDING_DEPLOY;
        // highest common checkpoint
        if (!commonCheckpoints.isEmpty()) {
            maxCheckpoint = commonCheckpoints.last();
    } else {
        // without logical grouping, treat partitions as independent
        // this is especially important for parallel partitioning
        maxCheckpoint = operator.getRecentCheckpoint();
        // during recovery a stateless operator without a real checkpoint gets a synthetic one
        // at the window id computed from the current time
        if (ctx.recovery && maxCheckpoint.windowId == Stateless.WINDOW_ID && operator.isOperatorStateLess()) {
            long currentWindowId = WindowGenerator.getWindowId(ctx.currentTms, this.vars.windowStartMillis,
            maxCheckpoint = new Checkpoint(currentWindowId, 0, 0);

    // DFS downstream operators
    for (PTOperator groupOper : groupOpers) {
        for (PTOperator.PTOutput out : groupOper.getOutputs()) {
            for (PTOperator.PTInput sink : out.sinks) {
                PTOperator sinkOperator = sink.target;
                if (groupOpers.contains(sinkOperator)) {
                    continue; // downstream operator within group
                if (!ctx.visited.contains(sinkOperator)) {
                    // downstream traversal
                    updateRecoveryCheckpoints(sinkOperator, ctx);
                // recovery window id cannot move backwards
                // when dynamically adding new operators
                // (a sink whose recovery checkpoint is behind this operator's is ignored for the cap)
                if (sinkOperator.getRecoveryCheckpoint().windowId >= operator
                        .getRecoveryCheckpoint().windowId) {
                    maxCheckpoint = Checkpoint.min(maxCheckpoint, sinkOperator.getRecoveryCheckpoint());

                if (ctx.blocked.contains(sinkOperator)) {
                    if (sinkOperator.stats.getCurrentWindowId() == operator.stats.getCurrentWindowId()) {
                        // downstream operator is blocked by this operator

    // find the common checkpoint that is <= downstream recovery checkpoint
    if (!commonCheckpoints.contains(maxCheckpoint)) {
        if (!commonCheckpoints.isEmpty()) {
            // floor() yields null when no common checkpoint is <= maxCheckpoint; keep maxCheckpoint then
            maxCheckpoint = Objects.firstNonNull(commonCheckpoints.floor(maxCheckpoint), maxCheckpoint);

    for (PTOperator groupOper : groupOpers) {
        // checkpoint frozen during deployment
        if (!pendingDeploy || ctx.recovery) {
            // remove previous checkpoints
            Checkpoint c1 = Checkpoint.INITIAL_CHECKPOINT;
            LinkedList<Checkpoint> checkpoints = groupOper.checkpoints;
            synchronized (checkpoints) {
                if (!checkpoints.isEmpty() && (checkpoints.getFirst()).windowId <= maxCheckpoint.windowId) {
                    c1 = checkpoints.getFirst();
                    Checkpoint c2;
                    // advance c1 while the second entry is still <= maxCheckpoint, queueing the
                    // superseded window id for purging
                    // NOTE(review): no statement that shrinks `checkpoints` is visible in this loop —
                    // presumably a line was lost in extraction; confirm upstream
                    while (checkpoints.size() > 1
                            && ((c2 = checkpoints.get(1)).windowId) <= maxCheckpoint.windowId) {
                        //LOG.debug("Checkpoint to delete: operator={} windowId={}", operator.getName(), c1);
                        this.purgeCheckpoints.add(new Pair<PTOperator, Long>(groupOper, c1.windowId));
                        c1 = c2;
                } else {
                    // recovery of a stateless operator with no checkpoints: register one at maxCheckpoint
                    if (ctx.recovery && checkpoints.isEmpty() && groupOper.isOperatorStateLess()) {
                        LOG.debug("Adding checkpoint for stateless operator {} {}", groupOper,
                        c1 = groupOper.addCheckpoint(maxCheckpoint.windowId, this.vars.windowStartMillis);
            //LOG.debug("Operator {} checkpoints: commit {} recent {}", new Object[] {operator.getName(), c1, operator.checkpoints});
        } else {
            LOG.debug("Skipping checkpoint update {} during {}", groupOper, groupOper.getState());


From source file:com.erudika.para.validation.ValidationUtils.java

 * Validates objects.
 * @param content an object to be validated
 * @param app the current app
 * @return a list of error messages or empty if object is valid
// Validates `content` against the constraints the app defines for its custom type and, as far
// as this excerpt shows, otherwise falls through to validateObject(content).
// NOTE(review): this excerpt appears to have lost lines during extraction (closing braces and
// the `errors.add(` opener of several messages) — compare with the upstream file.
public static String[] validateObject(App app, ParaObject content) {
    // a missing object or app is reported as a single validation error
    if (content == null || app == null) {
        return new String[] { "Object cannot be null." };
    try {
        String type = content.getType();
        // "custom" means a Sysprop instance whose type is not the plain Sysprop type name
        boolean isCustomType = (content instanceof Sysprop) && !type.equals(Utils.type(Sysprop.class));
        // Validate custom types and user-defined properties
        if (!app.getValidationConstraints().isEmpty() && isCustomType) {
            // field name -> constraint name -> constraint parameters, for this type
            Map<String, Map<String, Map<String, ?>>> fieldsMap = app.getValidationConstraints().get(type);
            if (fieldsMap != null && !fieldsMap.isEmpty()) {
                LinkedList<String> errors = new LinkedList<String>();
                for (Map.Entry<String, Map<String, Map<String, ?>>> e : fieldsMap.entrySet()) {
                    String field = e.getKey();
                    // look the value up among the Sysprop's user-defined properties first
                    Object actualValue = ((Sysprop) content).getProperty(field);
                    // overriding core property validation rules is allowed
                    if (actualValue == null && PropertyUtils.isReadable(content, field)) {
                        actualValue = PropertyUtils.getProperty(content, field);
                    Map<String, Map<String, ?>> consMap = e.getValue();
                    for (Map.Entry<String, Map<String, ?>> constraint : consMap.entrySet()) {
                        String consName = constraint.getKey();
                        Map<String, ?> vals = constraint.getValue();
                        // constraints without parameters are treated as having an empty parameter map
                        if (vals == null) {
                            vals = Collections.emptyMap();

                        Object val = vals.get("value");
                        Object min = vals.get("min");
                        Object max = vals.get("max");
                        Object in = vals.get("integer");
                        Object fr = vals.get("fraction");

                        // else-if chain: a constraint entry matches at most one branch
                        // NOTE(review): the messages for Min, Max, Size and Digits have no visible
                        // `errors.add(` opener — presumably lost in extraction
                        if ("required".equals(consName) && !required().isValid(actualValue)) {
                            errors.add(Utils.formatMessage("{0} is required.", field));
                        } else if (matches(Min.class, consName) && !min(val).isValid(actualValue)) {
                                    Utils.formatMessage("{0} must be a number larger than {1}.", field, val));
                        } else if (matches(Max.class, consName) && !max(val).isValid(actualValue)) {
                                    Utils.formatMessage("{0} must be a number smaller than {1}.", field, val));
                        } else if (matches(Size.class, consName) && !size(min, max).isValid(actualValue)) {
                                    Utils.formatMessage("{0} must be between {1} and {2}.", field, min, max));
                        } else if (matches(Email.class, consName) && !email().isValid(actualValue)) {
                            errors.add(Utils.formatMessage("{0} is not a valid email.", field));
                        } else if (matches(Digits.class, consName) && !digits(in, fr).isValid(actualValue)) {
                                    Utils.formatMessage("{0} is not a valid number or within range.", field));
                        } else if (matches(Pattern.class, consName) && !pattern(val).isValid(actualValue)) {
                            errors.add(Utils.formatMessage("{0} doesn't match the pattern {1}.", field, val));
                        } else if (matches(AssertFalse.class, consName) && !falsy().isValid(actualValue)) {
                            errors.add(Utils.formatMessage("{0} must be false.", field));
                        } else if (matches(AssertTrue.class, consName) && !truthy().isValid(actualValue)) {
                            errors.add(Utils.formatMessage("{0} must be true.", field));
                        } else if (matches(Future.class, consName) && !future().isValid(actualValue)) {
                            errors.add(Utils.formatMessage("{0} must be in the future.", field));
                        } else if (matches(Past.class, consName) && !past().isValid(actualValue)) {
                            errors.add(Utils.formatMessage("{0} must be in the past.", field));
                        } else if (matches(URL.class, consName) && !url().isValid(actualValue)) {
                            errors.add(Utils.formatMessage("{0} is not a valid URL.", field));
                // custom-constraint violations are returned immediately
                if (!errors.isEmpty()) {
                    return errors.toArray(new String[0]);
    } catch (Exception ex) {
        // the failure is logged with a null message
        // NOTE(review): presumably control then continues to the return below — confirm
        logger.error(null, ex);
    // delegate to the single-argument overload
    return validateObject(content);

From source file:org.apache.sling.resourceresolver.impl.ResourceResolverImpl.java

 * full implementation - apply sling:alias from the resource path - apply
 * /etc/map mappings (incl. config backwards compat) - return absolute uri
 * if possible
 * @see org.apache.sling.api.resource.ResourceResolver#map(javax.servlet.http.HttpServletRequest,
 *      java.lang.String)
// Maps a resource path to an external path or URL: splits off fragment/query, resolves the
// resource, applies the map entries, mangles namespaces, prepends the request's context path
// and re-appends the fragment/query.
// NOTE(review): this excerpt appears to have lost lines during extraction (closing braces, the
// statements that fill `names` and `buf`, continuation lines of some calls) — compare upstream.
public String map(final HttpServletRequest request, final String resourcePath) {

    // find a fragment or query
    // NOTE(review): '?' is only searched when there is no '#', so for "path?q#f" the split
    // happens at '#' and the query stays part of mappedPath — confirm this is intended
    int fragmentQueryMark = resourcePath.indexOf('#');
    if (fragmentQueryMark < 0) {
        fragmentQueryMark = resourcePath.indexOf('?');

    // cut fragment or query off the resource path
    String mappedPath;
    final String fragmentQuery;
    if (fragmentQueryMark >= 0) {
        fragmentQuery = resourcePath.substring(fragmentQueryMark);
        mappedPath = resourcePath.substring(0, fragmentQueryMark);
        logger.debug("map: Splitting resource path '{}' into '{}' and '{}'",
                new Object[] { resourcePath, mappedPath, fragmentQuery });
    } else {
        fragmentQuery = null;
        mappedPath = resourcePath;

    // cut off scheme and host, if the same as requested
    // (both values stay null when no request is supplied)
    final String schemehostport;
    final String schemePrefix;
    if (request != null) {
        schemehostport = MapEntry.getURI(request.getScheme(), request.getServerName(), request.getServerPort(),
        schemePrefix = request.getScheme().concat("://");
        logger.debug("map: Mapping path {} for {} (at least with scheme prefix {})",
                new Object[] { resourcePath, schemehostport, schemePrefix });

    } else {

        schemehostport = null;
        schemePrefix = null;
        logger.debug("map: Mapping path {} for default", resourcePath);


    // separate path parameters from the raw path, then resolve the resource
    ParsedParameters parsed = new ParsedParameters(mappedPath);
    final Resource res = resolveInternal(parsed.getRawPath(), parsed.getParameters());

    if (res != null) {

        // keep, what we might have cut off in internal resolution
        final String resolutionPathInfo = res.getResourceMetadata().getResolutionPathInfo();

        logger.debug("map: Path maps to resource {} with path info {}", res, resolutionPathInfo);

        // find aliases for segments. we can't walk the parent chain
        // since the request session might not have permissions to
        // read all parents SLING-2093
        final LinkedList<String> names = new LinkedList<String>();

        // walk from the resource's path up to the root, one segment per iteration
        Resource current = res;
        String path = res.getPath();
        while (path != null) {
            String alias = null;
            if (current != null && !path.endsWith(JCR_CONTENT_LEAF)) {
                if (factory.getMapEntries().isOptimizeAliasResolutionEnabled()) {
                    logger.debug("map: Optimize Alias Resolution is Enabled");
                    String parentPath = ResourceUtil.getParent(path);
                    if (parentPath != null) {
                        // reverse lookup: find the key whose value is this resource's name
                        final Map<String, String> aliases = factory.getMapEntries().getAliasMap(parentPath);
                        if (aliases != null && aliases.containsValue(current.getName())) {
                            for (String key : aliases.keySet()) {
                                if (current.getName().equals(aliases.get(key))) {
                                    alias = key;
                } else {
                    logger.debug("map: Optimize Alias Resolution is Disabled");
                    alias = ResourceResolverControl.getProperty(current, PROP_ALIAS);
            // no alias found: fall back to the last segment of the path
            if (alias == null || alias.length() == 0) {
                alias = ResourceUtil.getName(path);
            // move up; the root path ends the loop
            path = ResourceUtil.getParent(path);
            if ("/".equals(path)) {
                path = null;
            } else if (path != null) {
                current = res.getResourceResolver().resolve(path);

        // build path from segment names
        final StringBuilder buf = new StringBuilder();

        // construct the path from the segments (or root if none)
        if (names.isEmpty()) {
        } else {
            while (!names.isEmpty()) {

        // reappend the resolutionPathInfo
        if (resolutionPathInfo != null) {

        // and then we have the mapped path to work on
        mappedPath = buf.toString();

        logger.debug("map: Alias mapping resolves to path {}", mappedPath);


    // apply the configured map entries to the (possibly aliased) path
    boolean mappedPathIsUrl = false;
    for (final MapEntry mapEntry : this.factory.getMapEntries().getMapMaps()) {
        final String[] mappedPaths = mapEntry.replace(mappedPath);
        if (mappedPaths != null) {

            logger.debug("map: Match for Entry {}", mapEntry);

            mappedPathIsUrl = !mapEntry.isInternal();

            // external mapping with a request: prefer a candidate for the request's host
            if (mappedPathIsUrl && schemehostport != null) {

                mappedPath = null;

                for (final String candidate : mappedPaths) {
                    if (candidate.startsWith(schemehostport)) {
                        // NOTE(review): length() - 1 keeps the last character of schemehostport —
                        // presumably a trailing '/'; confirm against MapEntry.getURI
                        mappedPath = candidate.substring(schemehostport.length() - 1);
                        mappedPathIsUrl = false;
                        logger.debug("map: Found host specific mapping {} resolving to {}", candidate,
                    } else if (candidate.startsWith(schemePrefix) && mappedPath == null) {
                        // first candidate with the same scheme is kept as a fallback
                        mappedPath = candidate;

                // nothing matched the host or scheme: take the first candidate
                if (mappedPath == null) {
                    mappedPath = mappedPaths[0];

            } else {

                // we can only go with assumptions selecting the first entry
                mappedPath = mappedPaths[0];


            logger.debug("resolve: MapEntry {} matches, mapped path is {}", mapEntry, mappedPath);


    // this should not be the case, since mappedPath is primed
    if (mappedPath == null) {
        mappedPath = resourcePath;

    // [scheme:][//authority][path][?query][#fragment]
    try {
        // use commons-httpclient's URI instead of java.net.URI, as it can
        // actually accept *unescaped* URIs, such as the "mappedPath" and
        // return them in proper escaped form, including the path, via
        // toString()
        final URI uri = new URI(mappedPath, false);

        // 1. mangle the namespaces in the path
        String path = mangleNamespaces(uri.getPath());

        // 2. prepend servlet context path if we have a request
        if (request != null && request.getContextPath() != null && request.getContextPath().length() > 0) {
            path = request.getContextPath().concat(path);
        // update the path part of the URI

        mappedPath = uri.toString();
    } catch (final URIException e) {
        // mappedPath keeps its pre-mangling value when the URI cannot be built
        logger.warn("map: Unable to mangle namespaces for " + mappedPath + " returning unmangled", e);

    logger.debug("map: Returning URL {} as mapping for path {}", mappedPath, resourcePath);

    // reappend fragment and/or query
    if (fragmentQuery != null) {
        mappedPath = mappedPath.concat(fragmentQuery);

    return mappedPath;

From source file:it.unimi.dsi.sux4j.mph.CHDMinimalPerfectHashFunction.java

 * Creates a new CHD minimal perfect hash function for the given keys.
 *
 * @param keys the keys to hash, or {@code null}.
 * @param transform a transformation strategy for the keys.
 * @param lambda the average bucket size.
 * @param loadFactor the load factor.
 * @param signatureWidth a signature width, or 0 for no signature.
 * @param tempDir a temporary directory for the store files, or {@code null} for the standard temporary directory.
 * @param chunkedHashStore a chunked hash store containing the keys, or {@code null}; the store
 * can be unchecked, but in this case <code>keys</code> and <code>transform</code> must be non-{@code null}. 
// Builds the function chunk by chunk: for every chunk of the hash store it searches for a seed
// and per-bucket coefficient pairs (c0, c1) that place all keys in distinct positions, records
// unused positions as holes, and finally builds the rank structure, the compressed coefficient
// list and (optionally) the signatures.
// NOTE(review): this excerpt appears to have lost lines during extraction (closing braces and
// several single-statement loop/if bodies) — compare with the upstream file before relying on it.
protected CHDMinimalPerfectHashFunction(final Iterable<? extends T> keys,
        final TransformationStrategy<? super T> transform, final int lambda, double loadFactor,
        final int signatureWidth, final File tempDir, ChunkedHashStore<T> chunkedHashStore) throws IOException {
    this.transform = transform;

    final ProgressLogger pl = new ProgressLogger(LOGGER);
    pl.displayLocalSpeed = true;
    pl.displayFreeMemory = true;
    final RandomGenerator r = new XorShift1024StarRandomGenerator();
    pl.itemsName = "keys";

    // remember whether the caller supplied the store; a local one is created otherwise
    final boolean givenChunkedHashStore = chunkedHashStore != null;
    if (!givenChunkedHashStore) {
        chunkedHashStore = new ChunkedHashStore<T>(transform, tempDir, pl);
    n = chunkedHashStore.size();

    defRetValue = -1; // For the very few cases in which we can decide

    // number of chunks is a power of two derived from n >> LOG2_CHUNK_SIZE
    // NOTE(review): the max(0, ...) clamp presumably covers mostSignificantBit returning a
    // negative value for 0 — confirm against Fast.mostSignificantBit
    int log2NumChunks = Math.max(0, Fast.mostSignificantBit(n >> LOG2_CHUNK_SIZE));
    chunkShift = chunkedHashStore.log2Chunks(log2NumChunks);
    final int numChunks = 1 << log2NumChunks;

    LOGGER.debug("Number of chunks: " + numChunks);
    LOGGER.debug("Average chunk size: " + (double) n / numChunks);

    // three slots per chunk boundary (numChunks + 1 of them) plus two extra entries
    // NOTE(review): presumably offset / bucket count / seed, as the field name suggests — confirm
    offsetNumBucketsSeed = new long[(numChunks + 1) * 3 + 2];

    int duplicates = 0;
    final LongArrayList holes = new LongArrayList();

    // offline store for the per-bucket coefficients, serialized 7 bits per byte
    final OfflineIterable<MutableLong, MutableLong> coefficients = new OfflineIterable<MutableLong, MutableLong>(
            new Serializer<MutableLong, MutableLong>() {

                // writes the value low bits first, 7 bits per byte, with bit 0x80 set on
                // every byte except the last
                public void write(final MutableLong a, final DataOutput dos) throws IOException {
                    long x = a.longValue();
                    while ((x & ~0x7FL) != 0) {
                        dos.writeByte((int) (x | 0x80));
                        x >>>= 7;
                    dos.writeByte((int) x);

                // inverse of write: keeps reading bytes while bit 0x80 is set, OR-ing each
                // 7-bit group in at an increasing shift
                public void read(final DataInput dis, final MutableLong x) throws IOException {
                    byte b = dis.readByte();
                    long t = b & 0x7F;
                    for (int shift = 7; (b & 0x80) != 0; shift += 7) {
                        b = dis.readByte();
                        t |= (b & 0x7FL) << shift;
            }, new MutableLong());

    // outer retry loop; the catch block below handles duplicate triples reported by the store
    for (;;) {
        LOGGER.debug("Generating minimal perfect hash function...");

        pl.expectedUpdates = numChunks;
        pl.itemsName = "chunks";
        pl.start("Analysing chunks... ");

        try {
            int chunkNumber = 0;

            for (ChunkedHashStore.Chunk chunk : chunkedHashStore) {
                /* We treat a chunk as a single hash function. The number of bins is thus
                 * the first prime larger than the chunk size divided by the load factor. */
                final int p = Primes.nextPrime((int) Math.ceil(chunk.size() / loadFactor) + 1);
                final boolean used[] = new boolean[p];

                // ceil(chunk.size() / lambda) buckets; the running bucket count is stored for the next chunk
                final int numBuckets = (chunk.size() + lambda - 1) / lambda;
                numBuckets(chunkNumber + 1, numBuckets(chunkNumber) + numBuckets);
                // chosen coefficients per bucket
                final int[] cc0 = new int[numBuckets];
                final int[] cc1 = new int[numBuckets];
                final ArrayList<long[]>[] bucket = new ArrayList[numBuckets];
                for (int i = bucket.length; i-- != 0;)
                    bucket[i] = new ArrayList<long[]>();

                // retried with a fresh seed whenever placement of this chunk fails
                tryChunk: for (;;) {
                    // NOTE(review): the body of this loop is not visible in the excerpt
                    for (ArrayList<long[]> b : bucket)
                    Arrays.fill(used, false);

                    /* At each try, the allocation to keys to bucket is randomized differently. */
                    final long seed = r.nextLong();
                    // System.err.println( "Number of keys: " + chunk.size()  + " Number of bins: " + p + " seed: " + seed );
                    /* We distribute the keys in this chunks in the buckets. */
                    for (Iterator<long[]> iterator = chunk.iterator(); iterator.hasNext();) {
                        final long[] triple = iterator.next();
                        final long[] h = new long[3];
                        Hashes.spooky4(triple, seed, h);
                        // h[0] selects the bucket; h[1] is reduced to [0, p) and h[2] to [1, p - 1]
                        final ArrayList<long[]> b = bucket[(int) ((h[0] >>> 1) % numBuckets)];
                        h[1] = (int) ((h[1] >>> 1) % p);
                        h[2] = (int) ((h[2] >>> 1) % (p - 1)) + 1;

                        // All elements in a bucket must have either different h[ 1 ] or different h[ 2 ]
                        for (long[] t : b)
                            if (t[1] == h[1] && t[2] == h[2]) {
                                LOGGER.info("Duplicate index" + Arrays.toString(t));
                                continue tryChunk;

                    // bucket indices ordered by decreasing bucket size
                    final int[] perm = Util.identity(bucket.length);
                    IntArrays.quickSort(perm, new AbstractIntComparator() {
                        private static final long serialVersionUID = 1L;

                        public int compare(int a0, int a1) {
                            return Integer.compare(bucket[a1].size(), bucket[a0].size());

                    for (int i = 0; i < perm.length;) {
                        final LinkedList<Integer> bucketsToDo = new LinkedList<Integer>();
                        final int size = bucket[perm[i]].size();
                        //System.err.println( "Bucket size: " + size );
                        int j;
                        // Gather indices of all buckets with the same size
                        for (j = i; j < perm.length && bucket[perm[j]].size() == size; j++)

                        // Examine for each pair (c0,c1) the buckets still to do
                        ext: for (int c1 = 0; c1 < p; c1++)
                            for (int c0 = 0; c0 < p; c0++) {
                                //System.err.println( "Testing " + c0 + ", " + c1 + " (to do: " + bucketsToDo.size() + ")" );
                                for (Iterator<Integer> iterator = bucketsToDo.iterator(); iterator.hasNext();) {
                                    final int k = iterator.next().intValue();
                                    final ArrayList<long[]> b = bucket[k];
                                    boolean completed = true;
                                    final IntArrayList done = new IntArrayList();
                                    // Try to see whether the necessary entries are not used
                                    for (long[] h : b) {
                                        //assert k == h[ 0 ];

                                        // candidate position of this key under coefficients (c0, c1)
                                        int pos = (int) ((h[1] + c0 * h[2] + c1) % p);
                                        //System.err.println( "Testing pos " + pos + " for " + Arrays.toString( e  ));
                                        if (used[pos]) {
                                            completed = false;
                                        } else {
                                            used[pos] = true;

                                    if (completed) {
                                        // All positions were free
                                        cc0[k] = c0;
                                        cc1[k] = c1;
                                    } else
                                        // roll back the positions recorded in `done` for this failed attempt
                                        for (int d : done)
                                            used[d] = false;
                                if (bucketsToDo.isEmpty())
                                    break ext;
                        // some bucket of this size could not be placed with any (c0, c1): new seed
                        if (!bucketsToDo.isEmpty())
                            continue tryChunk;

                        seed(chunkNumber, seed);
                        i = j;

                // System.err.println("DONE!");

                // optional self-check: every key of the chunk must land on a distinct position
                if (ASSERTS) {
                    final IntOpenHashSet pos = new IntOpenHashSet();
                    final long h[] = new long[3];
                    for (Iterator<long[]> iterator = chunk.iterator(); iterator.hasNext();) {
                        final long[] triple = iterator.next();
                        Hashes.spooky4(triple, seed(chunkNumber), h);
                        h[0] = (h[0] >>> 1) % numBuckets;
                        h[1] = (int) ((h[1] >>> 1) % p);
                        h[2] = (int) ((h[2] >>> 1) % (p - 1)) + 1;
                        //System.err.println( Arrays.toString(  e  ) );
                        assert pos.add((int) ((h[1] + cc0[(int) (h[0])] * h[2] + cc1[(int) (h[0])]) % p));

                // each bucket's pair is packed into one value as c0 + c1 * p
                final MutableLong l = new MutableLong();
                for (int i = 0; i < numBuckets; i++) {
                    l.setValue(cc0[i] + cc1[i] * p);

                // positions left unused in this chunk are recorded as holes (global offsets)
                for (int i = 0; i < p; i++)
                    if (!used[i])
                        holes.add(offset(chunkNumber) + i);

                offset(chunkNumber + 1, offset(chunkNumber) + p);

        } catch (ChunkedHashStore.DuplicateException e) {
            if (keys == null)
                throw new IllegalStateException(
                        "You provided no keys, but the chunked hash store was not checked");
            // post-increment: the exception is thrown once more than four duplicates were seen
            if (duplicates++ > 3)
                throw new IllegalArgumentException("The input list contains duplicates");
            LOGGER.warn("Found duplicate. Recomputing triples...");

    // rank structure built from the last stored offset and the collected holes
    rank = new SparseRank(offset(offsetNumBucketsSeed.length / 3 - 1), holes.size(), holes.iterator());

    globalSeed = chunkedHashStore.seed();

    // stream the stored coefficients into the compressed list
    this.coefficients = new EliasFanoLongBigList(new AbstractLongIterator() {
        final OfflineIterator<MutableLong, MutableLong> iterator = coefficients.iterator();

        public boolean hasNext() {
            return iterator.hasNext();

        public long nextLong() {
            return iterator.next().longValue();
    }, 0, true);


    LOGGER.info("Actual bit cost per key: " + (double) numBits() / n);

    if (signatureWidth != 0) {
        // mask with the low signatureWidth bits set
        signatureMask = -1L >>> Long.SIZE - signatureWidth;
        (signatures = LongArrayBitVector.getInstance().asLongBigList(signatureWidth)).size(n);
        pl.expectedUpdates = n;
        pl.itemsName = "signatures";
        // store, at each key's computed index, the masked first element of its triple
        for (ChunkedHashStore.Chunk chunk : chunkedHashStore) {
            Iterator<long[]> iterator = chunk.iterator();
            for (int i = chunk.size(); i-- != 0;) {
                final long[] triple = iterator.next();
                long t = getLongByTripleNoCheck(triple);
                signatures.set(t, signatureMask & triple[0]);
    } else {
        signatureMask = 0;
        signatures = null;

    // NOTE(review): the statement guarded by this condition is not visible in the excerpt —
    // presumably it releases the locally created store; confirm upstream
    if (!givenChunkedHashStore)

From source file:org.opencb.opencga.storage.hadoop.variant.HadoopVariantStorageEngine.java

/**
 * Indexes a set of input files: each file is processed by a task submitted to a fixed thread pool,
 * and afterwards the indexed files are merged in batches.
 * <p>
 * When there is exactly one input file, or {@code doLoad} is false, the call is delegated unchanged
 * to {@code super.index(...)}. Otherwise the {@code HADOOP_LOAD_ARCHIVE} and
 * {@code HADOOP_LOAD_VARIANT} options select the archive and merge phases; both phases run when
 * neither option is present.
 * <p>
 * NOTE(review): this listing has lost lines (closing braces, continuation lines and some
 * statements), so parts of the flow below cannot be confirmed from the visible code.
 *
 * @param inputFiles  files to index
 * @param outdirUri   output location passed to the load, pre/post-load, annotation and statistics calls
 * @param doExtract   only forwarded to {@code super.index(...)} in the visible code
 * @param doTransform whether each file goes through {@code transformFile} before loading
 * @param doLoad      whether files are loaded; when false the call is delegated to the superclass
 * @return the shared result list {@code concurrResult}, or an empty list when both the archive and
 *         the merge phase are disabled
 * @throws StorageEngineException declared by the signature; the visible code throws
 *         {@code StoragePipelineException} on detected errors, interruption, execution failure and timeout
 */
public List<StoragePipelineResult> index(List<URI> inputFiles, URI outdirUri, boolean doExtract,
        boolean doTransform, boolean doLoad) throws StorageEngineException {

    // Single file, or nothing to load: the generic implementation is used as-is.
    if (inputFiles.size() == 1 || !doLoad) {
        return super.index(inputFiles, outdirUri, doExtract, doTransform, doLoad);
    }

    final boolean doArchive;
    final boolean doMerge;

    // With neither option set, both phases are enabled; once either option is present,
    // each phase must be switched on explicitly (the default for both becomes false).
    if (!getOptions().containsKey(HADOOP_LOAD_ARCHIVE) && !getOptions().containsKey(HADOOP_LOAD_VARIANT)) {
        doArchive = true;
        doMerge = true;
    } else {
        doArchive = getOptions().getBoolean(HADOOP_LOAD_ARCHIVE, false);
        doMerge = getOptions().getBoolean(HADOOP_LOAD_VARIANT, false);

    // Nothing to do when both phases are disabled.
    if (!doArchive && !doMerge) {
        return Collections.emptyList();

    // Pool size comes from HADOOP_LOAD_ARCHIVE_BATCH_SIZE (2 when unset).
    final int nThreadArchive = getOptions().getInt(HADOOP_LOAD_ARCHIVE_BATCH_SIZE, 2);
    // NOTE(review): the statement below is truncated in this listing; the value appended for
    // HADOOP_LOAD_VARIANT is not visible.
    ObjectMap extraOptions = new ObjectMap().append(HADOOP_LOAD_ARCHIVE, true).append(HADOOP_LOAD_VARIANT,

    // Thread-safe list shared with the worker tasks (passed to transformFile / loadFile).
    final List<StoragePipelineResult> concurrResult = new CopyOnWriteArrayList<>();
    List<VariantStoragePipeline> etlList = new ArrayList<>();
    ExecutorService executorService = Executors.newFixedThreadPool(nThreadArchive, r -> {
        Thread t = new Thread(r);
        return t;
    }); // Set Daemon for quick shutdown !!!
    // NOTE(review): the comment above announces daemon threads, but no setDaemon(true) call is
    // visible in the factory - confirm against the original source.
    LinkedList<Future<StoragePipelineResult>> futures = new LinkedList<>();
    List<Integer> indexedFiles = new CopyOnWriteArrayList<>();
    for (URI inputFile : inputFiles) {
        //Provide a connected storageETL if load is required.

        // Each task gets its own pipeline, configured from a copy of extraOptions.
        VariantStoragePipeline storageETL = newStorageETL(doLoad, new ObjectMap(extraOptions));
        futures.add(executorService.submit(() -> {
            try {
                StoragePipelineResult storagePipelineResult = new StoragePipelineResult(inputFile);
                URI nextUri = inputFile;
                boolean error = false;
                if (doTransform) {
                    try {
                        // NOTE(review): call truncated in this listing; remaining arguments not visible.
                        nextUri = transformFile(storageETL, storagePipelineResult, concurrResult, nextUri,

                    } catch (StoragePipelineException ignore) {
                        //Ignore here. Errors are stored in the ETLResult
                        error = true;

                // Archive load is skipped once an earlier step of this task has failed.
                if (doLoad && doArchive && !error) {
                    try {
                        loadFile(storageETL, storagePipelineResult, concurrResult, nextUri, outdirUri);
                    } catch (StoragePipelineException ignore) {
                        //Ignore here. Errors are stored in the ETLResult
                        error = true;
                if (doLoad && !error) {
                    // Read the VariantSource to get the original fileName (it may be different from the
                    // nextUri.getFileName if this is the transformed file)
                    String fileName = storageETL.readVariantSource(nextUri, null).getFileName();
                    // Get latest study configuration from DB, might have been changed since
                    StudyConfiguration studyConfiguration = storageETL.getStudyConfiguration();
                    // Get file ID for the provided file name
                    Integer fileId = studyConfiguration.getFileIds().get(fileName);
                    // NOTE(review): fileId is not used in the visible code and nothing visible adds
                    // to indexedFiles - presumably a line was lost here; confirm.
                return storagePipelineResult;
            } finally {
                // NOTE(review): the body of this try block is missing from the listing; the log
                // message below suggests it closed the pipeline's DB connection - confirm.
                try {
                } catch (StorageEngineException e) {
                    logger.error("Issue closing DB connection ", e);


    int errors = 0;
    try {
        // Drain the futures in submission order.
        while (!futures.isEmpty()) {
            // NOTE(review): no shutdown() call is visible before this awaitTermination - confirm
            // that one exists in the original source.
            executorService.awaitTermination(1, TimeUnit.MINUTES);
            // Check values
            if (futures.peek().isDone() || futures.peek().isCancelled()) {
                Future<StoragePipelineResult> first = futures.pop();
                StoragePipelineResult result = first.get(1, TimeUnit.MINUTES);
                // NOTE(review): both branches are empty in this listing, so "errors" is never
                // incremented by visible code - confirm.
                if (result.getTransformError() != null) {
                    //TODO: Handle errors. Retry?
                } else if (result.getLoadError() != null) {
                    //TODO: Handle errors. Retry?
        if (errors > 0) {
            throw new StoragePipelineException("Errors found", concurrResult);

        if (doLoad && doMerge) {
            // Merge batch size comes from HADOOP_LOAD_VARIANT_BATCH_SIZE (10 when unset).
            int batchMergeSize = getOptions().getInt(HADOOP_LOAD_VARIANT_BATCH_SIZE, 10);
            // Overwrite default ID list with user provided IDs
            List<Integer> pendingFiles = indexedFiles;
            if (getOptions().containsKey(HADOOP_LOAD_VARIANT_PENDING_FILES)) {
                List<Integer> idList = getOptions().getAsIntegerList(HADOOP_LOAD_VARIANT_PENDING_FILES);
                if (!idList.isEmpty()) {
                    // only if the list is not empty
                    pendingFiles = idList;

            List<Integer> filesToMerge = new ArrayList<>(batchMergeSize);
            int i = 0;
            // "i" advances together with the iterator and is used below as an index into inputFiles.
            // NOTE(review): when pendingFiles is the user-provided ID list, its positions need not
            // line up with inputFiles - confirm.
            for (Iterator<Integer> iterator = pendingFiles.iterator(); iterator.hasNext(); i++) {
                Integer indexedFile = iterator.next();
                // NOTE(review): indexedFile is never added to filesToMerge in the visible code;
                // presumably that line was lost - confirm.
                // A merge runs when the batch is full or the last pending file has been reached.
                if (filesToMerge.size() == batchMergeSize || !iterator.hasNext()) {
                    extraOptions = new ObjectMap().append(HADOOP_LOAD_ARCHIVE, false)
                            .append(HADOOP_LOAD_VARIANT, true)
                            .append(HADOOP_LOAD_VARIANT_PENDING_FILES, filesToMerge);

                    AbstractHadoopVariantStoragePipeline localEtl = newStorageETL(doLoad, extraOptions);

                    int studyId = getOptions().getInt(Options.STUDY_ID.key());
                    localEtl.preLoad(inputFiles.get(i), outdirUri);
                    localEtl.merge(studyId, filesToMerge);
                    localEtl.postLoad(inputFiles.get(i), outdirUri);

            annotateLoadedFiles(outdirUri, inputFiles, concurrResult, getOptions());
            calculateStatsForLoadedFiles(outdirUri, inputFiles, concurrResult, getOptions());

    // NOTE(review): the interrupt flag is not restored before the InterruptedException is wrapped.
    } catch (InterruptedException e) {
        throw new StoragePipelineException("Interrupted!", e, concurrResult);
    } catch (ExecutionException e) {
        throw new StoragePipelineException("Execution exception!", e, concurrResult);
    } catch (TimeoutException e) {
        throw new StoragePipelineException("Timeout Exception", e, concurrResult);
    } finally {
        if (!executorService.isShutdown()) {
            // NOTE(review): the body of this try block is missing from the listing; the log
            // message below suggests it shut the executor down - confirm.
            try {
            } catch (Exception e) {
                logger.error("Problems shutting executer service down", e);
    return concurrResult;

From source file:com.ikanow.aleph2.management_db.services.DataBucketCrudService.java

/** Validates whether the new or updated bucket is valid: both in terms of authorization and in terms of format
 * @param bucket the bucket to validate
 * @return the bucket paired with the validation messages collected for it
 * @throws ExecutionException
 * @throws InterruptedException
 */
protected Tuple2<DataBucketBean, Collection<BasicMessageBean>> validateBucket(final DataBucketBean bucket,
        final Optional<DataBucketBean> old_version, boolean do_full_checks, final boolean allow_system_names)
        throws InterruptedException, ExecutionException {
    // Validates "bucket" in three phases and returns it together with the messages collected in
    // "errors"; several checks return early as soon as they fail.
    //   old_version        - present for an update, absent for a create
    //   do_full_checks     - enables the checks that query the bucket store / the bucket path chain
    //   allow_system_names - forwarded to BucketValidationUtils.staticValidation
    // NOTE(review): this listing has lost lines (closing braces, the start of some statements and
    // the error-message construction), so not every step can be confirmed from the visible code.

    // (will live with this being mutable)
    final LinkedList<BasicMessageBean> errors = new LinkedList<BasicMessageBean>();

    // JSON view of the bean, used below to test which fields are present.
    final JsonNode bucket_json = BeanTemplateUtils.toJson(bucket);


    // PHASE 1

    // Check for missing fields

            // NOTE(review): the head of this stream pipeline (the list of field names) and the
            // tail of the forEach are missing from the listing.
            // A field counts as missing when it is absent or is an empty text node.
            .filter(s -> !bucket_json.has(s)
                    || (bucket_json.get(s).isTextual() && bucket_json.get(s).asText().isEmpty()))
            .forEach(s -> errors.add(MgmtCrudUtils

    // We have a full name if we're here, so no check for uniqueness

    // Check for some bucket path restrictions
    if (null != bucket.full_name()) {
        if (!BucketValidationUtils.bucketPathFormatValidationCheck(bucket.full_name())) {

            return Tuples._2T(bucket, errors); // (this is catastrophic obviously)         

        if (!old_version.isPresent()) { // (create not update)
            if (do_full_checks) {
                // On create, count stored buckets with the same full_name; any hit aborts validation.
                if (this._underlying_data_bucket_db.get().countObjectsBySpec(CrudUtils
                        .allOf(DataBucketBean.class).when(DataBucketBean::full_name, bucket.full_name()))
                        .get() > 0) {

                    return Tuples._2T(bucket, errors); // (this is catastrophic obviously)
    } else
        // No full_name at all: stop immediately.
        return Tuples._2T(bucket, errors); // (this is catastrophic obviously)

    // Some static validation moved into a separate function for testability

    errors.addAll(BucketValidationUtils.staticValidation(bucket, allow_system_names));

    // OK before I do any more stateful checking, going to stop if we have logic errors first 

    if (!errors.isEmpty()) {
        return Tuples._2T(bucket, errors);


    // PHASE 2

    //TODO (ALEPH-19): multi buckets - authorization; other - authorization

    if (do_full_checks) {

        // NOTE(review): call truncated in this listing; the arguments and the code that consumes
        // the future are not visible.
        final CompletableFuture<Collection<BasicMessageBean>> bucket_path_errors_future = validateOtherBucketsInPathChain(


        // OK before I do any more stateful checking, going to stop if we have logic errors first 

        if (!errors.isEmpty()) {
            return Tuples._2T(bucket, errors);


    // PHASE 3

    // Finally Check whether I am allowed to update the various fields if old_version.isPresent()

    if (old_version.isPresent()) {
        final DataBucketBean old_bucket = old_version.get();
        // full_name differs between the new and the old version.
        if (!bucket.full_name().equals(old_bucket.full_name())) {
                            bucket.full_name(), old_bucket.full_name())));
        // owner_id differs between the new and the old version.
        // NOTE(review): the visible message arguments are the bucket's full_name and the old
        // owner_id; the message template itself is missing from the listing - confirm.
        if (!bucket.owner_id().equals(old_bucket.owner_id())) {
                            bucket.full_name(), old_bucket.owner_id())));



    Tuple2<Map<String, String>, List<BasicMessageBean>> schema_validation = BucketValidationUtils
            .validateSchema(bucket, _service_context);


    // The returned bean is a clone of "bucket" whose data_locations is set to the first element
    // of the schema validation result.
    // NOTE(review): the second tuple element is cut off in this listing.
    return Tuples._2T(
            BeanTemplateUtils.clone(bucket).with(DataBucketBean::data_locations, schema_validation._1()).done(),

From source file:org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.JobControlCompiler.java

/**
 * The method that creates the Job corresponding to a MapReduceOper.
 * The assumption is that
 * every MapReduceOper will have a load and a store. The JobConf removes
 * the load operator and serializes the input filespec so that PigInputFormat can
 * take over the creation of splits. It also removes the store operator
 * and serializes the output filespec so that PigOutputFormat can take over
 * record writing. The remaining portion of the map plan and reduce plans are
 * serialized and stored for the PigMapReduce or PigMapOnly objects to take over
 * the actual running of the plans.
 * The Mapper &amp; Reducer classes and the required key value formats are set.
 * Checks if this is a map only job and uses PigMapOnly class as the mapper
 * and uses PigMapReduce otherwise.
 * If it is a Map Reduce job, it is bound to have a package operator. Remove it from
 * the reduce plan and serializes it so that the PigMapReduce class can use it to package
 * the indexed tuples received by the reducer.
 * @param mro - The MapReduceOper for which the JobConf is required
 * @param config - the Configuration object from which JobConf is built
 * @param pigContext - The PigContext passed on from execution engine
 * @return Job corresponding to mro
 * @throws JobCreationException
 */
@SuppressWarnings({ "unchecked" })
private Job getJob(MROperPlan plan, MapReduceOper mro, Configuration config, PigContext pigContext)
        throws JobCreationException {
    // Builds the Hadoop job for one MapReduceOper: creates a mapreduce Job from "config", fills its
    // Configuration from the operator's map/combine/reduce plans and from pigContext, and returns a
    // Job wrapping that configuration. The new job is also registered in jobStoreMap.
    // NOTE(review): this listing has lost many lines (closing braces, continuation lines and whole
    // statements); the notes below mark the places where the visible code is clearly incomplete.
    org.apache.hadoop.mapreduce.Job nwJob = null;

    // Any failure while creating the underlying job is reported as a JobCreationException.
    try {
        nwJob = new org.apache.hadoop.mapreduce.Job(config);
    } catch (Exception e) {
        throw new JobCreationException(e);

    Configuration conf = nwJob.getConfiguration();

    // Accumulators for the load side (serialized into conf further down) and the store side.
    ArrayList<FileSpec> inp = new ArrayList<FileSpec>();
    ArrayList<List<OperatorKey>> inpTargets = new ArrayList<List<OperatorKey>>();
    ArrayList<String> inpSignatureLists = new ArrayList<String>();
    ArrayList<Long> inpLimits = new ArrayList<Long>();
    ArrayList<POStore> storeLocations = new ArrayList<POStore>();
    Path tmpLocation = null;

    // add settings for pig statistics
    String setScriptProp = conf.get(PigConfiguration.PIG_SCRIPT_INFO_ENABLED, "true");
    if (setScriptProp.equalsIgnoreCase("true")) {
        MRScriptState ss = MRScriptState.get();
        ss.addSettingsToConf(mro, conf);

    conf.set(MRConfiguration.MAPPER_NEW_API, "true");
    conf.set(MRConfiguration.REDUCER_NEW_API, "true");

    // Fall back to "0.3" when the mark/reset buffer percent is unset or not positive.
    String buffPercent = conf.get(MRConfiguration.JOB_REDUCE_MARKRESET_BUFFER_PERCENT);
    if (buffPercent == null || Double.parseDouble(buffPercent) <= 0) {
        log.info(MRConfiguration.JOB_REDUCE_MARKRESET_BUFFER_PERCENT + " is not set, set to default 0.3");
        conf.set(MRConfiguration.JOB_REDUCE_MARKRESET_BUFFER_PERCENT, "0.3");
    } else {
        log.info(MRConfiguration.JOB_REDUCE_MARKRESET_BUFFER_PERCENT + " is set to "
                + conf.get(MRConfiguration.JOB_REDUCE_MARKRESET_BUFFER_PERCENT));


    try {
        //Process the POLoads
        List<POLoad> lds = PlanHelper.getPhysicalOperators(mro.mapPlan, POLoad.class);

        if (lds != null && lds.size() > 0) {
            for (POLoad ld : lds) {
                LoadFunc lf = ld.getLoadFunc();
                lf.setLocation(ld.getLFile().getFileName(), nwJob);

                //Store the inp filespecs
                // NOTE(review): the statement that stores the filespec is missing from the listing.

        if (!mro.reducePlan.isEmpty()) {
            log.info("Reduce phase detected, estimating # of required reducers.");
            adjustNumReducers(plan, mro, nwJob);
        } else {
            // NOTE(review): the body of this else branch is missing from the listing.

        if (!pigContext.inIllustrator && !pigContext.getExecType().isLocal()) {
            if (okToRunLocal(nwJob, mro, lds)) {
                // override with the default conf to run in local mode
                for (Entry<String, String> entry : defaultConf) {
                    String key = entry.getKey();
                    // NOTE(review): the bodies of the three guards below are missing from the
                    // listing; their comments indicate these keys are meant to be left alone.
                    if (key.equals(MRConfiguration.REDUCE_TASKS) || key.equals(MRConfiguration.JOB_REDUCES)) {
                        // this must not be set back to the default in case it has been set to 0 for example.
                    if (key.startsWith("fs.")) {
                        // we don't want to change fs settings back
                    if (key.startsWith("io.")) {
                        // we don't want to change io settings back
                    String value = entry.getValue();
                    // Only write the key when the current value differs from the default.
                    if (conf.get(key) == null || !conf.get(key).equals(value)) {
                        conf.set(key, value);

                conf.setBoolean(PigImplConstants.CONVERTED_TO_LOCAL, true);
            } else {
                // Search to see if we have any UDF/LoadFunc/StoreFunc that need to pack things into the
                // distributed cache.
                List<String> cacheFiles = new ArrayList<String>();
                List<String> shipFiles = new ArrayList<String>();
                UdfCacheShipFilesVisitor mapUdfCacheFileVisitor = new UdfCacheShipFilesVisitor(mro.mapPlan);

                // NOTE(review): statement truncated in this listing; the code that fills cacheFiles
                // and shipFiles from the visitors is not visible.
                UdfCacheShipFilesVisitor reduceUdfCacheFileVisitor = new UdfCacheShipFilesVisitor(

                setupDistributedCache(pigContext, conf, cacheFiles.toArray(new String[] {}), false);

                // Setup the DistributedCache for this job
                List<URL> allJars = new ArrayList<URL>();

                // NOTE(review): in each of the loops below the statement that adds the jar to
                // allJars is missing from the listing; only the "not already present" guards remain.
                for (URL extraJar : pigContext.extraJars) {
                    if (!allJars.contains(extraJar)) {

                for (String udf : mro.UDFs) {
                    Class clazz = pigContext.getClassForAlias(udf);
                    if (clazz != null) {
                        String jar = JarManager.findContainingJar(clazz);
                        if (jar != null) {
                            URL jarURL = new File(jar).toURI().toURL();
                            if (!allJars.contains(jarURL)) {

                for (String scriptJar : pigContext.scriptJars) {
                    URL jar = new File(scriptJar).toURI().toURL();
                    if (!allJars.contains(jar)) {

                for (String shipFile : shipFiles) {
                    URL jar = new File(shipFile).toURI().toURL();
                    if (!allJars.contains(jar)) {

                for (String defaultJar : JarManager.getDefaultJars()) {
                    URL jar = new File(defaultJar).toURI().toURL();
                    if (!allJars.contains(jar)) {

                for (URL jar : allJars) {
                    // A jar counts as predeployed when its file name occurs inside one of the
                    // predeployedJars strings (substring match).
                    boolean predeployed = false;
                    for (String predeployedJar : pigContext.predeployedJars) {
                        if (predeployedJar.contains(new File(jar.toURI()).getName())) {
                            predeployed = true;
                    if (!predeployed) {
                        // Files ending in ".jar" (case-insensitive) go onto the classpath through
                        // the distributed cache; anything else goes through setupDistributedCache.
                        if (jar.getFile().toLowerCase().endsWith(".jar")) {
                            putJarOnClassPathThroughDistributedCache(pigContext, conf, jar);
                        } else {
                            setupDistributedCache(pigContext, conf, new String[] { jar.getPath() }, true);

                File scriptUDFJarFile = JarManager.createPigScriptUDFJar(pigContext);
                if (scriptUDFJarFile != null) {
                    // NOTE(review): call truncated in this listing.
                    putJarOnClassPathThroughDistributedCache(pigContext, conf,

        // For UDF specs mentioning "GFCross", a per-cross-key parallelism entry is written to conf.
        for (String udf : mro.UDFs) {
            if (udf.contains("GFCross")) {
                Object func = PigContext.instantiateFuncFromSpec(new FuncSpec(udf));
                if (func instanceof GFCross) {
                    String crossKey = ((GFCross) func).getCrossKey();
                    // NOTE(review): call truncated in this listing; the value being set is not visible.
                    conf.set(PigImplConstants.PIG_CROSS_PARALLELISM + "." + crossKey,

        if (lds != null && lds.size() > 0) {
            for (POLoad ld : lds) {
                //Store the target operators for tuples read
                //from this input
                List<PhysicalOperator> ldSucs = mro.mapPlan.getSuccessors(ld);
                List<OperatorKey> ldSucKeys = new ArrayList<OperatorKey>();
                if (ldSucs != null) {
                    for (PhysicalOperator operator2 : ldSucs) {
                //Remove the POLoad from the plan
                // NOTE(review): the loop body above and the removal statement guarded below are
                // missing from the listing.
                if (!pigContext.inIllustrator)

        // NOTE(review): the body of this local-mode branch is missing from the listing.
        if (Utils.isLocal(pigContext, conf)) {
        // Input specs, targets, signatures and limits are passed to the job in serialized form.
        conf.set(PigInputFormat.PIG_INPUTS, ObjectSerializer.serialize(inp));
        conf.set(PigInputFormat.PIG_INPUT_TARGETS, ObjectSerializer.serialize(inpTargets));
        conf.set(PigInputFormat.PIG_INPUT_SIGNATURES, ObjectSerializer.serialize(inpSignatureLists));
        conf.set(PigInputFormat.PIG_INPUT_LIMITS, ObjectSerializer.serialize(inpLimits));

        // Removing job credential entry before serializing pigcontext into jobconf
        // since this path would be invalid for the new job being created
        // NOTE(review): the statement this comment describes is missing from the listing.

        conf.setBoolean(PigImplConstants.PIG_EXECTYPE_MODE_LOCAL, pigContext.getExecType().isLocal());
        conf.set("udf.import.list", ObjectSerializer.serialize(PigContext.getPackageImportList()));
        // this is for unit tests since some don't create PigServer

        // if user specified the job name using -D switch, Pig won't reset the name then.
        // NOTE(review): the body of this if is missing from the listing.
        if (System.getProperty(MRConfiguration.JOB_NAME) == null
                && pigContext.getProperties().getProperty(PigContext.JOB_NAME) != null) {

        if (pigContext.getProperties().getProperty(PigContext.JOB_PRIORITY) != null) {
            // If the job priority was set, attempt to get the corresponding enum value
            // and set the hadoop job priority.
            String jobPriority = pigContext.getProperties().getProperty(PigContext.JOB_PRIORITY).toUpperCase();
            try {
                // Allow arbitrary case; the Hadoop job priorities are all upper case.
                conf.set(MRConfiguration.JOB_PRIORITY, JobPriority.valueOf(jobPriority).toString());

            } catch (IllegalArgumentException e) {
                // Unknown priority name: build a message and fail with a JobCreationException.
                // NOTE(review): the statement that appends each priority inside the loop is
                // missing from the listing.
                StringBuffer sb = new StringBuffer("The job priority must be one of [");
                JobPriority[] priorities = JobPriority.values();
                for (int i = 0; i < priorities.length; ++i) {
                    if (i > 0)
                        sb.append(", ");
                sb.append("].  You specified [" + jobPriority + "]");
                throw new JobCreationException(sb.toString());

        // Streaming ship/cache files are taken from the properties named by the string keys below.
        setupDistributedCache(pigContext, conf, pigContext.getProperties(), "pig.streaming.ship.files", true);
        setupDistributedCache(pigContext, conf, pigContext.getProperties(), "pig.streaming.cache.files", false);


        // tmp file compression setups
        // PIG-3741 This must be done before setStoreLocation on POStores
        Utils.setTmpFileCompressionOnConf(pigContext, conf);

        //Process POStore and remove it from the plan
        LinkedList<POStore> mapStores = PlanHelper.getPhysicalOperators(mro.mapPlan, POStore.class);
        LinkedList<POStore> reduceStores = PlanHelper.getPhysicalOperators(mro.reducePlan, POStore.class);

        // For every store: set its location on the job, then let an OverwritableStoreFunc that
        // asks for overwriting clean up its existing output.
        for (POStore st : mapStores) {
            StoreFuncInterface sFunc = st.getStoreFunc();
            sFunc.setStoreLocation(st.getSFile().getFileName(), nwJob);
            if (sFunc instanceof OverwritableStoreFunc) {
                OverwritableStoreFunc osf = (OverwritableStoreFunc) sFunc;
                if (osf.shouldOverwrite()) {
                    osf.cleanupOutput(st, nwJob);

        for (POStore st : reduceStores) {
            StoreFuncInterface sFunc = st.getStoreFunc();
            sFunc.setStoreLocation(st.getSFile().getFileName(), nwJob);
            if (sFunc instanceof OverwritableStoreFunc) {
                OverwritableStoreFunc osf = (OverwritableStoreFunc) sFunc;
                if (osf.shouldOverwrite()) {
                    osf.cleanupOutput(st, nwJob);


        if (mapStores.size() + reduceStores.size() == 1) { // single store case
            log.info("Setting up single store job");

            // The single store is taken from whichever plan holds it.
            // NOTE(review): the statements guarded by the two inIllustrator checks are missing
            // from the listing.
            POStore st;
            if (reduceStores.isEmpty()) {
                st = mapStores.get(0);
                if (!pigContext.inIllustrator)
            } else {
                st = reduceStores.get(0);
                if (!pigContext.inIllustrator)

            MapRedUtil.setupStreamingDirsConfSingle(st, pigContext, conf);
        } else if (mapStores.size() + reduceStores.size() > 0) { // multi store case
            log.info("Setting up multi store job");
            MapRedUtil.setupStreamingDirsConfMulti(pigContext, conf);

            boolean disableCounter = conf.getBoolean("pig.disable.counter", false);
            if (disableCounter) {
                log.info("Disable Pig custom output counters");
            // NOTE(review): the body of the loop over storeLocations is missing from the listing.
            int idx = 0;
            for (POStore sto : storeLocations) {

        // store map key type
        // this is needed when the key is null to create
        // an appropriate NullableXXXWritable object
        conf.set("pig.map.keytype", ObjectSerializer.serialize(new byte[] { mro.mapKeyType }));

        // set parent plan in all operators in map and reduce plans
        // currently the parent plan is really used only when POStream is present in the plan
        new PhyPlanSetter(mro.mapPlan).visit();
        new PhyPlanSetter(mro.combinePlan).visit();
        new PhyPlanSetter(mro.reducePlan).visit();

        // this call modifies the ReplFiles names of POFRJoin operators
        // within the MR plans, must be called before the plans are
        // serialized
        setupDistributedCacheForJoin(mro, pigContext, conf);

        SchemaTupleFrontend.copyAllGeneratedToDistributedCache(pigContext, conf);

        POPackage pack = null;
        if (mro.reducePlan.isEmpty()) {
            //MapOnly Job
            if (!pigContext.inIllustrator)
                conf.set("pig.mapPlan", ObjectSerializer.serialize(mro.mapPlan));
            if (mro.isEndOfAllInputSetInMap()) {
                // this is used in Map.close() to decide whether the
                // pipeline needs to be rerun one more time in the close()
                // The pipeline is rerun if there either was a stream or POMergeJoin
                conf.set(END_OF_INP_IN_MAP, "true");
        } else {
            //Map Reduce Job
            //Process the POPackage operator and remove it from the reduce plan
            if (!mro.combinePlan.isEmpty()) {
                // The combiner's package operator is the first root of the combine plan.
                POPackage combPack = (POPackage) mro.combinePlan.getRoots().get(0);
                conf.set("pig.combinePlan", ObjectSerializer.serialize(mro.combinePlan));
                conf.set("pig.combine.package", ObjectSerializer.serialize(combPack));
            } else if (mro.needsDistinctCombiner()) {
                log.info("Setting identity combiner class.");
            // The reduce-side package operator is the first root of the reduce plan.
            pack = (POPackage) mro.reducePlan.getRoots().get(0);

            // NOTE(review): the bodies of the next two conditionals are missing from the listing.
            if (!pigContext.inIllustrator) {

            if (mro.customPartitioner != null)

            if (!pigContext.inIllustrator)
                conf.set("pig.mapPlan", ObjectSerializer.serialize(mro.mapPlan));
            if (mro.isEndOfAllInputSetInMap()) {
                // this is used in Map.close() to decide whether the
                // pipeline needs to be rerun one more time in the close()
                // The pipeline is rerun only if there was a stream or merge-join.
                conf.set(END_OF_INP_IN_MAP, "true");
            if (!pigContext.inIllustrator)
                conf.set("pig.reducePlan", ObjectSerializer.serialize(mro.reducePlan));
            if (mro.isEndOfAllInputSetInReduce()) {
                // this is used in Map.close() to decide whether the
                // pipeline needs to be rerun one more time in the close()
                // The pipeline is rerun only if there was a stream
                // NOTE(review): the comment above mentions Map.close(), but the key set here is
                // "pig.stream.in.reduce" - presumably the reduce side is meant; confirm.
                conf.set("pig.stream.in.reduce", "true");
            if (!pigContext.inIllustrator)
                conf.set("pig.reduce.package", ObjectSerializer.serialize(pack));
            conf.set("pig.reduce.key.type", Byte.toString(pack.getPkgr().getKeyType()));

            if (mro.getUseSecondaryKey()) {
                conf.set("pig.secondarySortOrder", ObjectSerializer.serialize(mro.getSecondarySortOrder()));

            } else {
                // NOTE(review): the keyClass initializer is truncated in this listing.
                Class<? extends WritableComparable> keyClass = HDataType
                selectComparator(mro, pack.getPkgr().getKeyType(), nwJob);

        if (mro.isGlobalSort() || mro.isLimitAfterSort()) {
            if (mro.isGlobalSort()) {
                // NOTE(review): call truncated in this listing; remaining arguments not visible.
                String symlink = addSingleFileToDistributedCache(pigContext, conf, mro.getQuantFile(),
                conf.set("pig.quantilesFile", symlink);

            if (mro.isUDFComparatorUsed) {
                // Look through the UDF specs for a class assignable to ComparisonFunc.
                boolean usercomparator = false;
                for (String compFuncSpec : mro.UDFs) {
                    Class comparator = PigContext.resolveClassName(compFuncSpec);
                    if (ComparisonFunc.class.isAssignableFrom(comparator)) {
                        conf.set("pig.reduce.package", ObjectSerializer.serialize(pack));
                        conf.set("pig.usercomparator", "true");
                        usercomparator = true;
                if (!usercomparator) {
                    // This IOException is caught by the generic catch at the end of the method
                    // and rethrown as a JobCreationException with error code 2017.
                    String msg = "Internal error. Can't find the UDF comparator";
                    throw new IOException(msg);

            } else {
                conf.set("pig.sortOrder", ObjectSerializer.serialize(mro.getSortOrder()));

        if (mro.isSkewedJoin()) {
            // NOTE(review): call truncated in this listing; remaining arguments not visible.
            String symlink = addSingleFileToDistributedCache(pigContext, conf, mro.getSkewedJoinPartitionFile(),
            conf.set("pig.keyDistFile", symlink);

        // NOTE(review): both branches of the counter-operation check are empty in this listing.
        if (mro.isCounterOperation()) {
            if (mro.isRowNumber()) {
            } else {

        if (mro.isRankOperation()) {
            Iterator<String> operationIDs = mro.getRankOperationId().iterator();

            // Copy every (name, value) pair recorded in globalCounters for each rank operation
            // id into conf as a long.
            while (operationIDs.hasNext()) {
                String operationID = operationIDs.next();
                Iterator<Pair<String, Long>> itPairs = globalCounters.get(operationID).iterator();
                Pair<String, Long> pair = null;
                while (itPairs.hasNext()) {
                    pair = itPairs.next();
                    conf.setLong(pair.first, pair.second);

        if (!pigContext.inIllustrator) {
            // unset inputs for POStore, otherwise, map/reduce plan will be unnecessarily deserialized
            // NOTE(review): the bodies of the two loops below are missing from the listing.
            for (POStore st : mapStores) {
            for (POStore st : reduceStores) {
            conf.set(PIG_MAP_STORES, ObjectSerializer.serialize(mapStores));
            conf.set(PIG_REDUCE_STORES, ObjectSerializer.serialize(reduceStores));

        String tmp;
        long maxCombinedSplitSize = 0;
        // Split combination is turned off when the operator does not combine small splits or the
        // "pig.splitCombination" property is "false"; otherwise an optional maximum size is parsed.
        if (!mro.combineSmallSplits()
                || pigContext.getProperties().getProperty("pig.splitCombination", "true").equals("false"))
            conf.setBoolean("pig.noSplitCombination", true);
        else if ((tmp = pigContext.getProperties().getProperty("pig.maxCombinedSplitSize", null)) != null) {
            try {
                maxCombinedSplitSize = Long.parseLong(tmp);
            } catch (NumberFormatException e) {
                // NOTE(review): the call that emits this message is truncated in the listing.
                        "Invalid numeric format for pig.maxCombinedSplitSize; use the default maximum combined split size");
        // Only a positive, successfully parsed value is written to conf.
        if (maxCombinedSplitSize > 0)
            conf.setLong("pig.maxCombinedSplitSize", maxCombinedSplitSize);

        // It's a hack to set distributed cache file for hadoop 23. Once MiniMRCluster do not require local
        // jar on fixed location, this can be removed
        if (pigContext.getExecType() == ExecType.MAPREDUCE) {
            String newfiles = conf.get("alternative.mapreduce.job.cache.files");
            if (newfiles != null) {
                String files = conf.get(MRConfiguration.JOB_CACHE_FILES);
                // NOTE(review): the start of this statement is missing from the listing; the
                // visible expression uses newfiles alone or appends it to files after a comma.
                        files == null ? newfiles.toString() : files + "," + newfiles);
        // Serialize the UDF specific context info.
        // NOTE(review): the statement the comment above describes is missing from the listing.
        // Wrap the configuration in a Job with no dependent jobs and record its stores and
        // temporary location in jobStoreMap.
        Job cjob = new Job(new JobConf(conf), new ArrayList<Job>());
        jobStoreMap.put(cjob, new Pair<List<POStore>, Path>(storeLocations, tmpLocation));
        return cjob;

    // A JobCreationException passes through unchanged; any other exception is wrapped with
    // error code 2017 and reported as a bug.
    } catch (JobCreationException jce) {
        throw jce;
    } catch (Exception e) {
        int errCode = 2017;
        String msg = "Internal error creating job configuration.";
        throw new JobCreationException(msg, errCode, PigException.BUG, e);

From source file:Commands.AddShoesCommand.java

/**
 * Handles submission of the "add shoes" form and returns the JSP to forward to.
 *
 * <p>Visible flow:
 * <ul>
 *   <li>The session must hold a "userLogin" {@code User} for which {@code is_Admin()} is true;
 *       otherwise "errorMsg" is set and "index.jsp" is returned.</li>
 *   <li>The "number" request parameter must be present; otherwise "errorMsg" is set and
 *       "shoes.jsp" is returned.</li>
 *   <li>For each form row {@code n} from 1 up to (but excluding) {@code number}, the
 *       "name-n", "brand-n", "sport-n", "price-n" and "color1-n".."color3-n" parameters are
 *       read and validated; validation messages are collected per row.</li>
 *   <li>The parsed rows, the per-row messages and the per-row colour ids are stored in the
 *       session under "list", "status" and "allcolor".</li>
 *   <li>When no row produced a message and every row carried its name/brand/sport parameters,
 *       each shoe/colour combination is stored through {@code ShoesDao.addShoes} and the files
 *       named by the "cr&lt;k&gt;-&lt;row&gt;" parameters are renamed inside the "img" directory.</li>
 * </ul>
 *
 * <p>NOTE(review): this excerpt appears to have lines elided (several closing braces and at
 * least one statement head are absent), so the description above covers only what is visible.
 *
 * @param request  request carrying the form parameters
 * @param response servlet response; not referenced in the visible lines
 * @return name of the JSP to forward to
 * @throws ServletException declared by the signature; no visible line throws it directly
 * @throws IOException declared by the signature; no visible line throws it directly
 */
public String executeCommand(HttpServletRequest request, HttpServletResponse response)
        throws ServletException, IOException {
    String forwardToJsp = "";
    HttpSession session = request.getSession(true);
    ShoesDao sd = new ShoesDao();
    // Stays true only while every row supplies its name/brand/sport parameters.
    boolean check = true;
    // NOTE(review): addList is not referenced anywhere in the visible lines.
    ArrayList<String> addList = new ArrayList<>();
    // Shoes parsed from the form, in row order.
    ArrayList<Shoes> list = new ArrayList<>();
    // Row number -> validation messages for that row (only rows with at least one message).
    HashMap<Integer, LinkedList<String>> status = new HashMap<>();
    // Row number -> colour ids chosen for that row.
    HashMap<Integer, LinkedList<Integer>> color = new HashMap<>();
    LinkedList<String> s = null;
    LinkedList<Integer> c = null;
    // Only a logged-in user flagged as admin may add shoes.
    if (session.getAttribute("userLogin") != null && ((User) session.getAttribute("userLogin")).is_Admin()) {
        if (request.getParameter("number") != null) {
            // NOTE(review): Integer.parseInt throws NumberFormatException on non-numeric input;
            // no handler is visible here.
            int num = Integer.parseInt(request.getParameter("number"));
            // Rows are numbered from 1; the loop stops before num.
            // NOTE(review): presumably "number" is the row count plus one - confirm against the form.
            for (int n = 1; n < num; n++) {
                s = new LinkedList<>();
                c = new LinkedList<>();
                // NOTE(review): this check1 is not read in the visible lines.
                boolean check1 = true;
                if (request.getParameter("name-" + n) != null && request.getParameter("brand-" + n) != null
                        && request.getParameter("sport-" + n) != null) {
                    // An empty name aborts the whole submission immediately.
                    if (request.getParameter("name-" + n).isEmpty()) {
                        session.setAttribute("errorMsg", "The name cannot be empty");
                        return forwardToJsp = "AddShoes.jsp";
                    }
                    // Normalise the name: first character upper-case, the rest lower-case.
                    String name = (String) request.getParameter("name-" + n).substring(0, 1).toUpperCase()
                            + request.getParameter("name-" + n).substring(1).toLowerCase();
                    int brand = Integer.parseInt(request.getParameter("brand-" + n));
                    int sport = Integer.parseInt(request.getParameter("sport-" + n));
                    // NOTE(review): "price-n" is dereferenced without the null check applied to
                    // name/brand/sport above.
                    if (request.getParameter("price-" + n).isEmpty()) {
                        s.add("price cannot be empty");
                    double price = Double.parseDouble(request.getParameter("price-" + n));
                    if (price < 1 || price > 200) {
                        s.add("price range is between 1 to 200");
                    // A non-empty lookup result means a shoe with this name already exists.
                    if (!sd.findShoes(name).isEmpty()) {
                        s.add("The name is repeated");

                    // Up to three colour selections per row: "color1-n" .. "color3-n".
                    boolean repeat = false;
                    for (int i = 1; i < 4; i++) {
                        if (request.getParameter("color" + i + "-" + n) != null) {
                            int id = Integer.parseInt(request.getParameter("color" + i + "-" + n));
                            // The same colour id chosen twice within one row is flagged.
                            if (c.contains(id)) {
                                repeat = true;
                    if (repeat) {
                        s.add("The color is repeated");

                    //                        String[] files1 = request.getParameterValues("file1-" + n);
                    //                        String[] files2 = request.getParameterValues("file2-" + n);
                    //                        String[] files3 = request.getParameterValues("file3-" + n);
                    //                        long a=Arrays.stream(files1).filter((String st) -> !st.isEmpty()).count();
                    //                        long b=Arrays.stream(files1).filter((String st) -> !st.isEmpty()).count();
                    //                        long d=Arrays.stream(files1).filter((String st) -> !st.isEmpty()).count();
                    //                        if(a==0 || b==0 || d==0){
                    //                            s.add("Images is not uploaded");
                    //                        }
                    //                        p.add(files1);
                    //                        p.add(files2);
                    //                        p.add(files3);
                    // Record the row's messages only when there is at least one.
                    if (!s.isEmpty()) {
                        status.put(n, s);
                    color.put(n, c);
                    list.add(new Shoes(n, brand, 0, sport, name, price, ""));
                } else {
                    // A row without name/brand/sport marks the whole submission as incomplete.
                    check = false;
            ColorDao cd = new ColorDao();

            // Expose the parsed rows and validation results through the session.
            session.setAttribute("list", list);
            session.setAttribute("status", status);
            session.setAttribute("allcolor", color);
            // Persist only when no row produced a message and no row was incomplete.
            if (status.isEmpty() && check) {
                for (int i = 0; i < list.size(); i++) {
                    // Rows are keyed from 1 in the colour map while list indices start at 0.
                    c = color.get(i + 1);
                    Iterator<Integer> iter = c.iterator();
                    // Used to build the "cr<count>-<row>" parameter name.
                    // NOTE(review): no increment of count is visible in this excerpt.
                    int count = 1;

                    // One stored shoe record per selected colour.
                    while (iter.hasNext()) {
                        String name = list.get(i).getName();
                        int colorId = iter.next();
                        String colorName = cd.findColorById(colorId).getColor_Name();
                        // Picture name prefix: "<name>-<colour name>-".
                        String pic = name + "-" + colorName + "-";
                        sd.addShoes(list.get(i).getBrandID(), colorId, list.get(i).getTypeID(), name,
                                list.get(i).getPrice(), pic);
                        // Comma-separated list of file references for this colour.
                        // NOTE(review): getParameter may return null here; split would then throw.
                        String colo = request.getParameter("cr" + count + "-" + (i + 1));
                        String[] col = colo.split(",");
                        String UPLOAD_DIRECTORY = request.getServletContext().getRealPath("") + File.separator
                                + "img" + File.separator;
                        // Suffix for the target file name: "<pic><count1>.jpg".
                        int count1 = 1;
                        for (String str : col) {
                            // The first four characters of each entry are dropped to obtain the file name.
                            // NOTE(review): presumably a fixed "img/"-style prefix - confirm with the form.
                            File file = new File(UPLOAD_DIRECTORY + str.substring(4));
                            File f = new File(UPLOAD_DIRECTORY + pic + count1 + ".jpg");
                            try {
                                // A failed rename aborts the request and returns to the form.
                                boolean check1 = file.renameTo(f);
                                if (check1 == false) {
                                            str.substring(4) + " " + UPLOAD_DIRECTORY + pic);
                                    return "AddShoes.jsp";
                            } catch (SecurityException | NullPointerException se) {
                                // NOTE(review): the stack trace is stored in the session "errorMsg"
                                // attribute; if a page renders it, internals reach the user.
                                session.setAttribute("errorMsg", Arrays.toString(se.getStackTrace()));
                                return "AddShoes.jsp";


                // NOTE(review): no forward target is assigned on this path in the visible lines.
                session.setAttribute("errorMsg", "Shoes is successful added");
                //                    session.removeAttribute("list");
                //                    session.removeAttribute("allcolor");
                //                    session.removeAttribute("status");

            } else {
                session.setAttribute("errorMsg", "Please fill the form with correct information");
                forwardToJsp = "AddShoes.jsp";

        } else {
            // The "number" parameter was missing from the request.
            session.setAttribute("errorMsg", "Fail to save changes, please refresh the page and try again");
            forwardToJsp = "shoes.jsp";
    } else {
        // Not logged in, or not an admin.
        session.setAttribute("errorMsg", "You are not allowed to access this page");
        forwardToJsp = "index.jsp";
    return forwardToJsp;

From source file:elh.eus.absa.Features.java

/**
 * Lemma ngram extraction from a KAF document.
 *
 * @param length which 'n' to use for the 'n-grams'; when 0, nothing is extracted
 * @param kafDoc PoS-tagged KAF document to extract ngrams from
 * @param discardPos PoS tags whose terms are discarded as ngram elements
 * @param save whether to save the ngrams to a file
 * @return 0 if length is 0, 1 otherwise
 */
private int extractLemmaNgrams(int length, KAFDocument kafDoc, List<String> discardPos, boolean save) {
    // NOTE(review): this excerpt appears to have lines elided (closing braces and several
    // branch bodies are absent); the comments below describe only what is visible.
    // NOTE(review): the save parameter is not referenced in the visible lines.
    //System.err.println("lemma ngram extraction: _"+length+"_");
    // A requested length of 0 means there is nothing to extract; report 0.
    if (length == 0) {
        return 0;

    int sentNum = kafDoc.getSentences().size();
    // Sentences are processed one at a time, each with a fresh ngram window.
    for (int s = 0; s < sentNum; s++) {
        LinkedList<String> ngrams = new LinkedList<String>();
        for (Term term : kafDoc.getTermsBySent(s)) {
            // The window already holds `length` elements (handling of this case is not visible here).
            if (ngrams.size() >= length) {

            //if no alphanumeric char is present discard the element as invalid ngram. Or if it has a PoS tag that
            //should be discarded              
            String lCurrent = term.getLemma();
            // Accept the lemma when its PoS tag is not in discardPos, it is longer than one
            // character, and it is not made up solely of characters outside the letter, mark,
            // decimal-digit and Emoticons classes.
            if ((!discardPos.contains(term.getPos()))
                    && (!lCurrent.matches("[^\\p{L}\\p{M}\\p{Nd}\\p{InEmoticons}]+"))
                    && (lCurrent.length() > 1)) {
                //ngrams.add(normalize(term.getLemma(), params.getProperty("normalization", "none")));
            //certain punctuation marks and emoticons are allowed as lemmas
            // NOTE(review): String.matches tests the whole string, so this pattern accepts only a
            // single character from ",;.?!"; the length <= 2 check looks redundant - confirm.
            else if ((lCurrent.length() <= 2) && (lCurrent.matches("[,;.?!]"))) {

            // add ngrams to the feature list
            // Every prefix of the current window (sizes 1..ngrams.size()) becomes a "lemma" feature.
            for (int i = 0; i < ngrams.size(); i++) {
                String ng = featureFromArray(ngrams.subList(0, i + 1), "lemma");
                addNgram("lemma", ng);
        //empty ngram list and add remaining ngrams to the feature list
        // NOTE(review): the statement that shrinks ngrams inside this loop is not visible here.
        while (!ngrams.isEmpty()) {
            String ng = featureFromArray(ngrams, "lemma");
            addNgram("lemma", ng);
    // Reached whenever length was non-zero.
    return 1;

From source file:de.betterform.agent.web.event.EventQueue.java

/**
 * Builds an aggregated copy of the queued events.
 *
 * <p>Visible behaviour: PROTOTYPE_CLONED events are mapped to a new
 * "betterform-insert-itemset" or "betterform-insert-repeatitem" event; following
 * ID_GENERATED events contribute their originalId/targetId pair to the "generatedIds" map of
 * the insert event on top of the insert stack; an ITEM_INSERTED event pops that insert event
 * and copies "position", "label" and "value" onto it.
 *
 * <p>NOTE(review): this excerpt appears to have lines elided (closing braces and the bodies of
 * several branches and loops are absent), so handling of EMBED, EMBED_DONE, FOCUS and all
 * other event types is not visible here.
 *
 * @return the aggregated event list
 */
public List<XMLEvent> aggregateEventList() {
    // Stack is used to "navigate" through the event list
    LinkedList<XMLEvent> aggregatedFocusList = new LinkedList<XMLEvent>();
    // NOTE(review): both stacks are created with the raw Stack type.
    Stack<XMLEvent> aggregatedInsertEventsStack = new Stack();
    Stack<XMLEvent> aggregatedEmbedEventsStack = new Stack();
    // Result list, pre-sized to the number of queued events.
    ArrayList<XMLEvent> aggregatedEventList = new ArrayList<XMLEvent>(eventList.size());

    // NOTE(review): the body of this loop over loadEmbedEventList is not visible in this excerpt.
    for (XMLEvent xmlEvent : this.loadEmbedEventList) {
    }


    for (int i = 0; i < eventList.size(); i++) {
        // Every queued event is cast to XercesXMLEvent.
        XercesXMLEvent xmlEvent = (XercesXMLEvent) eventList.get(i);

        XercesXMLEvent xmlEventToAdd = new XercesXMLEvent();
        // Map PROTOTYPE_CLONED event to betterform-insert-repeatitem or betterform-insert-itemset event
        // and copy event properties to new created XMLEvent
        if (xmlEvent.getType().equals(BetterFormEventNames.PROTOTYPE_CLONED)) {
            // The "targetName" context value selects between the itemset and repeat-item variants.
            if (xmlEvent.getContextInfo("targetName").equals(XFormsConstants.ITEMSET)) {
                xmlEventToAdd.initXMLEvent("betterform-insert-itemset", xmlEvent.getBubbles(),
                        xmlEvent.getCancelable(), xmlEvent.getContextInfo());
            } else {
                xmlEventToAdd.initXMLEvent("betterform-insert-repeatitem", xmlEvent.getBubbles(),
                        xmlEvent.getCancelable(), xmlEvent.getContextInfo());
            xmlEventToAdd.target = xmlEvent.target;
            // NOTE(review): the property is added to xmlEvent, not xmlEventToAdd; presumably both
            // share the same context-info map - confirm.
            xmlEvent.addProperty("generatedIds", new HashMap());
            // push XMLEvent to Stack for further processing

        // add all generated ids to surrounding betterform-insert-repeatitem or betterform-insert-itemset event
        else if (xmlEvent.getType().equals(BetterFormEventNames.ID_GENERATED)
                && aggregatedInsertEventsStack.size() > 0) {
            // The insert event stays on the stack; only its "generatedIds" map is extended.
            XMLEvent aggregatingInsertEvent = aggregatedInsertEventsStack.peek();
            ((HashMap) aggregatingInsertEvent.getContextInfo("generatedIds"))
                    .put(xmlEvent.getContextInfo("originalId"), xmlEvent.getContextInfo("targetId"));
        // add insert position to surrounding betterform-insert-repeatitem or betterform-insert-itemset event
        else if (xmlEvent.getType().equals(BetterFormEventNames.ITEM_INSERTED)) {
            // NOTE(review): pop() is called without a visible emptiness check; Stack.pop throws
            // EmptyStackException when the stack is empty.
            XMLEvent tmpEvent = aggregatedInsertEventsStack.pop();
            tmpEvent.addProperty("position", xmlEvent.getContextInfo("position"));
            tmpEvent.addProperty("label", xmlEvent.getContextInfo("label"));
            tmpEvent.addProperty("value", xmlEvent.getContextInfo("value"));

        } else if (xmlEvent.getType().equals(BetterFormEventNames.EMBED)) {
        } else if (xmlEvent.getType().equals(BetterFormEventNames.EMBED_DONE)) {
        } else if (xmlEvent.getType().equals(XFormsEventNames.FOCUS)) {
        /* else if(xmlEvent.getType().equals(BetterFormEventNames.INDEX_CHANGED)){
        // all other events within eventList are simply copied to the new eventlist
        else {

    while (!aggregatedFocusList.isEmpty()) {
    return aggregatedEventList;