Example usage for java.util TreeMap remove

Introduction

On this page you can find example usage for java.util.TreeMap.remove.

Prototype

public V remove(Object key) 

Document

Removes the mapping for this key from this TreeMap if present.
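
A minimal, self-contained sketch of this behavior (hypothetical keys and values):

import java.util.TreeMap;

public class TreeMapRemoveDemo {
    public static void main(String[] args) {
        TreeMap<String, Integer> map = new TreeMap<String, Integer>();
        map.put("a", 1);
        map.put("b", 2);

        // remove returns the previous value for the key, or null if absent
        Integer removed = map.remove("a");
        Integer missing = map.remove("z");

        System.out.println(removed);  // 1
        System.out.println(missing);  // null
        System.out.println(map);      // {b=2}
    }
}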

Usage

From source file:org.ecocean.Encounter.java

public void removeDynamicProperty(String name) {
    name = name.replaceAll(";", "_").trim().replaceAll("%20", " ");
    if (dynamicProperties != null) {

        //let's create a TreeMap of the properties
        TreeMap<String, String> tm = new TreeMap<String, String>();
        StringTokenizer st = new StringTokenizer(dynamicProperties, ";");
        while (st.hasMoreTokens()) {
            String token = st.nextToken();
            int equalPlace = token.indexOf("=");
            try {
                tm.put(token.substring(0, equalPlace), token.substring(equalPlace + 1));
            } catch (java.lang.StringIndexOutOfBoundsException soe) {
                // skip badly formatted pairs, as setDynamicProperty does below
            }
        }
        if (tm.containsKey(name)) {
            tm.remove(name);

            //now let's recreate the dynamicProperties String
            String newProps = tm.toString();
            int stringSize = newProps.length();
            dynamicProperties = newProps.substring(1, (stringSize - 1)).replaceAll(", ", ";") + ";";
        }
    }
}
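
For example, if dynamicProperties is "color=red;size=XL;", removing "color" rebuilds the string from the remaining TreeMap entries as "size=XL;" (the rebuild relies on TreeMap.toString() rendering entries as {k1=v1, k2=v2}).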

From source file:org.ecocean.Encounter.java

public void setDynamicProperty(String name, String value) {
    name = name.replaceAll(";", "_").trim().replaceAll("%20", " ");
    value = value.replaceAll(";", "_").trim();

    if (dynamicProperties == null) {
        dynamicProperties = name + "=" + value + ";";
    } else {

        //let's create a TreeMap of the properties
        TreeMap<String, String> tm = new TreeMap<String, String>();
        StringTokenizer st = new StringTokenizer(dynamicProperties, ";");
        while (st.hasMoreTokens()) {
            String token = st.nextToken();
            int equalPlace = token.indexOf("=");
            try {
                tm.put(token.substring(0, equalPlace), token.substring(equalPlace + 1));
            } catch (java.lang.StringIndexOutOfBoundsException soe) {
                //this is a badly formatted pair that should be ignored
            }
        }
        if (tm.containsKey(name)) {
            tm.remove(name);
            tm.put(name, value);

            //now let's recreate the dynamicProperties String
            String newProps = tm.toString();
            int stringSize = newProps.length();
            dynamicProperties = newProps.substring(1, (stringSize - 1)).replaceAll(", ", ";") + ";";
        } else {
            dynamicProperties = dynamicProperties + name + "=" + value + ";";
        }
    }
}

From source file:com.google.gwt.emultest.java.util.TreeMapTest.java

public void testRemove_ComparableKey() {
    TreeMap<String, Object> map = new TreeMap<String, Object>();
    ConflictingKey conflictingKey = new ConflictingKey("conflictingKey");
    assertNull(map.remove(conflictingKey));
    map.put("something", "value");
    assertNull(map.remove(conflictingKey));
}
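
ConflictingKey is a helper defined elsewhere in the GWT test suite. A minimal stand-in consistent with this test (an assumption, not the verbatim GWT source) is a key type that can be compared against the map's String keys without matching any of them:

// Hypothetical stand-in for the test's helper: mutually comparable with
// String (so TreeMap.remove can traverse the tree without a
// ClassCastException), but never equal to the keys stored in this test.
class ConflictingKey implements Comparable<CharSequence> {
    private final String value;

    ConflictingKey(String value) {
        this.value = value;
    }

    @Override
    public int compareTo(CharSequence other) {
        return value.compareTo(other.toString());
    }
}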

From source file:com.sfs.whichdoctor.dao.PersonDAOImpl.java

/**
 * Load a list of people this person has supervised in the past.
 *
 * @param guid the guid
 * @param allRotations the all rotations
 * @return the collection
 */
private HashMap<String, ArrayList<PersonBean>> loadSupervisedPeople(final int guid,
        final boolean allRotations) {

    HashMap<String, ArrayList<PersonBean>> supervisedPeople = new HashMap<String, ArrayList<PersonBean>>();

    // Create a new SearchBean with default values
    SearchBean searchRotations = this.getSearchDAO().initiate("rotation", null);
    searchRotations.setLimit(0);

    RotationBean rotationParam = (RotationBean) searchRotations.getSearchCriteria();
    SupervisorBean supervisor = new SupervisorBean();
    supervisor.setPersonGUID(guid);
    rotationParam.addSupervisor(supervisor);

    BuilderBean loadDetails = new BuilderBean();
    loadDetails.setParameter("ASSESSMENTS", true);
    loadDetails.setParameter("SUPERVISORS", true);

    searchRotations.setSearchCriteria(rotationParam);
    searchRotations.setOrderColumn("rotation.StartDate");
    searchRotations.setOrderColumn2("people.LastName");
    searchRotations.setOrderAscending(false);

    SearchResultsBean studentsSupervised = new SearchResultsBean();
    try {
        studentsSupervised = this.getSearchDAO().search(searchRotations, loadDetails);
    } catch (Exception e) {
        dataLogger.error("Error searching for supervised people: " + e.getMessage());
    }

    final Calendar currentDate = Calendar.getInstance();

    final TreeMap<String, ArrayList<RotationBean>> currentlySupervising = new TreeMap<String, ArrayList<RotationBean>>();
    final TreeMap<String, ArrayList<RotationBean>> previouslySupervised = new TreeMap<String, ArrayList<RotationBean>>();
    final HashMap<String, PersonBean> personMap = new HashMap<String, PersonBean>();

    for (Object rotationObj : studentsSupervised.getSearchResults()) {
        final RotationBean rotation = (RotationBean) rotationObj;

        boolean currentlyTakingPlace = false;

        if (rotation.getStartDate().before(currentDate.getTime())
                && rotation.getEndDate().after(currentDate.getTime())) {
            currentlyTakingPlace = true;
        }

        if (rotation.getPerson() != null) {
            final PersonBean person = rotation.getPerson();

            final String index = person.getLastName() + " " + person.getPreferredName() + " "
                    + person.getPersonIdentifier();

            boolean processed = false;

            if (currentlySupervising.containsKey(index)) {
                // The person exists in the currently supervising list.
                ArrayList<RotationBean> tneRots = currentlySupervising.get(index);
                if (allRotations || currentlyTakingPlace) {
                    tneRots.add(rotation);
                }
                currentlySupervising.put(index, tneRots);
                processed = true;
            }
            if (previouslySupervised.containsKey(index)) {
                // The person exists in the previously supervised list
                ArrayList<RotationBean> tneRots = previouslySupervised.get(index);
                if (allRotations || currentlyTakingPlace) {
                    tneRots.add(rotation);
                }
                if (currentlyTakingPlace) {
                    // This is a current rotation, remove from the previously
                    // supervised list and add to currently supervising.
                    previouslySupervised.remove(index);
                    currentlySupervising.put(index, tneRots);
                } else {
                    previouslySupervised.put(index, tneRots);
                }
                processed = true;
            }

            if (!processed) {
                // This person has not been encountered yet.
                personMap.put(index, person);

                ArrayList<RotationBean> tneRots = new ArrayList<RotationBean>();
                if (allRotations || currentlyTakingPlace) {
                    tneRots.add(rotation);
                }
                if (currentlyTakingPlace) {
                    currentlySupervising.put(index, tneRots);
                } else {
                    previouslySupervised.put(index, tneRots);
                }
            }
        }
    }

    final ArrayList<PersonBean> currentPeople = new ArrayList<PersonBean>();
    final ArrayList<PersonBean> previousPeople = new ArrayList<PersonBean>();

    for (String index : currentlySupervising.keySet()) {
        final PersonBean person = personMap.get(index);
        final ArrayList<RotationBean> tneRots = currentlySupervising.get(index);
        person.setRotations(tneRots);
        currentPeople.add(person);
    }
    for (String index : previouslySupervised.keySet()) {
        final PersonBean person = personMap.get(index);
        final ArrayList<RotationBean> tneRots = previouslySupervised.get(index);
        person.setRotations(tneRots);
        previousPeople.add(person);
    }

    supervisedPeople.put("current", currentPeople);
    supervisedPeople.put("previous", previousPeople);

    return supervisedPeople;
}

From source file:us.levk.math.linear.EucledianDistanceClusterer.java

public Cluster eucledian(final RealMatrix original) throws IOException {
    try (HugeRealMatrix distances = new HugeRealMatrix(original.getRowDimension(),
            original.getRowDimension())) {
        final Map<Integer, Cluster> genehash = new HashMap<Integer, Cluster>() {
            private static final long serialVersionUID = 1L;

            {
                for (int index = original.getRowDimension(); --index >= 0; put(index, new Cluster(index)))
                    ;
            }
        };
        TreeMap<Double, int[]> sorted = new TreeMap<>();

        log.debug("Populating distance matrix");
        for (int i = 0; i < original.getRowDimension(); i++) {
            for (int j = i + 1; j < original.getRowDimension(); j++) {
                // Euclidean distance calculation.
                double total = 0;
                for (int k = 0; k < original.getColumnDimension(); k++) {
                    double left = original.getEntry(i, k);
                    double right = original.getEntry(j, k);
                    if (!isNaN(left) && !isNaN(right) && !isInfinite(left) && !isInfinite(right))
                        total += Math.pow(left - right, 2);
                }
                double distance = Math.pow(total, 0.5);

                distances.setEntry(i, j, distance);
                distances.setEntry(j, i, distance);
                int[] genePair = { i, j };
                // Enter the distance calculated and the genes measured into a
                // treemap. Will be automatically sorted.
                sorted.put(distance, genePair);
            }
        }
        log.debug("Initialized distances matrix " + distances);

        while (true) {
            // Get the first key of the TreeMap; it will be the shortest
            // distance de facto.
            final double minkey = (Double) sorted.firstKey();
            int[] minValues = (int[]) sorted.firstEntry().getValue();

            final int value1 = minValues[0], value2 = minValues[1];
            // find

            Cluster cluster = new Cluster(genehash.get(value1), genehash.get(value2)) {
                {
                    log.debug("Generating cluster from " + value1 + " and " + value2 + " in " + genehash);
                    contains().addAll(genehash.get(value1).contains());
                    contains().addAll(genehash.get(value2).contains());
                    d(minkey);
                    log.debug("Generated cluster " + this);
                }
            };

            genehash.put(cluster.id(), cluster);
            genehash.remove(value1);
            genehash.remove(value2);

            if (genehash.size() <= 1)
                break;

            // Iterate over all the current clusters to remeasure distance with the
            // previously clustered group.
            for (Cluster c : genehash.values()) {
                // Skip measuring the new cluster with itself.
                if (c == cluster)
                    continue;

                double distance = 0;
                int n = 0;
                // Get genes from each cluster. Distance is measured from each element
                // to every element.
                for (int current : c.contains())
                    for (int created : cluster.contains()) {
                        distance += distances.getEntry(current, created);
                        n++;
                    }

                distance = distance / n;

                int[] valuePair = { c.id(), cluster.id() };
                sorted.put(distance, valuePair);
            }

            // Get the shortest distance. Check to make sure the shortest
            // distance does not include a gene pair that has already had its
            // elements clustered.
            boolean minimized = false;
            while (!minimized) {
                double mk = sorted.firstKey();
                minValues = sorted.firstEntry().getValue();
                // If the gene pair is not present in the current gene set,
                // remove this distance.
                if (!genehash.containsKey(minValues[0]) || !genehash.containsKey(minValues[1]))
                    sorted.remove(mk);
                else
                    minimized = true;
            }
        }

        return genehash.entrySet().iterator().next().getValue();
    }
}
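
One caveat with this pattern (a side note, not from the original source): because the TreeMap is keyed on the raw distance, two gene pairs with exactly equal distances collide, and the later put silently replaces the earlier pair:

import java.util.TreeMap;

public class DistanceKeyCollision {
    public static void main(String[] args) {
        TreeMap<Double, int[]> sorted = new TreeMap<>();
        sorted.put(1.5, new int[] { 0, 1 });
        sorted.put(1.5, new int[] { 2, 3 }); // replaces the {0, 1} pair
        System.out.println(sorted.size());   // prints 1
    }
}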

From source file:edu.dfci.cccb.mev.hcl.domain.simple.SimpleTwoDimensionalHclBuilder.java

private Node cluster(final Dataset dataset, Dimension dimension, Metric metric, Linkage linkage)
        throws DatasetException {
    final Type dimensionType = dimension.type();
    final RealMatrix original = toRealMatrix(dataset);
    final int size = dimensionType == ROW ? original.getRowDimension() : original.getColumnDimension();
    final int other = dimensionType == COLUMN ? original.getRowDimension() : original.getColumnDimension();
    Iterator<Integer> enumerator = new Iterator<Integer>() {

        private int counter = -1;

        @Override
        public boolean hasNext() {
            return true;
        }

        @Override
        public Integer next() {
            counter--;
            if (counter > 0)
                counter = -1;
            return counter;
        }

        @Override
        public void remove() {
            throw new UnsupportedOperationException();
        }
    };
    final double[][] distances = new double[size][size];

    log.debug("Populating node hash");
    final Map<Integer, Node> genehash = new HashMap<Integer, Node>() {
        private static final long serialVersionUID = 1L;

        {
            for (int index = size; --index >= 0; put(index,
                    nodeBuilder().leaf(dataset.dimension(dimensionType).keys().get(index))))
                ;
        }
    };
    TreeMap<Double, int[]> sorted = new TreeMap<>();

    log.debug("Populating distance matrix");
    for (int i = 0; i < size; i++) {
        for (int j = i + 1; j < size; j++) {
            double distance = metric.distance(new AbstractList<Double>() {

                private int i;

                @Override
                public Double get(int index) {
                    return dimensionType == ROW ? original.getEntry(i, index) : original.getEntry(index, i);
                }

                @Override
                public int size() {
                    return other;
                }

                private List<Double> initializeProjection(int i) {
                    this.i = i;
                    return this;
                }
            }.initializeProjection(i), new AbstractList<Double>() {

                private int j;

                @Override
                public Double get(int index) {
                    return dimensionType == ROW ? original.getEntry(j, index) : original.getEntry(index, j);
                }

                @Override
                public int size() {
                    return other;
                }

                private List<Double> initializeProjection(int j) {
                    this.j = j;
                    return this;
                }
            }.initializeProjection(j));

            distances[i][j] = distance;
            distances[j][i] = distance;
            int[] genePair = { i, j };
            // Enter the distance calculated and the genes measured into a
            // treemap. Will be automatically sorted.
            sorted.put(distance, genePair);
        }
    }

    log.debug("Aggregating");
    while (true) {
        // Get the first key of the TreeMap; it will be the shortest
        // distance de facto.
        final double minkey = (Double) sorted.firstKey();
        int[] minValues = (int[]) sorted.firstEntry().getValue();

        final int value1 = minValues[0], value2 = minValues[1];
        // find

        Node cluster = nodeBuilder().branch(minkey, genehash.get(value1), genehash.get(value2));
        int id = enumerator.next();

        genehash.put(id, cluster);
        genehash.remove(value1);
        genehash.remove(value2);

        if (genehash.size() <= 1)
            break;

        // Iterate over all the current clusters to remeasure distance with the
        // previously clustered group.
        for (Entry<Integer, Node> e : genehash.entrySet()) {
            Node c = e.getValue();
            // Skip measuring the new cluster with itself.
            if (c == cluster)
                continue;

            List<Double> aggregation = new ArrayList<>();
            // Get genes from each cluster. Distance is measured from each element
            // to every element.
            for (int current : traverse(dimension.keys(), c))
                for (int created : traverse(dimension.keys(), cluster))
                    aggregation.add(distances[current][created]);

            int[] valuePair = { e.getKey(), id };
            sorted.put(linkage.aggregate(aggregation), valuePair);
        }

        // Get the shortest distance. Check to make sure the shortest distance
        // does not include a gene pair that has already had its elements
        // clustered.
        boolean minimized = false;
        while (!minimized) {
            double mk = sorted.firstKey();
            minValues = sorted.firstEntry().getValue();
            // If the gene pair is not present in the current gene set, remove
            // this distance.
            if (!genehash.containsKey(minValues[0]) || !genehash.containsKey(minValues[1]))
                sorted.remove(mk);
            else
                minimized = true;
        }
    }

    Node result = genehash.entrySet().iterator().next().getValue();
    log.debug("Clustered " + result);
    return result;
}

From source file:org.apache.hadoop.mapred.HFSPScheduler.java

private void assignSizeBasedTasks(TaskType type, HelperForType helper,
        TreeMap<JobDurationInfo, JobInProgress> sizeBasedJobs,
        TreeMap<JobDurationInfo, TaskStatuses> taskStatusesSizeBased) throws IOException {

    final boolean isMap = type == TaskType.MAP;
    int totClaimedSlots = 0;

    // StringBuilder builder = new StringBuilder("SBJobs(");
    // builder.append(type).append("): [");
    // boolean first = true;
    // for (Entry<JobDurationInfo,JobInProgress> jip : sizeBasedJobs.entrySet())
    // {
    // if (first)
    // first = false;
    // else
    // builder.append(",");
    // builder.append(jip.getValue().getJobID())
    // .append(" -> ")
    // .append(jip.getKey().getPhaseDuration())
    // .append("/")
    // .append(jip.getKey().getPhaseTotalDuration())
    // .append(" p: ")
    // .append(this.getNumPendingNewTasks(jip.getValue(), type))
    // .append(" r: ")
    // .append(this.getNumRunningTasks(jip.getValue(), type))
    // .append(" f: ")
    // .append(this.getNumFinishedTasks(jip.getValue(), type));
    // }
    // builder.append("]");
    // LOG.debug(builder.toString());

    for (Entry<JobDurationInfo, JobInProgress> entry : sizeBasedJobs.entrySet()) {

        JobInProgress jip = entry.getValue();
        JobDurationInfo jdi = entry.getKey();
        TaskStatuses taskStatuses = taskStatusesSizeBased.get(jdi);

        if (!this.isJobReadyForTypeScheduling(jip, type)) {
            if (LOG.isDebugEnabled() && jip.getStatus().getRunState() != JobStatus.SUCCEEDED) {
                LOG.debug(
                        "SIZEBASED(" + jip.getJobID() + ":" + type + "):" + "job is not ready for scheduling ("
                                + "status: " + JobStatus.getJobRunState(jip.getStatus().getRunState())
                                + ", mapProgress: " + jip.getStatus().mapProgress() + ", reduceProgress: "
                                + jip.getStatus().reduceProgress() + ", scheduleReduces: "
                                + jip.scheduleReduces() + ")");
            }
            continue;
        }

        // NEW
        int pendingNewTasks = this.getNumPendingNewTasks(jip, type);
        int pendingResumableTasks = (taskStatuses == null) ? 0 : taskStatuses.suspendedTaskStatuses.size();

        int totAvailableSizeBasedSlots = helper.totAvailableSizeBasedSlots();

        // missing slots for resumable
        int missingResumableSlots = 0;
        if (pendingResumableTasks > 0 && pendingResumableTasks > totAvailableSizeBasedSlots) {
            if (totAvailableSizeBasedSlots <= 0)
                missingResumableSlots = pendingResumableTasks;
            else
                missingResumableSlots = pendingResumableTasks - totAvailableSizeBasedSlots;
            totAvailableSizeBasedSlots = (pendingResumableTasks > totAvailableSizeBasedSlots) ? 0
                    : totAvailableSizeBasedSlots - pendingResumableTasks;
        }

        int missingNewSlots = 0;
        if (pendingNewTasks > 0 && pendingNewTasks > totAvailableSizeBasedSlots) {
            if (totAvailableSizeBasedSlots <= 0)
                missingNewSlots = pendingNewTasks;
            else
                missingNewSlots = pendingNewTasks - totAvailableSizeBasedSlots;
            totAvailableSizeBasedSlots = (pendingNewTasks > totAvailableSizeBasedSlots) ? 0
                    : totAvailableSizeBasedSlots - pendingNewTasks;
        }

        TreeMap<TaskAttemptID, TaskStatus> suspended = null;
        if (taskStatuses != null)
            suspended = taskStatuses.suspendedTaskStatuses;

        if (pendingNewTasks > 0 || pendingResumableTasks > 0 || (suspended != null && !suspended.isEmpty())) {
            LOG.debug(jip.getJobID() + ":" + type + " (d: " + jdi.getPhaseDuration() + "/"
                    + jdi.getPhaseTotalDuration() + "):" + " pendingNewTasks: " + pendingNewTasks
                    + " pendingResumableTasks: " + pendingResumableTasks
                    // + " notResumableTasksOnThisTT: " + notResumableTasks
                    + " totAvailableSizeBasedSlots: "
                    + (helper.totAvailableSizeBasedSlots() <= 0 ? 0 : helper.totAvailableSizeBasedSlots())
                    + " currAvailableSlots: " + helper.currAvailableSlots + " => missingNewSlots: "
                    + missingNewSlots + " missingResumableSlots: " + missingResumableSlots);
        }

        if (this.preemptionStrategy.isPreemptionActive()
                && (missingNewSlots > 0 || missingResumableSlots > 0)) {
            ClaimedSlots claimedSlots = this.claimSlots(helper, Phase.SIZE_BASED, jip, missingNewSlots,
                    missingResumableSlots, totClaimedSlots, sizeBasedJobs, taskStatusesSizeBased);

            totClaimedSlots += claimedSlots.getNumPreemptedForNewTasks()
                    + claimedSlots.getNumPreemptedForResumableTasks();

            LOG.debug(jip.getJobID() + " taskStatusesOnTT: " + taskStatusesSizeBased.get(jdi)
                    + " pendingNewTasks: " + pendingNewTasks + " pendingResumableTasks: "
                    + pendingResumableTasks + " missingNewSlots: " + missingNewSlots
                    + " missingResumableSlots: " + missingResumableSlots);
        }

        while (pendingNewTasks > 0 || pendingResumableTasks > 0
                || (suspended != null && !suspended.isEmpty())) {

            if (helper.currAvailableSlots <= 0) {
                LOG.debug("SIZEBASED(" + jip.getJobID() + ":" + type + "):" + " no slots available on "
                        + taskHelper.ttStatus.getTrackerName());
                return;
            }

            LOG.debug("SIZEBASED(" + jip.getJobID() + ":" + type + "):" + " totAvailableSizeBasedSlots(): "
                    + helper.totAvailableSizeBasedSlots() + " pendingNewTasks: " + pendingNewTasks
                    + " pendingResumableTasks: " + pendingResumableTasks + " suspended("
                    + (suspended == null ? 0 : suspended.size()) + "): " + suspended);

            if (this.preemptionStrategy.isPreemptionActive() && (suspended != null && !suspended.isEmpty())) {
                TaskStatus toResume = suspended.remove(suspended.firstKey());
                // LOG.debug("RESUME: " + toResume.getTaskID() + " " +
                // toResume.getRunState());
                TaskAttemptID tAID = toResume.getTaskID();
                JobInProgress rJIP = this.taskTrackerManager.getJob(tAID.getTaskID().getJobID());
                TaskInProgress tip = rJIP.getTaskInProgress(tAID.getTaskID());
                if (this.preemptionStrategy.resume(tip, toResume)) {
                    taskHelper.resume(tAID, Phase.SIZE_BASED);
                    pendingResumableTasks -= 1;
                } else {
                    LOG.debug("SIZEBASED(" + jip.getJobID() + ":" + type + "):" + " cannot resume " + tAID
                            + " on " + taskHelper.ttStatus.getTrackerName());
                }
            } else {

                Task task = this.obtainNewTask(jip, taskHelper.ttStatus, isMap, taskHelper.currentTime);

                if (task == null) {
                    LOG.debug("SIZEBASED(" + jip.getJobID() + ":" + type + "):"
                            + " cannot obtain slot for new task on " + taskHelper.ttStatus.getTrackerName()
                            + " (#pendingNew: " + pendingNewTasks + ", #pendingResumable: "
                            + pendingResumableTasks + ", #free_" + type + "_slots: " + helper.currAvailableSlots
                            + ")");
                    break;
                }

                taskHelper.slotObtained(task, Phase.SIZE_BASED);
                pendingNewTasks -= 1;
            }
        }
    }
}

From source file:org.apache.hadoop.hbase.util.RegionSplitter.java

static void rollingSplit(String tableName, SplitAlgorithm splitAlgo, Configuration conf)
        throws IOException, InterruptedException {
    final int minOS = conf.getInt("split.outstanding", 2);

    HTable table = new HTable(conf, tableName);

    // max outstanding splits. default == 50% of servers
    final int MAX_OUTSTANDING = Math.max(table.getConnection().getCurrentNrHRS() / 2, minOS);

    Path hbDir = FSUtils.getRootDir(conf);
    Path tableDir = FSUtils.getTableDir(hbDir, table.getName());
    Path splitFile = new Path(tableDir, "_balancedSplit");
    FileSystem fs = FileSystem.get(conf);

    // get a list of daughter regions to create
    LinkedList<Pair<byte[], byte[]>> tmpRegionSet = getSplits(table, splitAlgo);
    LinkedList<Pair<byte[], byte[]>> outstanding = Lists.newLinkedList();
    int splitCount = 0;
    final int origCount = tmpRegionSet.size();

    // all splits must compact & we have 1 compact thread, so 2 split
    // requests to the same RS can stall the outstanding split queue.
    // To fix, group the regions into an RS pool and round-robin through it
    LOG.debug("Bucketing regions by regionserver...");
    TreeMap<String, LinkedList<Pair<byte[], byte[]>>> daughterRegions = Maps.newTreeMap();
    for (Pair<byte[], byte[]> dr : tmpRegionSet) {
        String rsLocation = table.getRegionLocation(dr.getSecond()).getHostnamePort();
        if (!daughterRegions.containsKey(rsLocation)) {
            LinkedList<Pair<byte[], byte[]>> entry = Lists.newLinkedList();
            daughterRegions.put(rsLocation, entry);
        }
        daughterRegions.get(rsLocation).add(dr);
    }
    LOG.debug("Done with bucketing.  Split time!");
    long startTime = System.currentTimeMillis();

    // open the split file and modify it as splits finish
    FSDataInputStream tmpIn = fs.open(splitFile);
    byte[] rawData = new byte[tmpIn.available()];
    tmpIn.readFully(rawData);
    tmpIn.close();
    FSDataOutputStream splitOut = fs.create(splitFile);
    splitOut.write(rawData);

    try {
        // *** split code ***
        while (!daughterRegions.isEmpty()) {
            LOG.debug(daughterRegions.size() + " RS have regions to split.");

            // Get RegionServer : region count mapping
            final TreeMap<ServerName, Integer> rsSizes = Maps.newTreeMap();
            Map<HRegionInfo, ServerName> regionsInfo = table.getRegionLocations();
            for (ServerName rs : regionsInfo.values()) {
                if (rsSizes.containsKey(rs)) {
                    rsSizes.put(rs, rsSizes.get(rs) + 1);
                } else {
                    rsSizes.put(rs, 1);
                }
            }

            // sort the RS by the number of regions they have
            List<String> serversLeft = Lists.newArrayList(daughterRegions.keySet());
            Collections.sort(serversLeft, new Comparator<String>() {
                public int compare(String o1, String o2) {
                    return rsSizes.get(o1).compareTo(rsSizes.get(o2));
                }
            });

            // round-robin through the RS list. Choose the lightest-loaded servers
            // first to keep the master from load-balancing regions as we split.
            for (String rsLoc : serversLeft) {
                Pair<byte[], byte[]> dr = null;

                // find a region in the RS list that hasn't been moved
                LOG.debug("Finding a region on " + rsLoc);
                LinkedList<Pair<byte[], byte[]>> regionList = daughterRegions.get(rsLoc);
                while (!regionList.isEmpty()) {
                    dr = regionList.pop();

                    // get current region info
                    byte[] split = dr.getSecond();
                    HRegionLocation regionLoc = table.getRegionLocation(split);

                    // if this region moved locations
                    String newRs = regionLoc.getHostnamePort();
                    if (newRs.compareTo(rsLoc) != 0) {
                        LOG.debug("Region with " + splitAlgo.rowToStr(split) + " moved to " + newRs
                                + ". Relocating...");
                        // relocate it, don't use it right now
                        if (!daughterRegions.containsKey(newRs)) {
                            LinkedList<Pair<byte[], byte[]>> entry = Lists.newLinkedList();
                            daughterRegions.put(newRs, entry);
                        }
                        daughterRegions.get(newRs).add(dr);
                        dr = null;
                        continue;
                    }

                    // make sure this region wasn't already split
                    byte[] sk = regionLoc.getRegionInfo().getStartKey();
                    if (sk.length != 0) {
                        if (Bytes.equals(split, sk)) {
                            LOG.debug("Region already split on " + splitAlgo.rowToStr(split)
                                    + ".  Skipping this region...");
                            ++splitCount;
                            dr = null;
                            continue;
                        }
                        byte[] start = dr.getFirst();
                        Preconditions.checkArgument(Bytes.equals(start, sk),
                                splitAlgo.rowToStr(start) + " != " + splitAlgo.rowToStr(sk));
                    }

                    // passed all checks! found a good region
                    break;
                }
                if (regionList.isEmpty()) {
                    daughterRegions.remove(rsLoc);
                }
                if (dr == null)
                    continue;

                // we have a good region, time to split!
                byte[] split = dr.getSecond();
                LOG.debug("Splitting at " + splitAlgo.rowToStr(split));
                HBaseAdmin admin = new HBaseAdmin(table.getConfiguration());
                admin.split(table.getTableName(), split);

                LinkedList<Pair<byte[], byte[]>> finished = Lists.newLinkedList();
                if (conf.getBoolean("split.verify", true)) {
                    // we need to verify and rate-limit our splits
                    outstanding.addLast(dr);
                    // with too many outstanding splits, wait for some to finish
                    while (outstanding.size() >= MAX_OUTSTANDING) {
                        finished = splitScan(outstanding, table, splitAlgo);
                        if (finished.isEmpty()) {
                            Thread.sleep(30 * 1000);
                        } else {
                            outstanding.removeAll(finished);
                        }
                    }
                } else {
                    finished.add(dr);
                }

                // mark each finished region as successfully split.
                for (Pair<byte[], byte[]> region : finished) {
                    splitOut.writeChars("- " + splitAlgo.rowToStr(region.getFirst()) + " "
                            + splitAlgo.rowToStr(region.getSecond()) + "\n");
                    splitCount++;
                    if (splitCount % 10 == 0) {
                        long tDiff = (System.currentTimeMillis() - startTime) / splitCount;
                        LOG.debug("STATUS UPDATE: " + splitCount + " / " + origCount + ". Avg Time / Split = "
                                + org.apache.hadoop.util.StringUtils.formatTime(tDiff));
                    }
                }
            }
        }
        if (conf.getBoolean("split.verify", true)) {
            while (!outstanding.isEmpty()) {
                LinkedList<Pair<byte[], byte[]>> finished = splitScan(outstanding, table, splitAlgo);
                if (finished.isEmpty()) {
                    Thread.sleep(30 * 1000);
                } else {
                    outstanding.removeAll(finished);
                    for (Pair<byte[], byte[]> region : finished) {
                        splitOut.writeChars("- " + splitAlgo.rowToStr(region.getFirst()) + " "
                                + splitAlgo.rowToStr(region.getSecond()) + "\n");
                    }
                }
            }
        }
        LOG.debug("All regions have been successfully split!");
    } finally {
        long tDiff = System.currentTimeMillis() - startTime;
        LOG.debug("TOTAL TIME = " + org.apache.hadoop.util.StringUtils.formatTime(tDiff));
        LOG.debug("Splits = " + splitCount);
        LOG.debug("Avg Time / Split = " + org.apache.hadoop.util.StringUtils.formatTime(tDiff / splitCount));

        splitOut.close();
        if (table != null) {
            table.close();
        }
    }
    fs.delete(splitFile, false);
}

From source file:org.apache.nutch.crawl.CrawlDbReader.java

private TreeMap<String, Writable> processStatJobHelper(String crawlDb, Configuration config, boolean sort)
        throws IOException, InterruptedException, ClassNotFoundException {
    Path tmpFolder = new Path(crawlDb, "stat_tmp" + System.currentTimeMillis());

    Job job = NutchJob.getInstance(config);
    config = job.getConfiguration();
    job.setJobName("stats " + crawlDb);
    config.setBoolean("db.reader.stats.sort", sort);

    FileInputFormat.addInputPath(job, new Path(crawlDb, CrawlDb.CURRENT_NAME));
    job.setInputFormatClass(SequenceFileInputFormat.class);

    job.setJarByClass(CrawlDbReader.class);
    job.setMapperClass(CrawlDbStatMapper.class);
    job.setCombinerClass(CrawlDbStatReducer.class);
    job.setReducerClass(CrawlDbStatReducer.class);

    FileOutputFormat.setOutputPath(job, tmpFolder);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NutchWritable.class);

    // https://issues.apache.org/jira/browse/NUTCH-1029
    config.setBoolean("mapreduce.fileoutputcommitter.marksuccessfuljobs", false);
    FileSystem fileSystem = tmpFolder.getFileSystem(config);
    try {
        boolean success = job.waitForCompletion(true);
        if (!success) {
            String message = "CrawlDbReader job did not succeed, job status:" + job.getStatus().getState()
                    + ", reason: " + job.getStatus().getFailureInfo();
            LOG.error(message);
            fileSystem.delete(tmpFolder, true);
            throw new RuntimeException(message);
        }
    } catch (IOException | InterruptedException | ClassNotFoundException e) {
        LOG.error(StringUtils.stringifyException(e));
        fileSystem.delete(tmpFolder, true);
        throw e;
    }

    // reading the result
    SequenceFile.Reader[] readers = SegmentReaderUtil.getReaders(tmpFolder, config);

    Text key = new Text();
    NutchWritable value = new NutchWritable();

    TreeMap<String, Writable> stats = new TreeMap<>();
    for (int i = 0; i < readers.length; i++) {
        SequenceFile.Reader reader = readers[i];
        while (reader.next(key, value)) {
            String k = key.toString();
            Writable val = stats.get(k);
            if (val == null) {
                stats.put(k, value.get());
                continue;
            }
            if (k.equals("sc")) {
                float min = Float.MAX_VALUE;
                float max = Float.MIN_VALUE;
                if (stats.containsKey("scn")) {
                    min = ((FloatWritable) stats.get("scn")).get();
                } else {
                    min = ((FloatWritable) stats.get("sc")).get();
                }
                if (stats.containsKey("scx")) {
                    max = ((FloatWritable) stats.get("scx")).get();
                } else {
                    max = ((FloatWritable) stats.get("sc")).get();
                }
                float fvalue = ((FloatWritable) value.get()).get();
                if (min > fvalue) {
                    min = fvalue;
                }
                if (max < fvalue) {
                    max = fvalue;
                }
                stats.put("scn", new FloatWritable(min));
                stats.put("scx", new FloatWritable(max));
            } else if (k.equals("ft") || k.equals("fi")) {
                long min = Long.MAX_VALUE;
                long max = Long.MIN_VALUE;
                String minKey = k + "n";
                String maxKey = k + "x";
                if (stats.containsKey(minKey)) {
                    min = ((LongWritable) stats.get(minKey)).get();
                } else if (stats.containsKey(k)) {
                    min = ((LongWritable) stats.get(k)).get();
                }
                if (stats.containsKey(maxKey)) {
                    max = ((LongWritable) stats.get(maxKey)).get();
                } else if (stats.containsKey(k)) {
                    max = ((LongWritable) stats.get(k)).get();
                }
                long lvalue = ((LongWritable) value.get()).get();
                if (min > lvalue) {
                    min = lvalue;
                }
                if (max < lvalue) {
                    max = lvalue;
                }
                stats.put(k + "n", new LongWritable(min));
                stats.put(k + "x", new LongWritable(max));
            } else if (k.equals("sct")) {
                FloatWritable fvalue = (FloatWritable) value.get();
                ((FloatWritable) val).set(((FloatWritable) val).get() + fvalue.get());
            } else if (k.equals("scd")) {
                MergingDigest tdigest = null;
                MergingDigest tdig = MergingDigest
                        .fromBytes(ByteBuffer.wrap(((BytesWritable) value.get()).getBytes()));
                if (val instanceof BytesWritable) {
                    tdigest = MergingDigest.fromBytes(ByteBuffer.wrap(((BytesWritable) val).getBytes()));
                    tdigest.add(tdig);
                } else {
                    tdigest = tdig;
                }
                ByteBuffer tdigestBytes = ByteBuffer.allocate(tdigest.smallByteSize());
                tdigest.asSmallBytes(tdigestBytes);
                stats.put(k, new BytesWritable(tdigestBytes.array()));
            } else {
                LongWritable lvalue = (LongWritable) value.get();
                ((LongWritable) val).set(((LongWritable) val).get() + lvalue.get());
            }
        }
        reader.close();
    }
    // remove score, fetch interval, and fetch time
    // (used for min/max calculation)
    stats.remove("sc");
    stats.remove("fi");
    stats.remove("ft");
    // removing the tmp folder
    fileSystem.delete(tmpFolder, true);
    return stats;
}

From source file:org.apache.pdfbox.pdfparser.NonSequentialPDFParser.java

/**
 * Will parse every object necessary to load a single page from the pdf document.
 * We try our best to order objects according to offset in file before reading
 * to minimize seek operations.
 * 
 * @param dict the COSObject from the parent pages.
 * @param excludeObjects dictionary object reference entries with these names will not be parsed
 * 
 * @throws IOException
 */
private void parseDictObjects(COSDictionary dict, COSName... excludeObjects) throws IOException {
    // ---- create queue for objects waiting for further parsing
    final Queue<COSBase> toBeParsedList = new LinkedList<COSBase>();
    // offset ordered object map
    final TreeMap<Long, List<COSObject>> objToBeParsed = new TreeMap<Long, List<COSObject>>();
    // in case of compressed objects offset points to stmObj
    final Set<Long> parsedObjects = new HashSet<Long>();
    final Set<Long> addedObjects = new HashSet<Long>();

    // ---- add objects not to be parsed to list of already parsed objects
    if (excludeObjects != null) {
        for (COSName objName : excludeObjects) {
            COSBase baseObj = dict.getItem(objName);
            if (baseObj instanceof COSObject) {
                parsedObjects.add(getObjectId((COSObject) baseObj));
            }
        }
    }

    addNewToList(toBeParsedList, dict.getValues(), addedObjects);

    // ---- go through objects to be parsed
    while (!(toBeParsedList.isEmpty() && objToBeParsed.isEmpty())) {
        // -- first get all COSObject from other kind of objects and
        //    put them in objToBeParsed; afterwards toBeParsedList is empty
        COSBase baseObj;
        while ((baseObj = toBeParsedList.poll()) != null) {
            if (baseObj instanceof COSStream) {
                addNewToList(toBeParsedList, ((COSStream) baseObj).getValues(), addedObjects);
            } else if (baseObj instanceof COSDictionary) {
                addNewToList(toBeParsedList, ((COSDictionary) baseObj).getValues(), addedObjects);
            } else if (baseObj instanceof COSArray) {
                final Iterator<COSBase> arrIter = ((COSArray) baseObj).iterator();
                while (arrIter.hasNext()) {
                    addNewToList(toBeParsedList, arrIter.next(), addedObjects);
                }
            } else if (baseObj instanceof COSObject) {
                COSObject obj = (COSObject) baseObj;
                long objId = getObjectId(obj);
                COSObjectKey objKey = new COSObjectKey(obj.getObjectNumber().intValue(),
                        obj.getGenerationNumber().intValue());

                if (!(parsedObjects.contains(objId) /*|| document.hasObjectInPool( objKey ) */ )) {
                    Long fileOffset = xrefTrailerResolver.getXrefTable().get(objKey);
                    //  it is allowed that object references point to null, thus we have to test
                    if (fileOffset != null) {
                        if (fileOffset > 0) {
                            objToBeParsed.put(fileOffset, Collections.singletonList(obj));
                        } else {
                            // negative offset means we have a compressed object within object stream;
                            // get offset of object stream
                            fileOffset = xrefTrailerResolver.getXrefTable()
                                    .get(new COSObjectKey(-fileOffset, 0));
                            if ((fileOffset == null) || (fileOffset <= 0)) {
                                throw new IOException(
                                        "Invalid object stream xref object reference: " + fileOffset);
                            }

                            List<COSObject> stmObjects = objToBeParsed.get(fileOffset);
                            if (stmObjects == null) {
                                objToBeParsed.put(fileOffset, stmObjects = new ArrayList<COSObject>());
                            }
                            stmObjects.add(obj);
                        }
                    } else {
                        // NULL object
                        COSObject pdfObject = document.getObjectFromPool(objKey);
                        pdfObject.setObject(COSNull.NULL);
                    }
                }
            }
        }

        // ---- read first COSObject with smallest offset;
        //      resulting object will be added to toBeParsedList
        if (objToBeParsed.isEmpty()) {
            break;
        }

        for (COSObject obj : objToBeParsed.remove(objToBeParsed.firstKey())) {
            COSBase parsedObj = parseObjectDynamically(obj, false);

            obj.setObject(parsedObj);
            addNewToList(toBeParsedList, parsedObj, addedObjects);

            parsedObjects.add(getObjectId(obj));
        }
    }
}
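
The pattern above, reduced to its essentials (a sketch with hypothetical data, not the PDFBox API): a TreeMap keyed by file offset acts as a priority queue, and remove(firstKey()) always pops the batch of objects with the smallest offset, so the file is read mostly front to back.

import java.util.ArrayList;
import java.util.List;
import java.util.TreeMap;

public class OffsetOrderedQueue {
    public static void main(String[] args) {
        TreeMap<Long, List<String>> byOffset = new TreeMap<>();
        byOffset.computeIfAbsent(400L, k -> new ArrayList<>()).add("obj A");
        byOffset.computeIfAbsent(120L, k -> new ArrayList<>()).add("obj B");
        byOffset.computeIfAbsent(120L, k -> new ArrayList<>()).add("obj C");

        while (!byOffset.isEmpty()) {
            // remove(firstKey()) pops the smallest offset and its objects
            List<String> batch = byOffset.remove(byOffset.firstKey());
            System.out.println(batch);
        }
        // prints [obj B, obj C] then [obj A]
    }
}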