List of usage examples for java.util TreeMap remove
public V remove(Object key)
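Before the source-file examples, here is a minimal, self-contained sketch of the remove contract (the map name and keys are illustrative, not taken from any example below): remove returns the value previously associated with the key, or null when the key is absent, and the remaining entries stay in sorted key order.

import java.util.TreeMap;

public class TreeMapRemoveDemo {
    public static void main(String[] args) {
        TreeMap<String, Integer> scores = new TreeMap<String, Integer>();
        scores.put("alice", 10);
        scores.put("bob", 20);

        // remove returns the old value for a present key...
        Integer removed = scores.remove("alice");   // 10
        // ...and null for an absent key.
        Integer missing = scores.remove("carol");   // null

        System.out.println(removed + " " + missing + " " + scores); // 10 null {bob=20}
    }
}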
From source file:org.ecocean.Encounter.java
public void removeDynamicProperty(String name) {
    name = name.replaceAll(";", "_").trim().replaceAll("%20", " ");
    if (dynamicProperties != null) {
        // let's create a TreeMap of the properties
        TreeMap<String, String> tm = new TreeMap<String, String>();
        StringTokenizer st = new StringTokenizer(dynamicProperties, ";");
        while (st.hasMoreTokens()) {
            String token = st.nextToken();
            int equalPlace = token.indexOf("=");
            tm.put(token.substring(0, (equalPlace)), token.substring(equalPlace + 1));
        }
        if (tm.containsKey(name)) {
            tm.remove(name);

            // now let's recreate the dynamicProperties String
            String newProps = tm.toString();
            int stringSize = newProps.length();
            dynamicProperties = newProps.substring(1, (stringSize - 1)).replaceAll(", ", ";") + ";";
        }
    }
}
From source file:org.ecocean.Encounter.java
public void setDynamicProperty(String name, String value) {
    name = name.replaceAll(";", "_").trim().replaceAll("%20", " ");
    value = value.replaceAll(";", "_").trim();

    if (dynamicProperties == null) {
        dynamicProperties = name + "=" + value + ";";
    } else {
        // let's create a TreeMap of the properties
        TreeMap<String, String> tm = new TreeMap<String, String>();
        StringTokenizer st = new StringTokenizer(dynamicProperties, ";");
        while (st.hasMoreTokens()) {
            String token = st.nextToken();
            int equalPlace = token.indexOf("=");
            try {
                tm.put(token.substring(0, equalPlace), token.substring(equalPlace + 1));
            } catch (java.lang.StringIndexOutOfBoundsException soe) {
                // this is a badly formatted pair that should be ignored
            }
        }
        if (tm.containsKey(name)) {
            tm.remove(name);
            tm.put(name, value);

            // now let's recreate the dynamicProperties String
            String newProps = tm.toString();
            int stringSize = newProps.length();
            dynamicProperties = newProps.substring(1, (stringSize - 1)).replaceAll(", ", ";") + ";";
        } else {
            dynamicProperties = dynamicProperties + name + "=" + value + ";";
        }
    }
}
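Both Encounter methods above follow the same pattern: tokenize a ";"-delimited key=value string into a TreeMap, call remove (and optionally put) on the key, then rebuild the string from TreeMap.toString(). A condensed, hedged sketch of just that pattern (the field and method names here are illustrative, not the Encounter API):

import java.util.StringTokenizer;
import java.util.TreeMap;

public class DelimitedProperties {
    // e.g. "color=blue;size=large;"
    private String packed = "color=blue;size=large;";

    public void removeProperty(String name) {
        TreeMap<String, String> tm = new TreeMap<String, String>();
        StringTokenizer st = new StringTokenizer(packed, ";");
        while (st.hasMoreTokens()) {
            String token = st.nextToken();
            int eq = token.indexOf("=");
            tm.put(token.substring(0, eq), token.substring(eq + 1));
        }
        // remove returns the previous value, so a non-null result means the key existed
        if (tm.remove(name) != null) {
            // TreeMap.toString() yields "{k1=v1, k2=v2}"; strip the braces and re-delimit.
            String s = tm.toString();
            packed = s.substring(1, s.length() - 1).replaceAll(", ", ";") + ";";
        }
    }
}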
From source file:com.google.gwt.emultest.java.util.TreeMapTest.java
public void testRemove_ComparableKey() {
    TreeMap<String, Object> map = new TreeMap<String, Object>();
    ConflictingKey conflictingKey = new ConflictingKey("conflictingKey");
    assertNull(map.remove(conflictingKey));
    map.put("something", "value");
    assertNull(map.remove(conflictingKey));
}
From source file:com.sfs.whichdoctor.dao.PersonDAOImpl.java
/**
 * Load a list of people this person has supervised in the past.
 *
 * @param guid the guid
 * @param allRotations the all rotations
 * @return the collection
 */
private HashMap<String, ArrayList<PersonBean>> loadSupervisedPeople(final int guid,
        final boolean allRotations) {

    HashMap<String, ArrayList<PersonBean>> supervisedPeople = new HashMap<String, ArrayList<PersonBean>>();

    // Create new SearchBean of with default values
    SearchBean searchRotations = this.getSearchDAO().initiate("rotation", null);
    searchRotations.setLimit(0);

    RotationBean rotationParam = (RotationBean) searchRotations.getSearchCriteria();
    SupervisorBean supervisor = new SupervisorBean();
    supervisor.setPersonGUID(guid);
    rotationParam.addSupervisor(supervisor);

    BuilderBean loadDetails = new BuilderBean();
    loadDetails.setParameter("ASSESSMENTS", true);
    loadDetails.setParameter("SUPERVISORS", true);

    searchRotations.setSearchCriteria(rotationParam);
    searchRotations.setOrderColumn("rotation.StartDate");
    searchRotations.setOrderColumn2("people.LastName");
    searchRotations.setOrderAscending(false);

    SearchResultsBean studentsSupervised = new SearchResultsBean();
    try {
        studentsSupervised = this.getSearchDAO().search(searchRotations, loadDetails);
    } catch (Exception e) {
        dataLogger.error("Error searching for supervised people: " + e.getMessage());
    }

    final Calendar currentDate = Calendar.getInstance();

    final TreeMap<String, ArrayList<RotationBean>> currentlySupervising = new TreeMap<String, ArrayList<RotationBean>>();
    final TreeMap<String, ArrayList<RotationBean>> previouslySupervised = new TreeMap<String, ArrayList<RotationBean>>();
    final HashMap<String, PersonBean> personMap = new HashMap<String, PersonBean>();

    for (Object rotationObj : studentsSupervised.getSearchResults()) {
        final RotationBean rotation = (RotationBean) rotationObj;

        boolean currentlyTakingPlace = false;
        if (rotation.getStartDate().before(currentDate.getTime())
                && rotation.getEndDate().after(currentDate.getTime())) {
            currentlyTakingPlace = true;
        }

        if (rotation.getPerson() != null) {
            final PersonBean person = rotation.getPerson();
            final String index = person.getLastName() + " " + person.getPreferredName() + " "
                    + person.getPersonIdentifier();

            boolean processed = false;

            if (currentlySupervising.containsKey(index)) {
                // The person exists in the currently supervising list.
                ArrayList<RotationBean> tneRots = currentlySupervising.get(index);
                if (allRotations || currentlyTakingPlace) {
                    tneRots.add(rotation);
                }
                currentlySupervising.put(index, tneRots);
                processed = true;
            }

            if (previouslySupervised.containsKey(index)) {
                // The person exists in the previously supervised list
                ArrayList<RotationBean> tneRots = previouslySupervised.get(index);
                if (allRotations || currentlyTakingPlace) {
                    tneRots.add(rotation);
                }
                if (currentlyTakingPlace) {
                    // This is a current rotation, remove from the previously
                    // supervised list and add to currently supervising.
                    previouslySupervised.remove(index);
                    currentlySupervising.put(index, tneRots);
                } else {
                    previouslySupervised.put(index, tneRots);
                }
                processed = true;
            }

            if (!processed) {
                // This person has not been encountered yet.
                personMap.put(index, person);

                ArrayList<RotationBean> tneRots = new ArrayList<RotationBean>();
                if (allRotations || currentlyTakingPlace) {
                    tneRots.add(rotation);
                }
                if (currentlyTakingPlace) {
                    currentlySupervising.put(index, tneRots);
                } else {
                    previouslySupervised.put(index, tneRots);
                }
            }
        }
    }

    final ArrayList<PersonBean> currentPeople = new ArrayList<PersonBean>();
    final ArrayList<PersonBean> previousPeople = new ArrayList<PersonBean>();

    for (String index : currentlySupervising.keySet()) {
        final PersonBean person = personMap.get(index);
        final ArrayList<RotationBean> tneRots = currentlySupervising.get(index);
        person.setRotations(tneRots);
        currentPeople.add(person);
    }

    for (String index : previouslySupervised.keySet()) {
        final PersonBean person = personMap.get(index);
        final ArrayList<RotationBean> tneRots = previouslySupervised.get(index);
        person.setRotations(tneRots);
        previousPeople.add(person);
    }

    supervisedPeople.put("current", currentPeople);
    supervisedPeople.put("previous", previousPeople);

    return supervisedPeople;
}
From source file:us.levk.math.linear.EucledianDistanceClusterer.java
public Cluster eucledian(final RealMatrix original) throws IOException {
    try (HugeRealMatrix distances = new HugeRealMatrix(original.getRowDimension(),
            original.getRowDimension())) {
        final Map<Integer, Cluster> genehash = new HashMap<Integer, Cluster>() {
            private static final long serialVersionUID = 1L;

            {
                for (int index = original.getRowDimension(); --index >= 0; put(index, new Cluster(index)))
                    ;
            }
        };
        TreeMap<Double, int[]> sorted = new TreeMap<>();

        log.debug("Populating distance matrix");
        for (int i = 0; i < original.getRowDimension(); i++) {
            for (int j = i + 1; j < original.getRowDimension(); j++) {
                // Euclidean distance calculation.
                double total = 0;
                for (int k = 0; k < original.getColumnDimension(); k++) {
                    double left = original.getEntry(i, k);
                    double right = original.getEntry(j, k);
                    if (!isNaN(left) && !isNaN(right) && !isInfinite(left) && !isInfinite(right))
                        total += Math.pow(left - right, 2);
                }
                double distance = Math.pow(total, 0.5);

                distances.setEntry(i, j, distance);
                distances.setEntry(j, i, distance);

                int[] genePair = { i, j };
                // Enter the distance calculated and the genes measured into a
                // treemap. Will be automatically sorted.
                sorted.put(distance, genePair);
            }
        }
        log.debug("Initialized distances matrix " + distances);

        while (true) {
            // Get the first key of the TreeMap. Will be the shortest distance de
            // facto.
            final double minkey = (Double) sorted.firstKey();
            int[] minValues = (int[]) sorted.firstEntry().getValue();

            final int value1 = minValues[0], value2 = minValues[1];
            // find
            Cluster cluster = new Cluster(genehash.get(value1), genehash.get(value2)) {
                {
                    log.debug("Generating cluster from " + value1 + " and " + value2 + " in " + genehash);
                    contains().addAll(genehash.get(value1).contains());
                    contains().addAll(genehash.get(value2).contains());
                    d(minkey);
                    log.debug("Generated cluster " + this);
                }
            };

            genehash.put(cluster.id(), cluster);
            genehash.remove(value1);
            genehash.remove(value2);

            if (genehash.size() <= 1)
                break;

            // Iterate over all the current clusters to remeasure distance with the
            // previously clustered group.
            for (Cluster c : genehash.values()) {
                // Skip measuring the new cluster with itself.
                if (c == cluster)
                    continue;

                double distance = 0;
                int n = 0;

                // Get genes from each cluster. Distance is measured from each element
                // to every element.
                for (int current : c.contains())
                    for (int created : cluster.contains()) {
                        distance += distances.getEntry(current, created);
                        n++;
                    }
                distance = distance / n;

                int[] valuePair = { c.id(), cluster.id() };
                sorted.put(distance, valuePair);
            }

            // Get the shortest distance.
            // Check to make sure shortest distance does not include a gene pair that
            // has already had its elements clustered.
            boolean minimized = false;
            while (!minimized) {
                double mk = sorted.firstKey();
                minValues = sorted.firstEntry().getValue();
                // If the gene pair is not present in the current gene set, remove this
                // distance.
                if (!genehash.containsKey(minValues[0]) || !genehash.containsKey(minValues[1]))
                    sorted.remove(mk);
                else
                    minimized = true;
            }
        }

        return genehash.entrySet().iterator().next().getValue();
    }
}
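The clusterer above (and the HCL builder below) uses TreeMap.remove for lazy invalidation: stale distance entries whose endpoints have already been merged are only discarded when they reach the head of the map. A stripped-down sketch of that idiom (the `alive` set and the int[] pair encoding are illustrative, not the project's API):

import java.util.Arrays;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;

public class LazyInvalidation {
    public static void main(String[] args) {
        TreeMap<Double, int[]> sorted = new TreeMap<>();
        sorted.put(0.5, new int[] { 1, 2 });   // stale: cluster 2 was merged away
        sorted.put(0.9, new int[] { 1, 3 });   // still valid
        Set<Integer> alive = new HashSet<>(Arrays.asList(1, 3));

        // Pop head entries until the smallest key refers only to live ids.
        while (true) {
            Map.Entry<Double, int[]> head = sorted.firstEntry();
            int[] pair = head.getValue();
            if (alive.contains(pair[0]) && alive.contains(pair[1])) {
                System.out.println("closest valid pair at distance " + head.getKey());
                break;
            }
            sorted.remove(head.getKey());      // discard the stale entry and retry
        }
    }
}

Note that because these examples key the TreeMap by the distance value itself, two pairs with exactly equal distances overwrite one another; the code relies on distances being effectively distinct.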
From source file:edu.dfci.cccb.mev.hcl.domain.simple.SimpleTwoDimensionalHclBuilder.java
private Node cluster(final Dataset dataset, Dimension dimension, Metric metric, Linkage linkage)
        throws DatasetException {
    final Type dimensionType = dimension.type();
    final RealMatrix original = toRealMatrix(dataset);
    final int size = dimensionType == ROW ? original.getRowDimension() : original.getColumnDimension();
    final int other = dimensionType == COLUMN ? original.getRowDimension() : original.getColumnDimension();

    Iterator<Integer> enumerator = new Iterator<Integer>() {

        private int counter = -1;

        @Override
        public boolean hasNext() {
            return true;
        }

        @Override
        public Integer next() {
            counter--;
            if (counter > 0)
                counter = -1;
            return counter;
        }

        @Override
        public void remove() {
            throw new UnsupportedOperationException();
        }
    };

    final double[][] distances = new double[size][size];

    log.debug("Populating node hash");
    final Map<Integer, Node> genehash = new HashMap<Integer, Node>() {
        private static final long serialVersionUID = 1L;

        {
            for (int index = size; --index >= 0; put(index,
                    nodeBuilder().leaf(dataset.dimension(dimensionType).keys().get(index))))
                ;
        }
    };
    TreeMap<Double, int[]> sorted = new TreeMap<>();

    log.debug("Populating distance matrix");
    for (int i = 0; i < size; i++) {
        for (int j = i + 1; j < size; j++) {
            double distance = metric.distance(new AbstractList<Double>() {

                private int i;

                @Override
                public Double get(int index) {
                    return dimensionType == ROW ? original.getEntry(i, index) : original.getEntry(index, i);
                }

                @Override
                public int size() {
                    return other;
                }

                private List<Double> initializeProjection(int i) {
                    this.i = i;
                    return this;
                }
            }.initializeProjection(i), new AbstractList<Double>() {

                private int j;

                @Override
                public Double get(int index) {
                    return dimensionType == ROW ? original.getEntry(j, index) : original.getEntry(index, j);
                }

                @Override
                public int size() {
                    return other;
                }

                private List<Double> initializeProjection(int j) {
                    this.j = j;
                    return this;
                }
            }.initializeProjection(j));

            distances[i][j] = distance;
            distances[j][i] = distance;

            int[] genePair = { i, j };
            // Enter the distance calculated and the genes measured into a
            // treemap. Will be automatically sorted.
            sorted.put(distance, genePair);
        }
    }

    log.debug("Aggregating");
    while (true) {
        // Get the first key of the TreeMap. Will be the shortest distance de
        // facto.
        final double minkey = (Double) sorted.firstKey();
        int[] minValues = (int[]) sorted.firstEntry().getValue();

        final int value1 = minValues[0], value2 = minValues[1];
        // find
        Node cluster = nodeBuilder().branch(minkey, genehash.get(value1), genehash.get(value2));
        int id = enumerator.next();

        genehash.put(id, cluster);
        genehash.remove(value1);
        genehash.remove(value2);

        if (genehash.size() <= 1)
            break;

        // Iterate over all the current clusters to remeasure distance with the
        // previously clustered group.
        for (Entry<Integer, Node> e : genehash.entrySet()) {
            Node c = e.getValue();
            // Skip measuring the new cluster with itself.
            if (c == cluster)
                continue;

            List<Double> aggregation = new ArrayList<>();
            // Get genes from each cluster. Distance is measured from each element
            // to every element.
            for (int current : traverse(dimension.keys(), c))
                for (int created : traverse(dimension.keys(), cluster))
                    aggregation.add(distances[current][created]);

            int[] valuePair = { e.getKey(), id };
            sorted.put(linkage.aggregate(aggregation), valuePair);
        }

        // Get the shortest distance.
        // Check to make sure shortest distance does not include a gene pair
        // that has already had its elements clustered.
        boolean minimized = false;
        while (!minimized) {
            double mk = sorted.firstKey();
            minValues = sorted.firstEntry().getValue();
            // If the gene pair is not present in the current gene set, remove
            // this distance.
            if (!genehash.containsKey(minValues[0]) || !genehash.containsKey(minValues[1]))
                sorted.remove(mk);
            else
                minimized = true;
        }
    }

    Node result = genehash.entrySet().iterator().next().getValue();
    log.debug("Clustered " + result);
    return result;
}
From source file:org.apache.hadoop.mapred.HFSPScheduler.java
private void assignSizeBasedTasks(TaskType type, HelperForType helper,
        TreeMap<JobDurationInfo, JobInProgress> sizeBasedJobs,
        TreeMap<JobDurationInfo, TaskStatuses> taskStatusesSizeBased) throws IOException {

    final boolean isMap = type == TaskType.MAP;
    int totClaimedSlots = 0;

    // StringBuilder builder = new StringBuilder("SBJobs(");
    // builder.append(type).append("): [");
    // boolean first = true;
    // for (Entry<JobDurationInfo,JobInProgress> jip : sizeBasedJobs.entrySet())
    // {
    // if (first)
    // first = false;
    // else
    // builder.append(",");
    // builder.append(jip.getValue().getJobID())
    // .append(" -> ")
    // .append(jip.getKey().getPhaseDuration())
    // .append("/")
    // .append(jip.getKey().getPhaseTotalDuration())
    // .append(" p: ")
    // .append(this.getNumPendingNewTasks(jip.getValue(), type))
    // .append(" r: ")
    // .append(this.getNumRunningTasks(jip.getValue(), type))
    // .append(" f: ")
    // .append(this.getNumFinishedTasks(jip.getValue(), type));
    // }
    // builder.append("]");
    // LOG.debug(builder.toString());

    for (Entry<JobDurationInfo, JobInProgress> entry : sizeBasedJobs.entrySet()) {

        JobInProgress jip = entry.getValue();
        JobDurationInfo jdi = entry.getKey();
        TaskStatuses taskStatuses = taskStatusesSizeBased.get(jdi);

        if (!this.isJobReadyForTypeScheduling(jip, type)) {
            if (LOG.isDebugEnabled() && jip.getStatus().getRunState() != JobStatus.SUCCEEDED) {
                LOG.debug("SIZEBASED(" + jip.getJobID() + ":" + type + "):"
                        + "job is not ready for scheduling ("
                        + "status: " + JobStatus.getJobRunState(jip.getStatus().getRunState())
                        + ", mapProgress: " + jip.getStatus().mapProgress()
                        + ", reduceProgress: " + jip.getStatus().reduceProgress()
                        + ", scheduleReduces: " + jip.scheduleReduces() + ")");
            }
            continue;
        }

        // NEW
        int pendingNewTasks = this.getNumPendingNewTasks(jip, type);
        int pendingResumableTasks = (taskStatuses == null) ? 0 : taskStatuses.suspendedTaskStatuses.size();
        int totAvailableSizeBasedSlots = helper.totAvailableSizeBasedSlots();

        // missing slots for resumable
        int missingResumableSlots = 0;
        if (pendingResumableTasks > 0 && pendingResumableTasks > totAvailableSizeBasedSlots) {
            if (totAvailableSizeBasedSlots <= 0)
                missingResumableSlots = pendingResumableTasks;
            else
                missingResumableSlots = pendingResumableTasks - totAvailableSizeBasedSlots;
            totAvailableSizeBasedSlots = (pendingResumableTasks > totAvailableSizeBasedSlots) ? 0
                    : totAvailableSizeBasedSlots - pendingResumableTasks;
        }

        int missingNewSlots = 0;
        if (pendingNewTasks > 0 && pendingNewTasks > totAvailableSizeBasedSlots) {
            if (totAvailableSizeBasedSlots <= 0)
                missingNewSlots = pendingNewTasks;
            else
                missingNewSlots = pendingNewTasks - totAvailableSizeBasedSlots;
            totAvailableSizeBasedSlots = (pendingNewTasks > totAvailableSizeBasedSlots) ? 0
                    : totAvailableSizeBasedSlots - pendingNewTasks;
        }

        TreeMap<TaskAttemptID, TaskStatus> suspended = null;
        if (taskStatuses != null)
            suspended = taskStatuses.suspendedTaskStatuses;

        if (pendingNewTasks > 0 || pendingResumableTasks > 0
                || (suspended != null && !suspended.isEmpty())) {
            LOG.debug(jip.getJobID() + ":" + type
                    + " (d: " + jdi.getPhaseDuration() + "/" + jdi.getPhaseTotalDuration() + "):"
                    + " pendingNewTasks: " + pendingNewTasks
                    + " pendingResumableTasks: " + pendingResumableTasks
                    // + " notResumableTasksOnThisTT: " + notResumableTasks
                    + " totAvailableSizeBasedSlots: "
                    + (helper.totAvailableSizeBasedSlots() <= 0 ? 0 : helper.totAvailableSizeBasedSlots())
                    + " currAvailableSlots: " + helper.currAvailableSlots
                    + " => missingNewSlots: " + missingNewSlots
                    + " missingResumableSlots: " + missingResumableSlots);
        }

        if (this.preemptionStrategy.isPreemptionActive()
                && (missingNewSlots > 0 || missingResumableSlots > 0)) {
            ClaimedSlots claimedSlots = this.claimSlots(helper, Phase.SIZE_BASED, jip, missingNewSlots,
                    missingResumableSlots, totClaimedSlots, sizeBasedJobs, taskStatusesSizeBased);

            totClaimedSlots += claimedSlots.getNumPreemptedForNewTasks()
                    + claimedSlots.getNumPreemptedForResumableTasks();

            LOG.debug(jip.getJobID() + " taskStatusesOnTT: " + taskStatusesSizeBased.get(jdi)
                    + " pendingNewTasks: " + pendingNewTasks
                    + " pendingResumableTasks: " + pendingResumableTasks
                    + " missingNewSlots: " + missingNewSlots
                    + " missingResumableSlots: " + missingResumableSlots);
        }

        while (pendingNewTasks > 0 || pendingResumableTasks > 0
                || (suspended != null && !suspended.isEmpty())) {

            if (helper.currAvailableSlots <= 0) {
                LOG.debug("SIZEBASED(" + jip.getJobID() + ":" + type + "):"
                        + " no slots available on " + taskHelper.ttStatus.getTrackerName());
                return;
            }

            LOG.debug("SIZEBASED(" + jip.getJobID() + ":" + type + "):"
                    + " totAvailableSizeBasedSlots(): " + helper.totAvailableSizeBasedSlots()
                    + " pendingNewTasks: " + pendingNewTasks
                    + " pendingResumableTasks: " + pendingResumableTasks
                    + " suspended(" + (suspended == null ? 0 : suspended.size()) + "): " + suspended);

            if (this.preemptionStrategy.isPreemptionActive()
                    && (suspended != null && !suspended.isEmpty())) {
                TaskStatus toResume = suspended.remove(suspended.firstKey());
                // LOG.debug("RESUME: " + toResume.getTaskID() + " " +
                // toResume.getRunState());
                TaskAttemptID tAID = toResume.getTaskID();
                JobInProgress rJIP = this.taskTrackerManager.getJob(tAID.getTaskID().getJobID());
                TaskInProgress tip = rJIP.getTaskInProgress(tAID.getTaskID());

                if (this.preemptionStrategy.resume(tip, toResume)) {
                    taskHelper.resume(tAID, Phase.SIZE_BASED);
                    pendingResumableTasks -= 1;
                } else {
                    LOG.debug("SIZEBASED(" + jip.getJobID() + ":" + type + "):"
                            + " cannot resume " + tAID + " on " + taskHelper.ttStatus.getTrackerName());
                }
            } else {

                Task task = this.obtainNewTask(jip, taskHelper.ttStatus, isMap, taskHelper.currentTime);

                if (task == null) {
                    LOG.debug("SIZEBASED(" + jip.getJobID() + ":" + type + "):"
                            + " cannot obtain slot for new task on " + taskHelper.ttStatus.getTrackerName()
                            + " (#pendingNew: " + pendingNewTasks
                            + ", #pendingResumable: " + pendingResumableTasks
                            + ", #free_" + type + "_slots: " + helper.currAvailableSlots + ")");
                    break;
                }

                taskHelper.slotObtained(task, Phase.SIZE_BASED);
                pendingNewTasks -= 1;
            }
        }
    }
}
From source file:org.apache.hadoop.hbase.util.RegionSplitter.java
static void rollingSplit(String tableName, SplitAlgorithm splitAlgo, Configuration conf)
        throws IOException, InterruptedException {
    final int minOS = conf.getInt("split.outstanding", 2);

    HTable table = new HTable(conf, tableName);

    // max outstanding splits. default == 50% of servers
    final int MAX_OUTSTANDING = Math.max(table.getConnection().getCurrentNrHRS() / 2, minOS);

    Path hbDir = FSUtils.getRootDir(conf);
    Path tableDir = FSUtils.getTableDir(hbDir, table.getName());
    Path splitFile = new Path(tableDir, "_balancedSplit");
    FileSystem fs = FileSystem.get(conf);

    // get a list of daughter regions to create
    LinkedList<Pair<byte[], byte[]>> tmpRegionSet = getSplits(table, splitAlgo);
    LinkedList<Pair<byte[], byte[]>> outstanding = Lists.newLinkedList();
    int splitCount = 0;
    final int origCount = tmpRegionSet.size();

    // all splits must compact & we have 1 compact thread, so 2 split
    // requests to the same RS can stall the outstanding split queue.
    // To fix, group the regions into an RS pool and round-robin through it
    LOG.debug("Bucketing regions by regionserver...");
    TreeMap<String, LinkedList<Pair<byte[], byte[]>>> daughterRegions = Maps.newTreeMap();
    for (Pair<byte[], byte[]> dr : tmpRegionSet) {
        String rsLocation = table.getRegionLocation(dr.getSecond()).getHostnamePort();
        if (!daughterRegions.containsKey(rsLocation)) {
            LinkedList<Pair<byte[], byte[]>> entry = Lists.newLinkedList();
            daughterRegions.put(rsLocation, entry);
        }
        daughterRegions.get(rsLocation).add(dr);
    }
    LOG.debug("Done with bucketing. Split time!");
    long startTime = System.currentTimeMillis();

    // open the split file and modify it as splits finish
    FSDataInputStream tmpIn = fs.open(splitFile);
    byte[] rawData = new byte[tmpIn.available()];
    tmpIn.readFully(rawData);
    tmpIn.close();
    FSDataOutputStream splitOut = fs.create(splitFile);
    splitOut.write(rawData);

    try {
        // *** split code ***
        while (!daughterRegions.isEmpty()) {
            LOG.debug(daughterRegions.size() + " RS have regions to splt.");

            // Get RegionServer : region count mapping
            final TreeMap<ServerName, Integer> rsSizes = Maps.newTreeMap();
            Map<HRegionInfo, ServerName> regionsInfo = table.getRegionLocations();
            for (ServerName rs : regionsInfo.values()) {
                if (rsSizes.containsKey(rs)) {
                    rsSizes.put(rs, rsSizes.get(rs) + 1);
                } else {
                    rsSizes.put(rs, 1);
                }
            }

            // sort the RS by the number of regions they have
            List<String> serversLeft = Lists.newArrayList(daughterRegions.keySet());
            Collections.sort(serversLeft, new Comparator<String>() {
                public int compare(String o1, String o2) {
                    return rsSizes.get(o1).compareTo(rsSizes.get(o2));
                }
            });

            // round-robin through the RS list. Choose the lightest-loaded servers
            // first to keep the master from load-balancing regions as we split.
            for (String rsLoc : serversLeft) {
                Pair<byte[], byte[]> dr = null;

                // find a region in the RS list that hasn't been moved
                LOG.debug("Finding a region on " + rsLoc);
                LinkedList<Pair<byte[], byte[]>> regionList = daughterRegions.get(rsLoc);
                while (!regionList.isEmpty()) {
                    dr = regionList.pop();

                    // get current region info
                    byte[] split = dr.getSecond();
                    HRegionLocation regionLoc = table.getRegionLocation(split);

                    // if this region moved locations
                    String newRs = regionLoc.getHostnamePort();
                    if (newRs.compareTo(rsLoc) != 0) {
                        LOG.debug("Region with " + splitAlgo.rowToStr(split) + " moved to " + newRs
                                + ". Relocating...");
                        // relocate it, don't use it right now
                        if (!daughterRegions.containsKey(newRs)) {
                            LinkedList<Pair<byte[], byte[]>> entry = Lists.newLinkedList();
                            daughterRegions.put(newRs, entry);
                        }
                        daughterRegions.get(newRs).add(dr);
                        dr = null;
                        continue;
                    }

                    // make sure this region wasn't already split
                    byte[] sk = regionLoc.getRegionInfo().getStartKey();
                    if (sk.length != 0) {
                        if (Bytes.equals(split, sk)) {
                            LOG.debug("Region already split on " + splitAlgo.rowToStr(split)
                                    + ". Skipping this region...");
                            ++splitCount;
                            dr = null;
                            continue;
                        }
                        byte[] start = dr.getFirst();
                        Preconditions.checkArgument(Bytes.equals(start, sk),
                                splitAlgo.rowToStr(start) + " != " + splitAlgo.rowToStr(sk));
                    }

                    // passed all checks! found a good region
                    break;
                }
                if (regionList.isEmpty()) {
                    daughterRegions.remove(rsLoc);
                }
                if (dr == null)
                    continue;

                // we have a good region, time to split!
                byte[] split = dr.getSecond();
                LOG.debug("Splitting at " + splitAlgo.rowToStr(split));
                HBaseAdmin admin = new HBaseAdmin(table.getConfiguration());
                admin.split(table.getTableName(), split);

                LinkedList<Pair<byte[], byte[]>> finished = Lists.newLinkedList();
                if (conf.getBoolean("split.verify", true)) {
                    // we need to verify and rate-limit our splits
                    outstanding.addLast(dr);
                    // with too many outstanding splits, wait for some to finish
                    while (outstanding.size() >= MAX_OUTSTANDING) {
                        finished = splitScan(outstanding, table, splitAlgo);
                        if (finished.isEmpty()) {
                            Thread.sleep(30 * 1000);
                        } else {
                            outstanding.removeAll(finished);
                        }
                    }
                } else {
                    finished.add(dr);
                }

                // mark each finished region as successfully split.
                for (Pair<byte[], byte[]> region : finished) {
                    splitOut.writeChars("- " + splitAlgo.rowToStr(region.getFirst()) + " "
                            + splitAlgo.rowToStr(region.getSecond()) + "\n");
                    splitCount++;
                    if (splitCount % 10 == 0) {
                        long tDiff = (System.currentTimeMillis() - startTime) / splitCount;
                        LOG.debug("STATUS UPDATE: " + splitCount + " / " + origCount + ". Avg Time / Split = "
                                + org.apache.hadoop.util.StringUtils.formatTime(tDiff));
                    }
                }
            }
        }
        if (conf.getBoolean("split.verify", true)) {
            while (!outstanding.isEmpty()) {
                LinkedList<Pair<byte[], byte[]>> finished = splitScan(outstanding, table, splitAlgo);
                if (finished.isEmpty()) {
                    Thread.sleep(30 * 1000);
                } else {
                    outstanding.removeAll(finished);
                    for (Pair<byte[], byte[]> region : finished) {
                        splitOut.writeChars("- " + splitAlgo.rowToStr(region.getFirst()) + " "
                                + splitAlgo.rowToStr(region.getSecond()) + "\n");
                    }
                }
            }
        }
        LOG.debug("All regions have been successfully split!");
    } finally {
        long tDiff = System.currentTimeMillis() - startTime;
        LOG.debug("TOTAL TIME = " + org.apache.hadoop.util.StringUtils.formatTime(tDiff));
        LOG.debug("Splits = " + splitCount);
        LOG.debug("Avg Time / Split = " + org.apache.hadoop.util.StringUtils.formatTime(tDiff / splitCount));

        splitOut.close();
        if (table != null) {
            table.close();
        }
    }
    fs.delete(splitFile, false);
}
From source file:org.apache.nutch.crawl.CrawlDbReader.java
private TreeMap<String, Writable> processStatJobHelper(String crawlDb, Configuration config, boolean sort)
        throws IOException, InterruptedException, ClassNotFoundException {
    Path tmpFolder = new Path(crawlDb, "stat_tmp" + System.currentTimeMillis());

    Job job = NutchJob.getInstance(config);
    config = job.getConfiguration();
    job.setJobName("stats " + crawlDb);
    config.setBoolean("db.reader.stats.sort", sort);

    FileInputFormat.addInputPath(job, new Path(crawlDb, CrawlDb.CURRENT_NAME));
    job.setInputFormatClass(SequenceFileInputFormat.class);

    job.setJarByClass(CrawlDbReader.class);
    job.setMapperClass(CrawlDbStatMapper.class);
    job.setCombinerClass(CrawlDbStatReducer.class);
    job.setReducerClass(CrawlDbStatReducer.class);

    FileOutputFormat.setOutputPath(job, tmpFolder);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NutchWritable.class);

    // https://issues.apache.org/jira/browse/NUTCH-1029
    config.setBoolean("mapreduce.fileoutputcommitter.marksuccessfuljobs", false);

    FileSystem fileSystem = tmpFolder.getFileSystem(config);
    try {
        boolean success = job.waitForCompletion(true);
        if (!success) {
            String message = "CrawlDbReader job did not succeed, job status:" + job.getStatus().getState()
                    + ", reason: " + job.getStatus().getFailureInfo();
            LOG.error(message);
            fileSystem.delete(tmpFolder, true);
            throw new RuntimeException(message);
        }
    } catch (IOException | InterruptedException | ClassNotFoundException e) {
        LOG.error(StringUtils.stringifyException(e));
        fileSystem.delete(tmpFolder, true);
        throw e;
    }

    // reading the result
    SequenceFile.Reader[] readers = SegmentReaderUtil.getReaders(tmpFolder, config);

    Text key = new Text();
    NutchWritable value = new NutchWritable();

    TreeMap<String, Writable> stats = new TreeMap<>();
    for (int i = 0; i < readers.length; i++) {
        SequenceFile.Reader reader = readers[i];
        while (reader.next(key, value)) {
            String k = key.toString();
            Writable val = stats.get(k);
            if (val == null) {
                stats.put(k, value.get());
                continue;
            }
            if (k.equals("sc")) {
                float min = Float.MAX_VALUE;
                float max = Float.MIN_VALUE;
                if (stats.containsKey("scn")) {
                    min = ((FloatWritable) stats.get("scn")).get();
                } else {
                    min = ((FloatWritable) stats.get("sc")).get();
                }
                if (stats.containsKey("scx")) {
                    max = ((FloatWritable) stats.get("scx")).get();
                } else {
                    max = ((FloatWritable) stats.get("sc")).get();
                }
                float fvalue = ((FloatWritable) value.get()).get();
                if (min > fvalue) {
                    min = fvalue;
                }
                if (max < fvalue) {
                    max = fvalue;
                }
                stats.put("scn", new FloatWritable(min));
                stats.put("scx", new FloatWritable(max));
            } else if (k.equals("ft") || k.equals("fi")) {
                long min = Long.MAX_VALUE;
                long max = Long.MIN_VALUE;
                String minKey = k + "n";
                String maxKey = k + "x";
                if (stats.containsKey(minKey)) {
                    min = ((LongWritable) stats.get(minKey)).get();
                } else if (stats.containsKey(k)) {
                    min = ((LongWritable) stats.get(k)).get();
                }
                if (stats.containsKey(maxKey)) {
                    max = ((LongWritable) stats.get(maxKey)).get();
                } else if (stats.containsKey(k)) {
                    max = ((LongWritable) stats.get(k)).get();
                }
                long lvalue = ((LongWritable) value.get()).get();
                if (min > lvalue) {
                    min = lvalue;
                }
                if (max < lvalue) {
                    max = lvalue;
                }
                stats.put(k + "n", new LongWritable(min));
                stats.put(k + "x", new LongWritable(max));
            } else if (k.equals("sct")) {
                FloatWritable fvalue = (FloatWritable) value.get();
                ((FloatWritable) val).set(((FloatWritable) val).get() + fvalue.get());
            } else if (k.equals("scd")) {
                MergingDigest tdigest = null;
                MergingDigest tdig = MergingDigest
                        .fromBytes(ByteBuffer.wrap(((BytesWritable) value.get()).getBytes()));
                if (val instanceof BytesWritable) {
                    tdigest = MergingDigest.fromBytes(ByteBuffer.wrap(((BytesWritable) val).getBytes()));
                    tdigest.add(tdig);
                } else {
                    tdigest = tdig;
                }
                ByteBuffer tdigestBytes = ByteBuffer.allocate(tdigest.smallByteSize());
                tdigest.asSmallBytes(tdigestBytes);
                stats.put(k, new BytesWritable(tdigestBytes.array()));
            } else {
                LongWritable lvalue = (LongWritable) value.get();
                ((LongWritable) val).set(((LongWritable) val).get() + lvalue.get());
            }
        }
        reader.close();
    }

    // remove score, fetch interval, and fetch time
    // (used for min/max calculation)
    stats.remove("sc");
    stats.remove("fi");
    stats.remove("ft");

    // removing the tmp folder
    fileSystem.delete(tmpFolder, true);

    return stats;
}
From source file:org.apache.pdfbox.pdfparser.NonSequentialPDFParser.java
/**
 * Will parse every object necessary to load a single page from the pdf document.
 * We try our best to order objects according to offset in file before reading
 * to minimize seek operations.
 *
 * @param dict the COSObject from the parent pages.
 * @param excludeObjects dictionary object reference entries with these names will not be parsed
 *
 * @throws IOException
 */
private void parseDictObjects(COSDictionary dict, COSName... excludeObjects) throws IOException {
    // ---- create queue for objects waiting for further parsing
    final Queue<COSBase> toBeParsedList = new LinkedList<COSBase>();
    // offset ordered object map
    final TreeMap<Long, List<COSObject>> objToBeParsed = new TreeMap<Long, List<COSObject>>();
    // in case of compressed objects offset points to stmObj
    final Set<Long> parsedObjects = new HashSet<Long>();
    final Set<Long> addedObjects = new HashSet<Long>();

    // ---- add objects not to be parsed to list of already parsed objects
    if (excludeObjects != null) {
        for (COSName objName : excludeObjects) {
            COSBase baseObj = dict.getItem(objName);
            if (baseObj instanceof COSObject) {
                parsedObjects.add(getObjectId((COSObject) baseObj));
            }
        }
    }

    addNewToList(toBeParsedList, dict.getValues(), addedObjects);

    // ---- go through objects to be parsed
    while (!(toBeParsedList.isEmpty() && objToBeParsed.isEmpty())) {
        // -- first get all COSObject from other kind of objects and
        // put them in objToBeParsed; afterwards toBeParsedList is empty
        COSBase baseObj;
        while ((baseObj = toBeParsedList.poll()) != null) {
            if (baseObj instanceof COSStream) {
                addNewToList(toBeParsedList, ((COSStream) baseObj).getValues(), addedObjects);
            } else if (baseObj instanceof COSDictionary) {
                addNewToList(toBeParsedList, ((COSDictionary) baseObj).getValues(), addedObjects);
            } else if (baseObj instanceof COSArray) {
                final Iterator<COSBase> arrIter = ((COSArray) baseObj).iterator();
                while (arrIter.hasNext()) {
                    addNewToList(toBeParsedList, arrIter.next(), addedObjects);
                }
            } else if (baseObj instanceof COSObject) {
                COSObject obj = (COSObject) baseObj;
                long objId = getObjectId(obj);
                COSObjectKey objKey = new COSObjectKey(obj.getObjectNumber().intValue(),
                        obj.getGenerationNumber().intValue());

                if (!(parsedObjects.contains(objId) /*|| document.hasObjectInPool( objKey ) */)) {
                    Long fileOffset = xrefTrailerResolver.getXrefTable().get(objKey);
                    // it is allowed that object references point to null, thus we have to test
                    if (fileOffset != null) {
                        if (fileOffset > 0) {
                            objToBeParsed.put(fileOffset, Collections.singletonList(obj));
                        } else {
                            // negative offset means we have a compressed object within object stream;
                            // get offset of object stream
                            fileOffset = xrefTrailerResolver.getXrefTable()
                                    .get(new COSObjectKey(-fileOffset, 0));
                            if ((fileOffset == null) || (fileOffset <= 0)) {
                                throw new IOException(
                                        "Invalid object stream xref object reference: " + fileOffset);
                            }

                            List<COSObject> stmObjects = objToBeParsed.get(fileOffset);
                            if (stmObjects == null) {
                                objToBeParsed.put(fileOffset, stmObjects = new ArrayList<COSObject>());
                            }
                            stmObjects.add(obj);
                        }
                    } else {
                        // NULL object
                        COSObject pdfObject = document.getObjectFromPool(objKey);
                        pdfObject.setObject(COSNull.NULL);
                    }
                }
            }
        }

        // ---- read first COSObject with smallest offset;
        // resulting object will be added to toBeParsedList
        if (objToBeParsed.isEmpty()) {
            break;
        }

        for (COSObject obj : objToBeParsed.remove(objToBeParsed.firstKey())) {
            COSBase parsedObj = parseObjectDynamically(obj, false);
            obj.setObject(parsedObj);
            addNewToList(toBeParsedList, parsedObj, addedObjects);

            parsedObjects.add(getObjectId(obj));
        }
    }
}
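The PDFBox parser above treats the TreeMap as an offset-ordered work queue: objToBeParsed.remove(objToBeParsed.firstKey()) pops the batch of objects with the smallest file offset, so reads proceed in ascending offset order. A minimal sketch of that drain-in-key-order idiom (the offsets and payload strings are made up for illustration):

import java.util.ArrayList;
import java.util.List;
import java.util.TreeMap;

public class DrainByOffset {
    public static void main(String[] args) {
        TreeMap<Long, List<String>> byOffset = new TreeMap<>();
        byOffset.computeIfAbsent(4096L, k -> new ArrayList<>()).add("obj 12");
        byOffset.computeIfAbsent(512L, k -> new ArrayList<>()).add("obj 3");
        byOffset.computeIfAbsent(512L, k -> new ArrayList<>()).add("obj 7");

        // remove(firstKey()) returns the mapped list and deletes the entry,
        // so the loop visits each offset exactly once, in ascending order.
        while (!byOffset.isEmpty()) {
            List<String> batch = byOffset.remove(byOffset.firstKey());
            System.out.println("parsing " + batch);
        }
    }
}

TreeMap also provides pollFirstEntry(), which removes and returns the first mapping in a single call and can replace the remove(firstKey()) pair when the key itself is not needed separately.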