Usage examples for java.util.TreeMap#clear()
Method signature: public void clear()
From source file:hydrograph.ui.dataviewer.filter.FilterConditionsDialog.java
/**
 * Builds the SelectionListener for the filter dialog's "Clear" button.
 *
 * On click it empties the live and working condition lists, unchecks the retain
 * checkbox, clears the group-selection map, resets the retained filter state on the
 * side indicated by {@code isRemote}, rebuilds the condition table with one blank
 * row, reruns the reload action and record count, and closes the dialog.
 *
 * NOTE(review): reproduced verbatim from a collapsed/scraped listing; formatting is
 * not the original author's. The local variable name "listner" is a pre-existing typo.
 *
 * @param tableViewer              viewer backing the filter-condition table
 * @param conditionsList           live filter conditions; cleared and re-seeded with one blank Condition
 * @param dummyList                working copy of the conditions; cleared and re-seeded
 * @param originalFilterConditions stored filter state that gets reset on clear
 * @param isRemote                 true to clear the remote-side filter, false for the local side
 * @param retainButton             "retain filter" checkbox; unchecked on clear
 * @param groupSelectionMap        group index -> grouped row selections; emptied and pushed back into the filter state
 * @param btnAddGrp                "add group" button handed through to the redraw
 * @return the listener to attach to the clear button
 */
public SelectionListener getClearButtonListener(final TableViewer tableViewer, final List<Condition> conditionsList, final List<Condition> dummyList, final FilterConditions originalFilterConditions, final boolean isRemote, final Button retainButton, final TreeMap<Integer, List<List<Integer>>> groupSelectionMap, final Button btnAddGrp) { SelectionListener listner = new SelectionListener() { @Override public void widgetSelected(SelectionEvent e) { dummyList.clear(); conditionsList.clear(); dummyList.add(new Condition()); retainButton.setSelection(false); groupSelectionMap.clear(); if (isRemote) { originalFilterConditions.setRetainRemote(false); originalFilterConditions.setRemoteCondition(""); originalFilterConditions.setRemoteConditions(new ArrayList<Condition>()); debugDataViewer.setRemoteCondition(""); originalFilterConditions.setRemoteGroupSelectionMap(groupSelectionMap); debugDataViewer.getDataViewerAdapter().setFilterCondition(debugDataViewer.getLocalCondition()); retainRemoteFilter.setRetainFilter(false); FilterHelper.INSTANCE.setRemoteCondition(""); ((ReloadAction) debugDataViewer.getActionFactory().getAction(ReloadAction.class.getName())) .setIfFilterReset(true); } else { originalFilterConditions.setRetainLocal(false); originalFilterConditions.setLocalCondition(""); originalFilterConditions.setLocalConditions(new ArrayList<Condition>()); debugDataViewer.setLocalCondition(""); originalFilterConditions.setLocalGroupSelectionMap(groupSelectionMap); 
// Local clear (continued): fall back to whichever remote condition is still active,
// then dispose the table rows, rebuild the columns, reload the data and close the dialog.
debugDataViewer.getDataViewerAdapter().setFilterCondition(debugDataViewer.getRemoteCondition()); retainLocalFilter.setRetainFilter(false); FilterHelper.INSTANCE.setLocalCondition(""); } TableItem[] items = tableViewer.getTable().getItems(); tableViewer.refresh(); for (int i = 0; i < items.length; i++) { items[i].dispose(); } conditionsList.add(0, new Condition()); FilterHelper.INSTANCE.disposeAllColumns(tableViewer); redrawAllColumns(tableViewer, conditionsList, btnAddGrp, groupSelectionMap, isRemote); ((ReloadAction) debugDataViewer.getActionFactory().getAction(ReloadAction.class.getName())).run(); debugDataViewer.submitRecordCountJob(); cancelPressed(); } @Override public void widgetDefaultSelected(SelectionEvent e) { } }; return listner; }
From source file:hydrograph.ui.dataviewer.filter.FilterConditionsDialog.java
/**
 * Creates the listener for the "Clear Groups" button.
 *
 * Clearing the groups refreshes the working copy of the conditions from the live list,
 * empties the group-selection map on the relevant side (remote or local), rebuilds the
 * table columns, and disables the button again.
 *
 * @param tableViewer       viewer backing the filter-condition table
 * @param groupSelectionMap group index -> grouped row selections; emptied here
 * @param clearGroups       the "clear groups" button itself; disabled after clearing
 * @param btnAddGrp         "add group" button handed to the redraw routine
 * @param conditionsList    current filter conditions; cloned into the working list
 * @param isRemote          whether the remote or the local group map is being reset
 * @return the listener to attach to the clear-groups button
 */
public SelectionListener clearGroupsListner(final TableViewer tableViewer,
        final TreeMap<Integer, List<List<Integer>>> groupSelectionMap, final Button clearGroups,
        final Button btnAddGrp, final List<Condition> conditionsList, final boolean isRemote) {
    return new SelectionListener() {
        @Override
        public void widgetSelected(SelectionEvent event) {
            // Refresh the working copy from the live conditions before dropping the groups.
            dummyList.clear();
            dummyList.addAll(FilterHelper.INSTANCE.cloneList(conditionsList));
            // Drop every group and push the (now empty) map back onto the stored filter state.
            groupSelectionMap.clear();
            if (isRemote) {
                originalFilterConditions.setRemoteGroupSelectionMap(groupSelectionMap);
            } else {
                originalFilterConditions.setLocalGroupSelectionMap(groupSelectionMap);
            }
            // Rebuild the table so the group decorations disappear, then grey the button out.
            FilterHelper.INSTANCE.disposeAllColumns(tableViewer);
            redrawAllColumns(tableViewer, conditionsList, btnAddGrp, groupSelectionMap, isRemote);
            clearGroups.setEnabled(false);
        }

        @Override
        public void widgetDefaultSelected(SelectionEvent event) {
            // No default-selection behaviour for a push button.
        }
    };
}
From source file:hydrograph.ui.dataviewer.filter.FilterConditionsDialog.java
private boolean storeGroupSelection(TableViewer tableViewer, TreeMap<Integer, List<List<Integer>>> groupSelectionMap) { boolean retVal = false; List<List<Integer>> grpList = new ArrayList<>(); List<Integer> selectionList = new ArrayList<>(); TableItem[] items = tableViewer.getTable().getItems(); for (TableItem tableItem : items) { Button button = (Button) tableItem.getData(GROUP_CHECKBOX); if (button.getSelection()) { selectionList.add(tableViewer.getTable().indexOf(tableItem)); }//from w ww. j a v a2 s. c om } if (groupSelectionMap.isEmpty()) { grpList.add(selectionList); groupSelectionMap.put(0, grpList); retVal = true; } else { if (FilterHelper.INSTANCE.validateUserGroupSelection(groupSelectionMap, selectionList)) { if (FilterHelper.INSTANCE.isColumnModifiable(groupSelectionMap, selectionList)) { retVal = true; } else { grpList.add(selectionList); Map<Integer, List<List<Integer>>> tempMap = new TreeMap<>(); tempMap.putAll(groupSelectionMap); groupSelectionMap.clear(); groupSelectionMap.put(0, grpList); for (int i = 0; i < tempMap.size(); i++) { groupSelectionMap.put(i + 1, tempMap.get(i)); } retVal = true; FilterHelper.INSTANCE.rearrangeGroups(groupSelectionMap, selectionList); } } } return retVal; }
From source file:org.apache.hadoop.hbase.regionserver.StripeStoreFileManager.java
/**
 * Loads initial store files that were picked up from some physical location pertaining to
 * this store (presumably). Unlike adding files after compaction, assumes empty initial
 * sets, and is forgiving with regard to stripe constraints - at worst, many/all files will
 * go to level 0.
 *
 * NOTE(review): reproduced verbatim from a collapsed/scraped listing; formatting is not
 * the original author's.
 *
 * @param storeFiles Store files to add.
 */
private void loadUnclassifiedStoreFiles(List<StoreFile> storeFiles) { LOG.debug("Attempting to load " + storeFiles.size() + " store files."); TreeMap<byte[], ArrayList<StoreFile>> candidateStripes = new TreeMap<byte[], ArrayList<StoreFile>>( MAP_COMPARATOR); ArrayList<StoreFile> level0Files = new ArrayList<StoreFile>(); // Separate the files into tentative stripes; then validate. Currently, we rely on metadata. // If needed, we could dynamically determine the stripes in future. for (StoreFile sf : storeFiles) { byte[] startRow = startOf(sf), endRow = endOf(sf); // Validate the range and put the files into place. if (isInvalid(startRow) || isInvalid(endRow)) { insertFileIntoStripe(level0Files, sf); // No metadata - goes to L0. ensureLevel0Metadata(sf); } else if (!isOpen(startRow) && !isOpen(endRow) && nonOpenRowCompare(startRow, endRow) >= 0) { LOG.error("Unexpected metadata - start row [" + Bytes.toString(startRow) + "], end row [" + Bytes.toString(endRow) + "] in file [" + sf.getPath() + "], pushing to L0"); insertFileIntoStripe(level0Files, sf); // Bad metadata - goes to L0 also. ensureLevel0Metadata(sf); } else { ArrayList<StoreFile> stripe = candidateStripes.get(endRow); if (stripe == null) { stripe = new ArrayList<StoreFile>(); candidateStripes.put(endRow, stripe); } insertFileIntoStripe(stripe, sf); } } // Possible improvement - for variable-count stripes, if all the files are in L0, we can // instead create single, open-ended stripe with all files. 
// Pass 2: walk the candidate stripes in end-row order; any file whose start row breaks
// stripe continuity is demoted to level 0, and emptied-out stripes are discarded.
boolean hasOverlaps = false; byte[] expectedStartRow = null; // first stripe can start wherever Iterator<Map.Entry<byte[], ArrayList<StoreFile>>> entryIter = candidateStripes.entrySet().iterator(); while (entryIter.hasNext()) { Map.Entry<byte[], ArrayList<StoreFile>> entry = entryIter.next(); ArrayList<StoreFile> files = entry.getValue(); // Validate the file start rows, and remove the bad ones to level 0. for (int i = 0; i < files.size(); ++i) { StoreFile sf = files.get(i); byte[] startRow = startOf(sf); if (expectedStartRow == null) { expectedStartRow = startRow; // ensure that first stripe is still consistent } else if (!rowEquals(expectedStartRow, startRow)) { hasOverlaps = true; LOG.warn("Store file doesn't fit into the tentative stripes - expected to start at [" + Bytes.toString(expectedStartRow) + "], but starts at [" + Bytes.toString(startRow) + "], to L0 it goes"); StoreFile badSf = files.remove(i); insertFileIntoStripe(level0Files, badSf); ensureLevel0Metadata(badSf); --i; } } // Check if any files from the candidate stripe are valid. If so, add a stripe. byte[] endRow = entry.getKey(); if (!files.isEmpty()) { expectedStartRow = endRow; // Next stripe must start exactly at that key. } else { entryIter.remove(); } } // In the end, there must be open ends on two sides. If not, and there were no errors i.e. // files are consistent, they might be coming from a split. We will treat the boundaries // as open keys anyway, and log the message. // If there were errors, we'll play it safe and dump everything into L0. 
// Pass 3: verify the surviving stripes cover the full key space; on inconsistency dump
// everything to level 0, then snapshot the result into a fresh immutable State object.
if (!candidateStripes.isEmpty()) { StoreFile firstFile = candidateStripes.firstEntry().getValue().get(0); boolean isOpen = isOpen(startOf(firstFile)) && isOpen(candidateStripes.lastKey()); if (!isOpen) { LOG.warn("The range of the loaded files does not cover full key space: from [" + Bytes.toString(startOf(firstFile)) + "], to [" + Bytes.toString(candidateStripes.lastKey()) + "]"); if (!hasOverlaps) { ensureEdgeStripeMetadata(candidateStripes.firstEntry().getValue(), true); ensureEdgeStripeMetadata(candidateStripes.lastEntry().getValue(), false); } else { LOG.warn("Inconsistent files, everything goes to L0."); for (ArrayList<StoreFile> files : candidateStripes.values()) { for (StoreFile sf : files) { insertFileIntoStripe(level0Files, sf); ensureLevel0Metadata(sf); } } candidateStripes.clear(); } } } // Copy the results into the fields. State state = new State(); state.level0Files = ImmutableList.copyOf(level0Files); state.stripeFiles = new ArrayList<ImmutableList<StoreFile>>(candidateStripes.size()); state.stripeEndRows = new byte[Math.max(0, candidateStripes.size() - 1)][]; ArrayList<StoreFile> newAllFiles = new ArrayList<StoreFile>(level0Files); int i = candidateStripes.size() - 1; for (Map.Entry<byte[], ArrayList<StoreFile>> entry : candidateStripes.entrySet()) { state.stripeFiles.add(ImmutableList.copyOf(entry.getValue())); newAllFiles.addAll(entry.getValue()); if (i > 0) { state.stripeEndRows[state.stripeFiles.size() - 1] = entry.getKey(); } --i; } state.allFilesCached = ImmutableList.copyOf(newAllFiles); this.state = state; debugDumpState("Files loaded"); }
From source file:net.massbank.validator.RecordValidator.java
/**
 * Validates MassBank record files found under {@code dataPath} (online variant) and
 * builds a per-file status report.
 *
 * NOTE(review): the original Javadoc and many inline comments were mojibake (mis-encoded
 * Japanese); the intent below is reconstructed from the code -- TODO confirm against the
 * upstream MassBank sources. The code is reproduced verbatim from a collapsed/scraped
 * listing; formatting is not the original author's.
 *
 * @param db         database access used to collect already-registered record IDs
 * @param op         stream for progress/diagnostic messages
 * @param dataPath   directory containing the record files to validate
 * @param registPath directory of already-registered record files
 * @param ver        record format version; 1 enables the legacy Ver.1 rules
 * @return map of file name -> "STATUS\tdetails" (STATUS_OK / STATUS_WARN / STATUS_ERR);
 *         empty map on fatal read errors, fresh empty map on database errors
 * @throws IOException declared, though file-read errors are handled internally
 */
private static TreeMap<String, String> validationRecordOnline(DatabaseAccess db, PrintStream op, String dataPath, String registPath, int ver) throws IOException { op.println(msgInfo("validation archive is [" + UPLOAD_RECDATA_ZIP + "] or [" + UPLOAD_RECDATA_MSBK + "].")); if (ver == 1) { op.println(msgInfo("check record format version is [version 1].")); } final String[] dataList = (new File(dataPath)).list(); TreeMap<String, String> validationMap = new TreeMap<String, String>(); if (dataList.length == 0) { op.println(msgWarn("no file for validation.")); return validationMap; } // ---------------------------------------------------- // ??? // ---------------------------------------------------- String[] requiredList = new String[] { // Ver.2 "ACCESSION: ", "RECORD_TITLE: ", "DATE: ", "AUTHORS: ", "LICENSE: ", "CH$NAME: ", "CH$COMPOUND_CLASS: ", "CH$FORMULA: ", "CH$EXACT_MASS: ", "CH$SMILES: ", "CH$IUPAC: ", "AC$INSTRUMENT: ", "AC$INSTRUMENT_TYPE: ", "AC$MASS_SPECTROMETRY: MS_TYPE ", "AC$MASS_SPECTROMETRY: ION_MODE ", "PK$NUM_PEAK: ", "PK$PEAK: " }; if (ver == 1) { // Ver.1 requiredList = new String[] { "ACCESSION: ", "RECORD_TITLE: ", "DATE: ", "AUTHORS: ", "COPYRIGHT: ", "CH$NAME: ", "CH$COMPOUND_CLASS: ", "CH$FORMULA: ", "CH$EXACT_MASS: ", "CH$SMILES: ", "CH$IUPAC: ", "AC$INSTRUMENT: ", "AC$INSTRUMENT_TYPE: ", "AC$ANALYTICAL_CONDITION: MODE ", "PK$NUM_PEAK: ", "PK$PEAK: " }; } for (int i = 0; i < dataList.length; i++) { String name = dataList[i]; String status = ""; StringBuilder detailsErr = new StringBuilder(); StringBuilder detailsWarn = new StringBuilder(); // ???? File file = new File(dataPath + name); if (file.isDirectory()) { // ?? 
// Per-file pre-checks: directories, hidden files and files without the record
// extension are rejected outright with STATUS_ERR.
status = STATUS_ERR; detailsErr.append("[" + name + "] is directory."); validationMap.put(name, status + "\t" + detailsErr.toString()); continue; } else if (file.isHidden()) { // ??? status = STATUS_ERR; detailsErr.append("[" + name + "] is hidden."); validationMap.put(name, status + "\t" + detailsErr.toString()); continue; } else if (name.lastIndexOf(REC_EXTENSION) == -1) { // ???? status = STATUS_ERR; detailsErr.append("file extension of [" + name + "] is not [" + REC_EXTENSION + "]."); validationMap.put(name, status + "\t" + detailsErr.toString()); continue; } // ?? boolean isEndTagRead = false; boolean isInvalidInfo = false; boolean isDoubleByte = false; ArrayList<String> fileContents = new ArrayList<String>(); boolean existLicense = false; // LICENSE?Ver.1 ArrayList<String> workChName = new ArrayList<String>(); // RECORD_TITLE??CH$NAME??Ver.1? String workAcInstrumentType = ""; // RECORD_TITLE??AC$INSTRUMENT_TYPE??Ver.1? String workAcMsType = ""; // RECORD_TITLE??AC$MASS_SPECTROMETRY: // MS_TYPE??Ver.2 String line = ""; BufferedReader br = null; try { br = new BufferedReader(new FileReader(file)); while ((line = br.readLine()) != null) { if (isEndTagRead) { if (!line.equals("")) { isInvalidInfo = true; } } // if (line.startsWith("//")) { isEndTagRead = true; } fileContents.add(line); // LICENSE?Ver.1 if (line.startsWith("LICENSE: ")) { existLicense = true; } // CH$NAME?Ver.1? else if (line.startsWith("CH$NAME: ")) { workChName.add(line.trim().replaceAll("CH\\$NAME: ", "")); } // AC$INSTRUMENT_TYPE?Ver.1? else if (line.startsWith("AC$INSTRUMENT_TYPE: ")) { workAcInstrumentType = line.trim().replaceAll("AC\\$INSTRUMENT_TYPE: ", ""); } // AC$MASS_SPECTROMETRY: MS_TYPE?Ver.2 else if (ver != 1 && line.startsWith("AC$MASS_SPECTROMETRY: MS_TYPE ")) { workAcMsType = line.trim().replaceAll("AC\\$MASS_SPECTROMETRY: MS_TYPE ", ""); } // ? 
// Double-byte detection: a line whose MS932-encoded byte length differs from its
// char length contains a multi-byte character, which is an error for records.
if (!isDoubleByte) { byte[] bytes = line.getBytes("MS932"); if (bytes.length != line.length()) { isDoubleByte = true; } } } } catch (IOException e) { Logger.getLogger("global").severe("file read failed." + NEW_LINE + "    " + file.getPath()); e.printStackTrace(); op.println(msgErr("server error.")); validationMap.clear(); return validationMap; } finally { try { if (br != null) { br.close(); } } catch (IOException e) { } } if (isInvalidInfo) { // ????? if (status.equals("")) status = STATUS_WARN; detailsWarn.append("invalid after the end tag [//]."); } if (isDoubleByte) { // ????? if (status.equals("")) status = STATUS_ERR; detailsErr.append("double-byte character included."); } if (ver == 1 && existLicense) { // LICENSE???Ver.1 if (status.equals("")) status = STATUS_ERR; detailsErr.append("[LICENSE: ] tag can not be used in record format [version 1]."); } // ---------------------------------------------------- // ???? // ---------------------------------------------------- boolean isNameCheck = false; int peakNum = -1; for (int j = 0; j < requiredList.length; j++) { String requiredStr = requiredList[j]; ArrayList<String> valStrs = new ArrayList<String>(); // boolean findRequired = false; // boolean findValue = false; // boolean isPeakMode = false; // for (int k = 0; k < fileContents.size(); k++) { String lineStr = fileContents.get(k); // ????RELATED_RECORD?????? if (lineStr.startsWith("//")) { // Ver.1? break; } else if (ver == 1 && lineStr.startsWith("RELATED_RECORD:")) { // Ver.1 break; } // ????? else if (isPeakMode) { findRequired = true; if (!lineStr.trim().equals("")) { valStrs.add(lineStr); } } // ?????? 
// Required-tag scan: once PK$PEAK: is seen, peak mode collects every remaining
// non-blank line as peak data; other tags capture only their first value.
else if (lineStr.indexOf(requiredStr) != -1) { // findRequired = true; if (requiredStr.equals("PK$PEAK: ")) { isPeakMode = true; findValue = true; valStrs.add(lineStr.replace(requiredStr, "")); } else { // String tmpVal = lineStr.replace(requiredStr, ""); if (!tmpVal.trim().equals("")) { findValue = true; valStrs.add(tmpVal); } break; } } } if (!findRequired) { // ?????? status = STATUS_ERR; detailsErr.append("no required item [" + requiredStr + "]."); } else { if (!findValue) { // ????? status = STATUS_ERR; detailsErr.append("no value of required item [" + requiredStr + "]."); } else { // ??? // ---------------------------------------------------- // ?? // ---------------------------------------------------- String val = (valStrs.size() > 0) ? valStrs.get(0) : ""; // ACESSIONVer.1? if (requiredStr.equals("ACCESSION: ")) { if (!val.equals(name.replace(REC_EXTENSION, ""))) { status = STATUS_ERR; detailsErr.append("value of required item [" + requiredStr + "] not correspond to file name."); } if (val.length() != 8) { status = STATUS_ERR; detailsErr.append( "value of required item [" + requiredStr + "] is 8 digits necessary."); } } // RECORD_TITLEVer.1? 
// RECORD_TITLE cross-checks: compound name against CH$NAME, instrument type
// against AC$INSTRUMENT_TYPE, and (Ver.2) ms type against MS_TYPE.
else if (requiredStr.equals("RECORD_TITLE: ")) { if (!val.equals(DEFAULT_VALUE)) { if (val.indexOf(";") != -1) { String[] recTitle = val.split(";"); if (!workChName.contains(recTitle[0].trim())) { if (status.equals("")) status = STATUS_WARN; detailsWarn.append("value of required item [" + requiredStr + "], compound name is not included in the [CH$NAME]."); } if (!workAcInstrumentType.equals(recTitle[1].trim())) { if (status.equals("")) status = STATUS_WARN; detailsWarn.append("value of required item [" + requiredStr + "], instrument type is different from [AC$INSTRUMENT_TYPE]."); } if (ver != 1 && !workAcMsType.equals(recTitle[2].trim())) { // Ver.2 if (status.equals("")) status = STATUS_WARN; detailsWarn.append("value of required item [" + requiredStr + "], ms type is different from [AC$MASS_SPECTROMETRY: MS_TYPE]."); } } else { if (status.equals("")) status = STATUS_WARN; detailsWarn.append("value of required item [" + requiredStr + "] is not record title format."); if (!workChName.contains(val)) { detailsWarn.append("value of required item [" + requiredStr + "], compound name is not included in the [CH$NAME]."); } if (!workAcInstrumentType.equals(DEFAULT_VALUE)) { detailsWarn.append("value of required item [" + requiredStr + "], instrument type is different from [AC$INSTRUMENT_TYPE]."); } if (ver != 1 && !workAcMsType.equals(DEFAULT_VALUE)) { // Ver.2 detailsWarn.append("value of required item [" + requiredStr + "], ms type is different from [AC$MASS_SPECTROMETRY: MS_TYPE]."); } } } else { if (!workAcInstrumentType.equals(DEFAULT_VALUE)) { if (status.equals("")) status = STATUS_WARN; detailsWarn.append("value of required item [" + requiredStr + "], instrument type is different from [AC$INSTRUMENT_TYPE]."); } if (ver != 1 && !workAcMsType.equals(DEFAULT_VALUE)) { // Ver.2 if (status.equals("")) status = STATUS_WARN; detailsWarn.append("value of required item [" + requiredStr + "], ms type is different from [AC$MASS_SPECTROMETRY: MS_TYPE]."); } } } // DATEVer.1? 
// DATE / CH$COMPOUND_CLASS / CH$EXACT_MASS value-format checks (warnings only).
else if (requiredStr.equals("DATE: ") && !val.equals(DEFAULT_VALUE)) { val = val.replace(".", "/"); val = val.replace("-", "/"); try { DateFormat.getDateInstance(DateFormat.SHORT, Locale.JAPAN).parse(val); } catch (ParseException e) { if (status.equals("")) status = STATUS_WARN; detailsWarn .append("value of required item [" + requiredStr + "] is not date format."); } } // CH$COMPOUND_CLASSVer.1? else if (requiredStr.equals("CH$COMPOUND_CLASS: ") && !val.equals(DEFAULT_VALUE)) { if (!val.startsWith("Natural Product") && !val.startsWith("Non-Natural Product")) { if (status.equals("")) status = STATUS_WARN; detailsWarn.append("value of required item [" + requiredStr + "] is not compound class format."); } } // CH$EXACT_MASSVer.1? else if (requiredStr.equals("CH$EXACT_MASS: ") && !val.equals(DEFAULT_VALUE)) { try { Double.parseDouble(val); } catch (NumberFormatException e) { if (status.equals("")) status = STATUS_WARN; detailsWarn.append("value of required item [" + requiredStr + "] is not numeric."); } } // AC$INSTRUMENT_TYPEVer.1? 
// AC$INSTRUMENT_TYPE spacing, MS_TYPE "MSn" shape, ION_MODE/MODE value, and
// PK$NUM_PEAK numeric checks; PK$PEAK must start with the literal header line.
else if (requiredStr.equals("AC$INSTRUMENT_TYPE: ") && !val.equals(DEFAULT_VALUE)) { if (val.trim().indexOf(" ") != -1) { if (status.equals("")) status = STATUS_WARN; detailsWarn .append("value of required item [" + requiredStr + "] is space included."); } if (val.trim().indexOf(" ") != -1) { if (status.equals("")) status = STATUS_WARN; detailsWarn .append("value of required item [" + requiredStr + "] is space included."); } } // AC$MASS_SPECTROMETRY: MS_TYPEVer.2 else if (ver != 1 && requiredStr.equals("AC$MASS_SPECTROMETRY: MS_TYPE ") && !val.equals(DEFAULT_VALUE)) { boolean isMsType = true; if (val.startsWith("MS")) { val = val.replace("MS", ""); if (!val.equals("")) { try { Integer.parseInt(val); } catch (NumberFormatException e) { isMsType = false; } } } else { isMsType = false; } if (!isMsType) { if (status.equals("")) status = STATUS_WARN; detailsWarn.append("value of required item [" + requiredStr + "] is not \"MSn\"."); } } // AC$MASS_SPECTROMETRY: // ION_MODEVer.2?AC$ANALYTICAL_CONDITION: MODEVer.1 else if ((ver != 1 && requiredStr.equals("AC$MASS_SPECTROMETRY: ION_MODE ") && !val.equals(DEFAULT_VALUE)) || (ver == 1 && requiredStr.equals("AC$ANALYTICAL_CONDITION: MODE ") && !val.equals(DEFAULT_VALUE))) { if (!val.equals("POSITIVE") && !val.equals("NEGATIVE")) { if (status.equals("")) status = STATUS_WARN; detailsWarn.append("value of required item [" + requiredStr + "] is not \"POSITIVE\" or \"NEGATIVE\"."); } } // PK$NUM_PEAKVer.1? else if (requiredStr.equals("PK$NUM_PEAK: ") && !val.equals(DEFAULT_VALUE)) { try { peakNum = Integer.parseInt(val); } catch (NumberFormatException e) { status = STATUS_ERR; detailsErr.append("value of required item [" + requiredStr + "] is not numeric."); } } // PK$PEAK:Ver.1? else if (requiredStr.equals("PK$PEAK: ")) { if (valStrs.size() == 0 || !valStrs.get(0).startsWith("m/z int. rel.int.")) { status = STATUS_ERR; detailsErr.append( "value of required item [PK$PEAK: ] , the first line is not \"PK$PEAK: m/z int. 
rel.int.\"."); } else { boolean isNa = false; String peak = ""; String mz = ""; String intensity = ""; boolean mzDuplication = false; boolean mzNotNumeric = false; boolean intensityNotNumeric = false; boolean invalidFormat = false; HashSet<String> mzSet = new HashSet<String>(); for (int l = 0; l < valStrs.size(); l++) { peak = valStrs.get(l).trim(); // N/A if (peak.indexOf(DEFAULT_VALUE) != -1) { isNa = true; break; } if (l == 0) { continue; } // m/z int. rel.int.?????????? if (peak.indexOf(" ") != -1) { mz = peak.split(" ")[0]; if (!mzSet.add(mz)) { mzDuplication = true; } try { Double.parseDouble(mz); } catch (NumberFormatException e) { mzNotNumeric = true; } intensity = peak.split(" ")[1]; try { Double.parseDouble(intensity); } catch (NumberFormatException e) { intensityNotNumeric = true; } } else { invalidFormat = true; } if (mzDuplication && mzNotNumeric && intensityNotNumeric && invalidFormat) { break; } } if (isNa) {// PK$PEAK:?N/A?? if (peakNum != -1) { // PK$NUM_PEAK:N/A?? if (status.equals("")) status = STATUS_WARN; detailsWarn .append("value of required item [PK$NUM_PEAK: ] is mismatch or \"" + DEFAULT_VALUE + "\"."); } if (valStrs.size() - 1 > 0) { // PK$PEAK:???????? if (status.equals("")) status = STATUS_WARN; detailsWarn.append( "value of required item [PK$NUM_PEAK: ] is invalid peak information exists."); } } else { if (mzDuplication) { status = STATUS_ERR; detailsErr.append( "mz value of required item [" + requiredStr + "] is duplication."); } if (mzNotNumeric) { status = STATUS_ERR; detailsErr.append( "mz value of required item [" + requiredStr + "] is not numeric."); } if (intensityNotNumeric) { status = STATUS_ERR; detailsErr.append("intensity value of required item [" + requiredStr + "] is not numeric."); } if (invalidFormat) { status = STATUS_ERR; detailsErr.append( "value of required item [" + requiredStr + "] is not peak format."); } if (peakNum != 0 && valStrs.size() - 1 == 0) { // ?????N/A????PK$NUM_PEAK:?0??????? 
// Peak-list consistency: count mismatches and missing values are warnings.
if (status.equals("")) status = STATUS_WARN; detailsWarn.append( "value of required item [PK$PEAK: ] is no value. at that time, please add \"" + DEFAULT_VALUE + "\". "); } if (peakNum != valStrs.size() - 1) { if (status.equals("")) status = STATUS_WARN; detailsWarn .append("value of required item [PK$NUM_PEAK: ] is mismatch or \"" + DEFAULT_VALUE + "\"."); } } } } } } } String details = detailsErr.toString() + detailsWarn.toString(); if (status.equals("")) { status = STATUS_OK; details = " "; } validationMap.put(name, status + "\t" + details); } // ---------------------------------------------------- // ???? // ---------------------------------------------------- // ?ID?DB HashSet<String> regIdList = new HashSet<String>(); String[] sqls = { "SELECT ID FROM SPECTRUM ORDER BY ID", "SELECT ID FROM RECORD ORDER BY ID", "SELECT ID FROM PEAK GROUP BY ID ORDER BY ID", "SELECT ID FROM CH_NAME ID ORDER BY ID", "SELECT ID FROM CH_LINK ID ORDER BY ID", "SELECT ID FROM TREE WHERE ID IS NOT NULL AND ID<>'' ORDER BY ID" }; for (int i = 0; i < sqls.length; i++) { String execSql = sqls[i]; ResultSet rs = null; try { rs = db.executeQuery(execSql); while (rs.next()) { String idStr = rs.getString("ID"); regIdList.add(idStr); } } catch (SQLException e) { Logger.getLogger("global").severe("    sql : " + execSql); e.printStackTrace(); op.println(msgErr("database access error.")); return new TreeMap<String, String>(); } finally { try { if (rs != null) { rs.close(); } } catch (SQLException e) { } } } // ?ID? final String[] recFileList = (new File(registPath)).list(); for (int i = 0; i < recFileList.length; i++) { String name = recFileList[i]; File file = new File(registPath + File.separator + name); if (!file.isFile() || file.isHidden() || name.lastIndexOf(REC_EXTENSION) == -1) { continue; } String idStr = name.replace(REC_EXTENSION, ""); regIdList.add(idStr); } // ?? 
// Final pass: downgrade any non-error entry to STATUS_WARN when its ID is already
// present in the DB or in the registered-records directory.
for (Map.Entry<String, String> e : validationMap.entrySet()) { String statusStr = e.getValue().split("\t")[0]; if (statusStr.equals(STATUS_ERR)) { continue; } String nameStr = e.getKey(); String idStr = e.getKey().replace(REC_EXTENSION, ""); String detailsStr = e.getValue().split("\t")[1]; if (regIdList.contains(idStr)) { statusStr = STATUS_WARN; detailsStr += "id [" + idStr + "] of file name [" + nameStr + "] already registered."; validationMap.put(nameStr, statusStr + "\t" + detailsStr); } } return validationMap; }
From source file:net.massbank.validator.RecordValidator.java
/** * ??/*from w ww . ja va2 s .com*/ * * @param db * DB * @param op * PrintWriter? * @param dataPath * ? * @param registPath * * @param ver * ? * @return ??Map<??, ?> * @throws IOException * */ private static TreeMap<String, String> validationRecord(DatabaseAccess dbx, PrintStream op, String dataPath, String registPath, int ver) throws IOException { if (ver == 1) { op.println(msgInfo("check record format version is [version 1].")); } final String[] dataList = (new File(dataPath)).list(); TreeMap<String, String> validationMap = new TreeMap<String, String>(); if (dataList.length == 0) { op.println(msgWarn("no file for validation.")); return validationMap; } // ---------------------------------------------------- // ??? // ---------------------------------------------------- String[] requiredList = new String[] { // Ver.2 "ACCESSION: ", "RECORD_TITLE: ", "DATE: ", "AUTHORS: ", "LICENSE: ", "CH$NAME: ", "CH$COMPOUND_CLASS: ", "CH$FORMULA: ", "CH$EXACT_MASS: ", "CH$SMILES: ", "CH$IUPAC: ", "AC$INSTRUMENT: ", "AC$INSTRUMENT_TYPE: ", "AC$MASS_SPECTROMETRY: MS_TYPE ", "AC$MASS_SPECTROMETRY: ION_MODE ", "PK$NUM_PEAK: ", "PK$PEAK: " }; if (ver == 1) { // Ver.1 requiredList = new String[] { "ACCESSION: ", "RECORD_TITLE: ", "DATE: ", "AUTHORS: ", "COPYRIGHT: ", "CH$NAME: ", "CH$COMPOUND_CLASS: ", "CH$FORMULA: ", "CH$EXACT_MASS: ", "CH$SMILES: ", "CH$IUPAC: ", "AC$INSTRUMENT: ", "AC$INSTRUMENT_TYPE: ", "AC$ANALYTICAL_CONDITION: MODE ", "PK$NUM_PEAK: ", "PK$PEAK: " }; } for (int i = 0; i < dataList.length; i++) { String name = dataList[i]; String status = ""; StringBuilder detailsErr = new StringBuilder(); StringBuilder detailsWarn = new StringBuilder(); // ???? File file = new File(dataPath + name); if (file.isDirectory()) { // ?? status = STATUS_ERR; detailsErr.append("[" + name + "] is directory."); validationMap.put(name, status + "\t" + detailsErr.toString()); continue; } else if (file.isHidden()) { // ??? 
status = STATUS_ERR; detailsErr.append("[" + name + "] is hidden."); validationMap.put(name, status + "\t" + detailsErr.toString()); continue; } else if (name.lastIndexOf(REC_EXTENSION) == -1) { // ???? status = STATUS_ERR; detailsErr.append("file extension of [" + name + "] is not [" + REC_EXTENSION + "]."); validationMap.put(name, status + "\t" + detailsErr.toString()); continue; } // ?? boolean isEndTagRead = false; boolean isInvalidInfo = false; boolean isDoubleByte = false; ArrayList<String> fileContents = new ArrayList<String>(); boolean existLicense = false; // LICENSE?Ver.1 ArrayList<String> workChName = new ArrayList<String>(); // RECORD_TITLE??CH$NAME??Ver.1? String workAcInstrumentType = ""; // RECORD_TITLE??AC$INSTRUMENT_TYPE??Ver.1? String workAcMsType = ""; // RECORD_TITLE??AC$MASS_SPECTROMETRY: // MS_TYPE??Ver.2 String line = ""; BufferedReader br = null; try { br = new BufferedReader(new FileReader(file)); while ((line = br.readLine()) != null) { if (isEndTagRead) { if (!line.equals("")) { isInvalidInfo = true; } } // if (line.startsWith("//")) { isEndTagRead = true; } fileContents.add(line); // LICENSE?Ver.1 if (line.startsWith("LICENSE: ")) { existLicense = true; } // CH$NAME?Ver.1? else if (line.startsWith("CH$NAME: ")) { workChName.add(line.trim().replaceAll("CH\\$NAME: ", "")); } // AC$INSTRUMENT_TYPE?Ver.1? else if (line.startsWith("AC$INSTRUMENT_TYPE: ")) { workAcInstrumentType = line.trim().replaceAll("AC\\$INSTRUMENT_TYPE: ", ""); } // AC$MASS_SPECTROMETRY: MS_TYPE?Ver.2 else if (ver != 1 && line.startsWith("AC$MASS_SPECTROMETRY: MS_TYPE ")) { workAcMsType = line.trim().replaceAll("AC\\$MASS_SPECTROMETRY: MS_TYPE ", ""); } // ? if (!isDoubleByte) { byte[] bytes = line.getBytes("MS932"); if (bytes.length != line.length()) { isDoubleByte = true; } } } } catch (IOException e) { Logger.getLogger("global").severe("file read failed." 
+ NEW_LINE + " " + file.getPath()); e.printStackTrace(); op.println(msgErr("server error.")); validationMap.clear(); return validationMap; } finally { try { if (br != null) { br.close(); } } catch (IOException e) { } } if (isInvalidInfo) { // ????? if (status.equals("")) status = STATUS_WARN; detailsWarn.append("invalid after the end tag [//]."); } if (isDoubleByte) { // ????? if (status.equals("")) status = STATUS_ERR; detailsErr.append("double-byte character included."); } if (ver == 1 && existLicense) { // LICENSE???Ver.1 if (status.equals("")) status = STATUS_ERR; detailsErr.append("[LICENSE: ] tag can not be used in record format [version 1]."); } // ---------------------------------------------------- // ???? // ---------------------------------------------------- boolean isNameCheck = false; int peakNum = -1; for (int j = 0; j < requiredList.length; j++) { String requiredStr = requiredList[j]; ArrayList<String> valStrs = new ArrayList<String>(); // boolean findRequired = false; // boolean findValue = false; // boolean isPeakMode = false; // for (int k = 0; k < fileContents.size(); k++) { String lineStr = fileContents.get(k); // ????RELATED_RECORD?????? if (lineStr.startsWith("//")) { // Ver.1? break; } else if (ver == 1 && lineStr.startsWith("RELATED_RECORD:")) { // Ver.1 break; } // ????? else if (isPeakMode) { findRequired = true; if (!lineStr.trim().equals("")) { valStrs.add(lineStr); } } // ?????? else if (lineStr.indexOf(requiredStr) != -1) { // findRequired = true; if (requiredStr.equals("PK$PEAK: ")) { isPeakMode = true; findValue = true; valStrs.add(lineStr.replace(requiredStr, "")); } else { // String tmpVal = lineStr.replace(requiredStr, ""); if (!tmpVal.trim().equals("")) { findValue = true; valStrs.add(tmpVal); } break; } } } if (!findRequired) { // ?????? status = STATUS_ERR; detailsErr.append("no required item [" + requiredStr + "]."); } else { if (!findValue) { // ????? 
status = STATUS_ERR; detailsErr.append("no value of required item [" + requiredStr + "]."); } else { // ??? // ---------------------------------------------------- // ?? // ---------------------------------------------------- String val = (valStrs.size() > 0) ? valStrs.get(0) : ""; // ACESSIONVer.1? if (requiredStr.equals("ACCESSION: ")) { if (!val.equals(name.replace(REC_EXTENSION, ""))) { status = STATUS_ERR; detailsErr.append("value of required item [" + requiredStr + "] not correspond to file name."); } if (val.length() != 8) { status = STATUS_ERR; detailsErr.append( "value of required item [" + requiredStr + "] is 8 digits necessary."); } } // RECORD_TITLEVer.1? else if (requiredStr.equals("RECORD_TITLE: ")) { if (!val.equals(DEFAULT_VALUE)) { if (val.indexOf(";") != -1) { String[] recTitle = val.split(";"); if (!workChName.contains(recTitle[0].trim())) { if (status.equals("")) status = STATUS_WARN; detailsWarn.append("value of required item [" + requiredStr + "], compound name is not included in the [CH$NAME]."); } if (!workAcInstrumentType.equals(recTitle[1].trim())) { if (status.equals("")) status = STATUS_WARN; detailsWarn.append("value of required item [" + requiredStr + "], instrument type is different from [AC$INSTRUMENT_TYPE]."); } if (ver != 1 && !workAcMsType.equals(recTitle[2].trim())) { // Ver.2 if (status.equals("")) status = STATUS_WARN; detailsWarn.append("value of required item [" + requiredStr + "], ms type is different from [AC$MASS_SPECTROMETRY: MS_TYPE]."); } } else { if (status.equals("")) status = STATUS_WARN; detailsWarn.append("value of required item [" + requiredStr + "] is not record title format."); if (!workChName.contains(val)) { detailsWarn.append("value of required item [" + requiredStr + "], compound name is not included in the [CH$NAME]."); } if (!workAcInstrumentType.equals(DEFAULT_VALUE)) { detailsWarn.append("value of required item [" + requiredStr + "], instrument type is different from [AC$INSTRUMENT_TYPE]."); } if (ver != 
1 && !workAcMsType.equals(DEFAULT_VALUE)) { // Ver.2 detailsWarn.append("value of required item [" + requiredStr + "], ms type is different from [AC$MASS_SPECTROMETRY: MS_TYPE]."); } } } else { if (!workAcInstrumentType.equals(DEFAULT_VALUE)) { if (status.equals("")) status = STATUS_WARN; detailsWarn.append("value of required item [" + requiredStr + "], instrument type is different from [AC$INSTRUMENT_TYPE]."); } if (ver != 1 && !workAcMsType.equals(DEFAULT_VALUE)) { // Ver.2 if (status.equals("")) status = STATUS_WARN; detailsWarn.append("value of required item [" + requiredStr + "], ms type is different from [AC$MASS_SPECTROMETRY: MS_TYPE]."); } } } // DATEVer.1? else if (requiredStr.equals("DATE: ") && !val.equals(DEFAULT_VALUE)) { val = val.replace(".", "/"); val = val.replace("-", "/"); try { DateFormat.getDateInstance(DateFormat.SHORT, Locale.JAPAN).parse(val); } catch (ParseException e) { if (status.equals("")) status = STATUS_WARN; detailsWarn .append("value of required item [" + requiredStr + "] is not date format."); } } // CH$COMPOUND_CLASSVer.1? else if (requiredStr.equals("CH$COMPOUND_CLASS: ") && !val.equals(DEFAULT_VALUE)) { if (!val.startsWith("Natural Product") && !val.startsWith("Non-Natural Product")) { if (status.equals("")) status = STATUS_WARN; detailsWarn.append("value of required item [" + requiredStr + "] is not compound class format."); } } // CH$EXACT_MASSVer.1? else if (requiredStr.equals("CH$EXACT_MASS: ") && !val.equals(DEFAULT_VALUE)) { try { Double.parseDouble(val); } catch (NumberFormatException e) { if (status.equals("")) status = STATUS_WARN; detailsWarn.append("value of required item [" + requiredStr + "] is not numeric."); } } // AC$INSTRUMENT_TYPEVer.1? 
else if (requiredStr.equals("AC$INSTRUMENT_TYPE: ") && !val.equals(DEFAULT_VALUE)) { if (val.trim().indexOf(" ") != -1) { if (status.equals("")) status = STATUS_WARN; detailsWarn .append("value of required item [" + requiredStr + "] is space included."); } if (val.trim().indexOf(" ") != -1) { if (status.equals("")) status = STATUS_WARN; detailsWarn .append("value of required item [" + requiredStr + "] is space included."); } } // AC$MASS_SPECTROMETRY: MS_TYPEVer.2 else if (ver != 1 && requiredStr.equals("AC$MASS_SPECTROMETRY: MS_TYPE ") && !val.equals(DEFAULT_VALUE)) { boolean isMsType = true; if (val.startsWith("MS")) { val = val.replace("MS", ""); if (!val.equals("")) { try { Integer.parseInt(val); } catch (NumberFormatException e) { isMsType = false; } } } else { isMsType = false; } if (!isMsType) { if (status.equals("")) status = STATUS_WARN; detailsWarn.append("value of required item [" + requiredStr + "] is not \"MSn\"."); } } // AC$MASS_SPECTROMETRY: // ION_MODEVer.2?AC$ANALYTICAL_CONDITION: MODEVer.1 else if ((ver != 1 && requiredStr.equals("AC$MASS_SPECTROMETRY: ION_MODE ") && !val.equals(DEFAULT_VALUE)) || (ver == 1 && requiredStr.equals("AC$ANALYTICAL_CONDITION: MODE ") && !val.equals(DEFAULT_VALUE))) { if (!val.equals("POSITIVE") && !val.equals("NEGATIVE")) { if (status.equals("")) status = STATUS_WARN; detailsWarn.append("value of required item [" + requiredStr + "] is not \"POSITIVE\" or \"NEGATIVE\"."); } } // PK$NUM_PEAKVer.1? else if (requiredStr.equals("PK$NUM_PEAK: ") && !val.equals(DEFAULT_VALUE)) { try { peakNum = Integer.parseInt(val); } catch (NumberFormatException e) { status = STATUS_ERR; detailsErr.append("value of required item [" + requiredStr + "] is not numeric."); } } // PK$PEAK:Ver.1? else if (requiredStr.equals("PK$PEAK: ")) { if (valStrs.size() == 0 || !valStrs.get(0).startsWith("m/z int. rel.int.")) { status = STATUS_ERR; detailsErr.append( "value of required item [PK$PEAK: ] , the first line is not \"PK$PEAK: m/z int. 
rel.int.\"."); } else { boolean isNa = false; String peak = ""; String mz = ""; String intensity = ""; boolean mzDuplication = false; boolean mzNotNumeric = false; boolean intensityNotNumeric = false; boolean invalidFormat = false; HashSet<String> mzSet = new HashSet<String>(); for (int l = 0; l < valStrs.size(); l++) { peak = valStrs.get(l).trim(); // N/A if (peak.indexOf(DEFAULT_VALUE) != -1) { isNa = true; break; } if (l == 0) { continue; } // m/z int. rel.int.?????????? if (peak.indexOf(" ") != -1) { mz = peak.split(" ")[0]; if (!mzSet.add(mz)) { mzDuplication = true; } try { Double.parseDouble(mz); } catch (NumberFormatException e) { mzNotNumeric = true; } intensity = peak.split(" ")[1]; try { Double.parseDouble(intensity); } catch (NumberFormatException e) { intensityNotNumeric = true; } } else { invalidFormat = true; } if (mzDuplication && mzNotNumeric && intensityNotNumeric && invalidFormat) { break; } } if (isNa) {// PK$PEAK:?N/A?? if (peakNum != -1) { // PK$NUM_PEAK:N/A?? if (status.equals("")) status = STATUS_WARN; detailsWarn .append("value of required item [PK$NUM_PEAK: ] is mismatch or \"" + DEFAULT_VALUE + "\"."); } if (valStrs.size() - 1 > 0) { // PK$PEAK:???????? if (status.equals("")) status = STATUS_WARN; detailsWarn.append( "value of required item [PK$NUM_PEAK: ] is invalid peak information exists."); } } else { if (mzDuplication) { status = STATUS_ERR; detailsErr.append( "mz value of required item [" + requiredStr + "] is duplication."); } if (mzNotNumeric) { status = STATUS_ERR; detailsErr.append( "mz value of required item [" + requiredStr + "] is not numeric."); } if (intensityNotNumeric) { status = STATUS_ERR; detailsErr.append("intensity value of required item [" + requiredStr + "] is not numeric."); } if (invalidFormat) { status = STATUS_ERR; detailsErr.append( "value of required item [" + requiredStr + "] is not peak format."); } if (peakNum != 0 && valStrs.size() - 1 == 0) { // ?????N/A????PK$NUM_PEAK:?0??????? 
if (status.equals("")) status = STATUS_WARN; detailsWarn.append( "value of required item [PK$PEAK: ] is no value. at that time, please add \"" + DEFAULT_VALUE + "\". "); } if (peakNum != valStrs.size() - 1) { if (status.equals("")) status = STATUS_WARN; detailsWarn .append("value of required item [PK$NUM_PEAK: ] is mismatch or \"" + DEFAULT_VALUE + "\"."); } } } } } } } String details = detailsErr.toString() + detailsWarn.toString(); if (status.equals("")) { status = STATUS_OK; details = " "; } validationMap.put(name, status + "\t" + details); } // // ---------------------------------------------------- // // ???? // // ---------------------------------------------------- // // ?ID?DB // HashSet<String> regIdList = new HashSet<String>(); // String[] sqls = { "SELECT ID FROM SPECTRUM ORDER BY ID", // "SELECT ID FROM RECORD ORDER BY ID", // "SELECT ID FROM PEAK GROUP BY ID ORDER BY ID", // "SELECT ID FROM CH_NAME ID ORDER BY ID", // "SELECT ID FROM CH_LINK ID ORDER BY ID", // "SELECT ID FROM TREE WHERE ID IS NOT NULL AND ID<>'' ORDER BY ID" }; // for (int i = 0; i < sqls.length; i++) { // String execSql = sqls[i]; // ResultSet rs = null; // try { // rs = db.executeQuery(execSql); // while (rs.next()) { // String idStr = rs.getString("ID"); // regIdList.add(idStr); // } // } catch (SQLException e) { // Logger.getLogger("global").severe(" sql : " + execSql); // e.printStackTrace(); // op.println(msgErr("database access error.")); // return new TreeMap<String, String>(); // } finally { // try { // if (rs != null) { // rs.close(); // } // } catch (SQLException e) { // } // } // } // // ?ID? 
// final String[] recFileList = (new File(registPath)).list(); // for (int i = 0; i < recFileList.length; i++) { // String name = recFileList[i]; // File file = new File(registPath + File.separator + name); // if (!file.isFile() || file.isHidden() // || name.lastIndexOf(REC_EXTENSION) == -1) { // continue; // } // String idStr = name.replace(REC_EXTENSION, ""); // regIdList.add(idStr); // } // // ?? // for (Map.Entry<String, String> e : validationMap.entrySet()) { // String statusStr = e.getValue().split("\t")[0]; // if (statusStr.equals(STATUS_ERR)) { // continue; // } // String nameStr = e.getKey(); // String idStr = e.getKey().replace(REC_EXTENSION, ""); // String detailsStr = e.getValue().split("\t")[1]; // if (regIdList.contains(idStr)) { // statusStr = STATUS_WARN; // detailsStr += "id [" // + idStr + "] of file name [" // + nameStr // + "] already registered."; // validationMap.put(nameStr, statusStr + "\t" + detailsStr); // } // } return validationMap; }
From source file:org.commoncrawl.util.NodeAffinityMaskBuilder.java
/**
 * Builds a tab-separated "partitionNumber,hostName" affinity-mask string that maps each
 * map-reduce part file under {@code partFileDirectory} to a preferred node.
 *
 * <p>For each {@code part-*} (or {@code part-r-*}) file, the method inspects the file's HDFS
 * block locations and picks the host holding the most blocks. If {@code optionalRootMapHint}
 * is supplied, the hinted host is used instead (with a warning when &gt;50% of the file's
 * blocks are not on that host). When no hint is given and {@code skipBalance} is false, a
 * two-pass rebalancing step evens out partition counts across nodes.
 *
 * @param fileSystem          file system used to enumerate part files and query block locations
 * @param partFileDirectory   directory containing the {@code part-*} files
 * @param optionalRootMapHint optional pre-assigned partition-to-host map; when non-null the
 *                            hinted host wins and no balancing is performed
 * @param excludedNodeList    hosts to ignore when counting block locations (may be null)
 * @param maxReducersPerNode  upper bound per node; NOTE(review): only referenced from
 *                            commented-out validation below, currently unused — confirm intent
 * @param skipBalance         when true, skip the rebalancing step entirely
 * @return tab-separated list of "partition,host" entries, ordered by partition number
 * @throws IOException if no part files exist, a part file name cannot be parsed, or a
 *                     partition has no usable block-hosting nodes
 */
public static String buildNodeAffinityMask(FileSystem fileSystem, Path partFileDirectory,
        Map<Integer, String> optionalRootMapHint, Set<String> excludedNodeList, int maxReducersPerNode,
        boolean skipBalance) throws IOException {

    // Final mapping: partition number -> assigned host (TreeMap keeps output sorted by partition).
    TreeMap<Integer, String> partitionToNodeMap = new TreeMap<Integer, String>();
    FileStatus paths[] = fileSystem.globStatus(new Path(partFileDirectory, "part-*"));

    if (paths.length == 0) {
        throw new IOException("Invalid source Path:" + partFileDirectory);
    }

    // Inverse mapping: host -> set of partitions currently assigned to it (used for balancing).
    Multimap<String, Integer> inverseMap = TreeMultimap.create();
    // Per-partition candidate hosts, ordered best-first by local block count.
    Map<Integer, List<String>> paritionToDesiredCandidateList = new TreeMap<Integer, List<String>>();

    // iterate paths
    for (FileStatus path : paths) {

        String currentFile = path.getPath().getName();
        int partitionNumber;
        try {
            // Part files are named either "part-r-NNNNN" (new API) or "part-NNNNN" (old API).
            if (currentFile.startsWith("part-r")) {
                partitionNumber = NUMBER_FORMAT.parse(currentFile.substring("part-r-".length())).intValue();
            } else {
                partitionNumber = NUMBER_FORMAT.parse(currentFile.substring("part-".length())).intValue();
            }
        } catch (ParseException e) {
            throw new IOException("Invalid Part Name Encountered:" + currentFile);
        }

        // get block locations
        BlockLocation locations[] = fileSystem.getFileBlockLocations(path, 0, path.getLen());

        // if passed in root map is not null, then validate that all blocks for the current file
        // reside on the desired node
        if (optionalRootMapHint != null) {
            // the host all blocks should reside on
            String desiredHost = optionalRootMapHint.get(partitionNumber);

            ArrayList<String> misplacedBlocks = new ArrayList<String>();
            // ok walk all blocks
            for (BlockLocation location : locations) {
                boolean found = false;
                for (String host : location.getHosts()) {
                    if (host.compareTo(desiredHost) == 0) {
                        found = true;
                        break;
                    }
                }
                if (!found) {
                    misplacedBlocks.add("Block At:" + location.getOffset() + " for File:" + path.getPath()
                            + " did not contain desired location:" + desiredHost);
                }
            }
            // ok pass test at a certain threshold: warn (only) when more than half the blocks are
            // not on the hinted host.
            if (misplacedBlocks.size() != 0
                    && ((float) misplacedBlocks.size() / (float) locations.length) > .50f) {
                LOG.error("Misplaced Blocks Exceed Threshold");
                for (String misplacedBlock : misplacedBlocks) {
                    LOG.error(misplacedBlock);
                }
                // TODO: SKIP THIS STEP FOR NOW ???
                //throw new IOException("Misplaced Blocks Exceed Threshold!");
            }
            // The hint always wins, even when blocks are misplaced.
            partitionToNodeMap.put(partitionNumber, desiredHost);
        } else {
            if (excludedNodeList != null) {
                // LOG.info("Exclued Node List is:" + Lists.newArrayList(excludedNodeList).toString());
            }
            // ok ask file system for block locations: tally how many of this file's blocks each
            // (non-excluded) host has locally.
            TreeMap<String, Integer> nodeToBlockCount = new TreeMap<String, Integer>();

            for (BlockLocation location : locations) {
                for (String host : location.getHosts()) {
                    if (excludedNodeList == null || !excludedNodeList.contains(host)) {
                        Integer nodeHitCount = nodeToBlockCount.get(host);
                        if (nodeHitCount == null) {
                            nodeToBlockCount.put(host, 1);
                        } else {
                            nodeToBlockCount.put(host, nodeHitCount.intValue() + 1);
                        }
                    }
                }
            }

            if (nodeToBlockCount.size() == 0) {
                throw new IOException("No valid nodes found for partition number:" + path);
            }

            Map.Entry<String, Integer> entries[] = nodeToBlockCount.entrySet().toArray(new Map.Entry[0]);
            // Sort hosts by descending block count (best candidate first).
            Arrays.sort(entries, new Comparator<Map.Entry<String, Integer>>() {

                @Override
                public int compare(Entry<String, Integer> o1, Entry<String, Integer> o2) {
                    return o1.getValue().intValue() < o2.getValue().intValue() ? 1
                            : o1.getValue().intValue() == o2.getValue().intValue() ? 0 : -1;
                }
            });

            // build a list of nodes by priority ...
            List<String> nodesByPriority = Lists.transform(Lists.newArrayList(entries),
                    new Function<Map.Entry<String, Integer>, String>() {

                        @Override
                        public String apply(Entry<String, Integer> entry) {
                            return entry.getKey();
                        }
                    });

            // stash it away ...
            paritionToDesiredCandidateList.put(partitionNumber, nodesByPriority);
            //LOG.info("Mapping Partition:" + partitionNumber + " To Node:" + entries[0].getKey() + " BlockCount" + entries[0].getValue().intValue());
            partitionToNodeMap.put(partitionNumber, entries[0].getKey());
            // store the inverse mapping ...
            inverseMap.put(entries[0].getKey(), partitionNumber);
        }
    }

    if (skipBalance) {
        // walk partition map to make sure everything is assigned ...
        /*
        for (String node : inverseMap.keys()) {
          if (inverseMap.get(node).size() > maxReducersPerNode) {
            throw new IOException("Node:" + node + " has too many partitions! ("+inverseMap.get(node).size());
          }
        }
        */
    }

    // now if optional root map hint is null: rebalance partition assignments so no node carries
    // more than ceil(paths / nodes) partitions.
    if (optionalRootMapHint == null && !skipBalance) {
        // figure out if there is an imbalance
        int avgRegionsPerNode = (int) Math.floor((float) paths.length / (float) inverseMap.keySet().size());
        int maxRegionsPerNode = (int) Math.ceil((float) paths.length / (float) inverseMap.keySet().size());

        LOG.info("Attempting to ideally balance nodes. Avg paritions per node:" + avgRegionsPerNode);

        // two passes .. pass 0 reassigns overflow partitions to preferred candidates with spare
        // capacity; pass 1 force-moves remaining overflow onto the least-loaded node.
        for (int pass = 0; pass < 2; ++pass) {
            LOG.info("Pass:" + pass);
            // iterate nodes ... (copy the key set: inverseMap is mutated inside the loop)
            for (String node : ImmutableSet.copyOf(inverseMap.keySet())) {
                // get paritions in map (snapshot, since reassignment mutates inverseMap)
                Collection<Integer> paritions = ImmutableList.copyOf(inverseMap.get(node));
                // if parition count exceeds desired average ...
                if (paritions.size() > maxRegionsPerNode) {
                    // first pass, assign based on preference
                    if (pass == 0) {
                        LOG.info("Node:" + node + " parition count:" + paritions.size() + " exceeds avg:"
                                + avgRegionsPerNode);
                        // walk partitions trying to find a node to discrard the parition to
                        for (int partition : paritions) {
                            for (String candidate : paritionToDesiredCandidateList.get(partition)) {
                                if (!candidate.equals(node)) {
                                    // see if this candidate has room ..
                                    if (inverseMap.get(candidate).size() < avgRegionsPerNode) {
                                        LOG.info("REASSIGNING parition:" + partition + " from Node:" + node
                                                + " to Node:" + candidate);
                                        // found match reassign it ...
                                        inverseMap.remove(node, partition);
                                        inverseMap.put(candidate, partition);
                                        break;
                                    }
                                }
                            }
                            // break out if reach our desired number of paritions for this node
                            if (inverseMap.get(node).size() == avgRegionsPerNode)
                                break;
                        }
                    }
                    // second pass ... assign based on least loaded node ...
                    else {
                        int desiredRelocations = paritions.size() - maxRegionsPerNode;
                        LOG.info("Desired Relocation for node:" + node + ":" + desiredRelocations
                                + " partitions:" + paritions.size());
                        for (int i = 0; i < desiredRelocations; ++i) {
                            // Find the node currently carrying the fewest partitions.
                            String leastLoadedNode = null;
                            int leastLoadedNodePartitionCount = 0;

                            for (String candidateNode : inverseMap.keySet()) {
                                if (leastLoadedNode == null || inverseMap.get(candidateNode)
                                        .size() < leastLoadedNodePartitionCount) {
                                    leastLoadedNode = candidateNode;
                                    leastLoadedNodePartitionCount = inverseMap.get(candidateNode).size();
                                }
                            }

                            // Pick the partition for which the least-loaded node ranks highest in
                            // its candidate list (lowest offset = most blocks local there).
                            int bestPartition = -1;
                            int bestParitionOffset = -1;

                            for (int candidateParition : inverseMap.get(node)) {
                                int offset = 0;
                                for (String nodeCandidate : paritionToDesiredCandidateList
                                        .get(candidateParition)) {
                                    if (nodeCandidate.equals(leastLoadedNode)) {
                                        if (bestPartition == -1 || bestParitionOffset > offset) {
                                            bestPartition = candidateParition;
                                            bestParitionOffset = offset;
                                        }
                                        break;
                                    }
                                    offset++;
                                }
                            }
                            // No partition prefers the target node at all — move an arbitrary one.
                            if (bestPartition == -1) {
                                bestPartition = Iterables.get(inverseMap.get(node), 0);
                            }
                            LOG.info("REASSIGNING parition:" + bestPartition + " from Node:" + node
                                    + " to Node:" + leastLoadedNode);
                            // found match reassign it ...
                            inverseMap.remove(node, bestPartition);
                            inverseMap.put(leastLoadedNode, bestPartition);
                        }
                    }
                }
            }
        }

        LOG.info("Rebuilding parition to node map based on ideal balance");
        for (String node : inverseMap.keySet()) {
            LOG.info("Node:" + node + " has:" + inverseMap.get(node).size() + " partitions:"
                    + inverseMap.get(node).toString());
        }

        // Rebuild the forward map from the (now balanced) inverse map.
        partitionToNodeMap.clear();
        for (Map.Entry<String, Integer> entry : inverseMap.entries()) {
            partitionToNodeMap.put(entry.getValue(), entry.getKey());
        }
    }

    // Serialize as "partition,host" entries joined by tabs, ordered by partition number.
    StringBuilder builder = new StringBuilder();
    int itemCount = 0;
    for (Map.Entry<Integer, String> entry : partitionToNodeMap.entrySet()) {
        if (itemCount++ != 0)
            builder.append("\t");
        builder.append(entry.getKey().intValue() + "," + entry.getValue());
    }

    return builder.toString();
}