A list of usage examples for java.util.TreeMap.containsKey, collected from open-source projects.
public boolean containsKey(Object key)
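containsKey returns true if the map contains a mapping for the given key. Note that TreeMap locates keys with the map's ordering (its comparator, or the keys' natural ordering), not with equals. Before the project examples below, a minimal self-contained sketch:

    import java.util.TreeMap;

    public class ContainsKeyDemo {
        public static void main(String[] args) {
            TreeMap<String, Integer> ages = new TreeMap<>();
            ages.put("alice", 34);

            System.out.println(ages.containsKey("alice")); // true
            System.out.println(ages.containsKey("bob"));   // false

            // Because lookups go through the ordering, a key that cannot be
            // compared with the map's keys throws ClassCastException rather
            // than returning false:
            // ages.containsKey(42); // would throw ClassCastException
        }
    }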
From source file:models.Indexer.java
private void performIndexing(String cur_file_name) throws IOException {
    int cur_docId;
    if (!docPaths_to_ids_map.containsKey(cur_file_name)) {
        cur_docId = num_of_docs_indexed;
        docPaths_to_ids_map.put(cur_file_name, num_of_docs_indexed);
        num_of_docs_indexed++;
    } else {
        cur_docId = docPaths_to_ids_map.get(cur_file_name);
    }
    if (cur_file_name.endsWith("pdf")) {
        System.out.println("reading: " + cur_file_name + " file");
        String text = null;
        PDFdoc cur_doc = new PDFdoc();
        try {
            text = cur_doc.extractPDF(installation_directory_path + "\\downloads\\" + cur_file_name);
        } catch (Exception e) {
            System.err.println("cannot extract pdf: " + e.getMessage());
        }
        if (text == null) {
            System.out.println("problem reading pdf");
            return;
        }
        String[] tokens_arr = text.toLowerCase().split("[^a-z]");
        int total_words = tokens_arr.length;
        for (String term : tokens_arr) {
            if (inverted_index.containsKey(term)) { // is this term already in the index?
                TreeMap<Integer, Integer> doc_table = inverted_index.get(term);
                if (doc_table.containsKey(cur_docId)) { // has this document been seen for the term?
                    doc_table.put(cur_docId, doc_table.get(cur_docId) + 1);
                } else { // first occurrence of the term in this document
                    doc_table.put(cur_docId, 1);
                }
            } else { // the index does not contain this term yet
                TreeMap<Integer, Integer> doc_table = new TreeMap<>();
                doc_table.put(cur_docId, 1);
                inverted_index.put(term, doc_table);
            }
        }
        documents_len_table.put(cur_docId, total_words);
    }
}
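The term-frequency update above is a classic containsKey check-then-put cascade. Since Java 8 the same logic collapses into two standard Map calls; a minimal sketch (the inverted-index shape mirrors the example, the class and method names are illustrative):

    import java.util.TreeMap;

    public class InvertedIndexSketch {
        private final TreeMap<String, TreeMap<Integer, Integer>> invertedIndex = new TreeMap<>();

        // Record one occurrence of term in the document docId.
        void addOccurrence(String term, int docId) {
            // computeIfAbsent replaces the outer containsKey branch;
            // merge replaces the inner increment-or-initialize branch.
            invertedIndex.computeIfAbsent(term, t -> new TreeMap<>())
                         .merge(docId, 1, Integer::sum);
        }

        public static void main(String[] args) {
            InvertedIndexSketch idx = new InvertedIndexSketch();
            idx.addOccurrence("tree", 0);
            idx.addOccurrence("tree", 0);
            idx.addOccurrence("map", 1);
            System.out.println(idx.invertedIndex); // {map={1=1}, tree={0=2}}
        }
    }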
From source file:com.sfs.whichdoctor.importer.Importer.java
/**
 * Process data.
 *
 * @param type the type
 * @param data the data
 * @param columns the columns
 * @param applicationContext the application context
 *
 * @throws IOException Signals that an I/O exception has occurred.
 */
public final void processData(final String type, final TreeMap<Integer, TreeMap<Integer, String>> data,
        final TreeMap<Integer, String> columns, final ApplicationContext applicationContext)
        throws IOException {

    HashMap<String, List<String>> dataMap = new HashMap<String, List<String>>();

    if (data != null && columns != null) {
        // Iterate through the columns, assigning values to the dataMap
        for (Integer columnNumber : columns.keySet()) {
            String columnField = columns.get(columnNumber);
            if (StringUtils.isNotBlank(columnField)) {
                // The column has been associated with some data
                ArrayList<String> dataValues = new ArrayList<String>();
                for (Integer rowNumber : data.keySet()) {
                    TreeMap<Integer, String> rowData = data.get(rowNumber);
                    if (rowData.containsKey(columnNumber)) {
                        // Get the value of the column and add it to the array
                        String dataField = rowData.get(columnNumber);
                        dataValues.add(dataField);
                    }
                }
                // Add the column name and its values to the dataMap
                dataMap.put(columnField, dataValues);
            }
        }
        // Store the completed dataMap
        setDataMap(dataMap);
    }

    // Take the dataMap and load the relevant objects into the bean map
    if (StringUtils.equalsIgnoreCase(type, "exam")) {
        importLogger.debug("Processing exam import");
        try {
            ExamImporter examImporter = new ExamImporter();
            examImporter.setDataMap(getDataMap());

            // Process the data map and turn it into a bean map
            final PersonDAO personDAO = (PersonDAO) applicationContext.getBean("personDAO");
            setBeanMap(examImporter.assignData(personDAO));
            setDescriptionMap(examImporter.getDescriptionMap());
            setImportMessage(examImporter.getImportMessage());
        } catch (Exception e) {
            importLogger.error("Error processing exam import: " + e.getMessage());
            throw new IOException("Error processing exam import: " + e.getMessage());
        }
    }
}
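The nested loops above walk keySet() and then call get() for every key; iterating entrySet() gives key and value in one pass, and a null check on get() folds the containsKey lookup into the retrieval. A minimal sketch of the same column-gathering idea (the names are illustrative, not the Importer API):

    import java.util.ArrayList;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;
    import java.util.TreeMap;

    public class ColumnGather {
        static Map<String, List<String>> gather(TreeMap<Integer, TreeMap<Integer, String>> data,
                                                TreeMap<Integer, String> columns) {
            Map<String, List<String>> dataMap = new HashMap<>();
            for (Map.Entry<Integer, String> col : columns.entrySet()) { // key and value in one pass
                List<String> values = new ArrayList<>();
                for (TreeMap<Integer, String> row : data.values()) {
                    String cell = row.get(col.getKey());
                    // null means absent here, so this replaces containsKey + get
                    // (valid only if rows never store null cells)
                    if (cell != null) {
                        values.add(cell);
                    }
                }
                dataMap.put(col.getValue(), values);
            }
            return dataMap;
        }
    }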
From source file:org.apache.hadoop.hbase.util.RegionSplitter.java
static void rollingSplit(String tableName, SplitAlgorithm splitAlgo, Configuration conf)
        throws IOException, InterruptedException {
    final int minOS = conf.getInt("split.outstanding", 2);

    HTable table = new HTable(conf, tableName);

    // max outstanding splits. default == 50% of servers
    final int MAX_OUTSTANDING = Math.max(table.getConnection().getCurrentNrHRS() / 2, minOS);

    Path hbDir = FSUtils.getRootDir(conf);
    Path tableDir = FSUtils.getTableDir(hbDir, table.getName());
    Path splitFile = new Path(tableDir, "_balancedSplit");
    FileSystem fs = FileSystem.get(conf);

    // get a list of daughter regions to create
    LinkedList<Pair<byte[], byte[]>> tmpRegionSet = getSplits(table, splitAlgo);
    LinkedList<Pair<byte[], byte[]>> outstanding = Lists.newLinkedList();
    int splitCount = 0;
    final int origCount = tmpRegionSet.size();

    // All splits must compact, and we have one compact thread, so two split
    // requests to the same RS can stall the outstanding split queue.
    // To fix, group the regions into an RS pool and round-robin through it.
    LOG.debug("Bucketing regions by regionserver...");
    TreeMap<String, LinkedList<Pair<byte[], byte[]>>> daughterRegions = Maps.newTreeMap();
    for (Pair<byte[], byte[]> dr : tmpRegionSet) {
        String rsLocation = table.getRegionLocation(dr.getSecond()).getHostnamePort();
        if (!daughterRegions.containsKey(rsLocation)) {
            LinkedList<Pair<byte[], byte[]>> entry = Lists.newLinkedList();
            daughterRegions.put(rsLocation, entry);
        }
        daughterRegions.get(rsLocation).add(dr);
    }
    LOG.debug("Done with bucketing. Split time!");
    long startTime = System.currentTimeMillis();

    // open the split file and modify it as splits finish
    FSDataInputStream tmpIn = fs.open(splitFile);
    byte[] rawData = new byte[tmpIn.available()];
    tmpIn.readFully(rawData);
    tmpIn.close();
    FSDataOutputStream splitOut = fs.create(splitFile);
    splitOut.write(rawData);

    try {
        // *** split code ***
        while (!daughterRegions.isEmpty()) {
            LOG.debug(daughterRegions.size() + " RS have regions to split.");

            // Get RegionServer : region count mapping
            final TreeMap<ServerName, Integer> rsSizes = Maps.newTreeMap();
            Map<HRegionInfo, ServerName> regionsInfo = table.getRegionLocations();
            for (ServerName rs : regionsInfo.values()) {
                if (rsSizes.containsKey(rs)) {
                    rsSizes.put(rs, rsSizes.get(rs) + 1);
                } else {
                    rsSizes.put(rs, 1);
                }
            }

            // sort the RS by the number of regions they have
            List<String> serversLeft = Lists.newArrayList(daughterRegions.keySet());
            Collections.sort(serversLeft, new Comparator<String>() {
                public int compare(String o1, String o2) {
                    return rsSizes.get(o1).compareTo(rsSizes.get(o2));
                }
            });

            // Round-robin through the RS list. Choose the lightest-loaded servers
            // first to keep the master from load-balancing regions as we split.
            for (String rsLoc : serversLeft) {
                Pair<byte[], byte[]> dr = null;

                // find a region in the RS list that hasn't been moved
                LOG.debug("Finding a region on " + rsLoc);
                LinkedList<Pair<byte[], byte[]>> regionList = daughterRegions.get(rsLoc);
                while (!regionList.isEmpty()) {
                    dr = regionList.pop();

                    // get current region info
                    byte[] split = dr.getSecond();
                    HRegionLocation regionLoc = table.getRegionLocation(split);

                    // if this region moved locations
                    String newRs = regionLoc.getHostnamePort();
                    if (newRs.compareTo(rsLoc) != 0) {
                        LOG.debug("Region with " + splitAlgo.rowToStr(split) + " moved to "
                                + newRs + ". Relocating...");
                        // relocate it, don't use it right now
                        if (!daughterRegions.containsKey(newRs)) {
                            LinkedList<Pair<byte[], byte[]>> entry = Lists.newLinkedList();
                            daughterRegions.put(newRs, entry);
                        }
                        daughterRegions.get(newRs).add(dr);
                        dr = null;
                        continue;
                    }

                    // make sure this region wasn't already split
                    byte[] sk = regionLoc.getRegionInfo().getStartKey();
                    if (sk.length != 0) {
                        if (Bytes.equals(split, sk)) {
                            LOG.debug("Region already split on " + splitAlgo.rowToStr(split)
                                    + ". Skipping this region...");
                            ++splitCount;
                            dr = null;
                            continue;
                        }
                        byte[] start = dr.getFirst();
                        Preconditions.checkArgument(Bytes.equals(start, sk),
                                splitAlgo.rowToStr(start) + " != " + splitAlgo.rowToStr(sk));
                    }

                    // passed all checks! found a good region
                    break;
                }
                if (regionList.isEmpty()) {
                    daughterRegions.remove(rsLoc);
                }
                if (dr == null)
                    continue;

                // we have a good region, time to split!
                byte[] split = dr.getSecond();
                LOG.debug("Splitting at " + splitAlgo.rowToStr(split));
                HBaseAdmin admin = new HBaseAdmin(table.getConfiguration());
                admin.split(table.getTableName(), split);

                LinkedList<Pair<byte[], byte[]>> finished = Lists.newLinkedList();
                if (conf.getBoolean("split.verify", true)) {
                    // we need to verify and rate-limit our splits
                    outstanding.addLast(dr);
                    // with too many outstanding splits, wait for some to finish
                    while (outstanding.size() >= MAX_OUTSTANDING) {
                        finished = splitScan(outstanding, table, splitAlgo);
                        if (finished.isEmpty()) {
                            Thread.sleep(30 * 1000);
                        } else {
                            outstanding.removeAll(finished);
                        }
                    }
                } else {
                    finished.add(dr);
                }

                // mark each finished region as successfully split
                for (Pair<byte[], byte[]> region : finished) {
                    splitOut.writeChars("- " + splitAlgo.rowToStr(region.getFirst()) + " "
                            + splitAlgo.rowToStr(region.getSecond()) + "\n");
                    splitCount++;
                    if (splitCount % 10 == 0) {
                        long tDiff = (System.currentTimeMillis() - startTime) / splitCount;
                        LOG.debug("STATUS UPDATE: " + splitCount + " / " + origCount
                                + ". Avg Time / Split = "
                                + org.apache.hadoop.util.StringUtils.formatTime(tDiff));
                    }
                }
            }
        }
        if (conf.getBoolean("split.verify", true)) {
            while (!outstanding.isEmpty()) {
                LinkedList<Pair<byte[], byte[]>> finished = splitScan(outstanding, table, splitAlgo);
                if (finished.isEmpty()) {
                    Thread.sleep(30 * 1000);
                } else {
                    outstanding.removeAll(finished);
                    for (Pair<byte[], byte[]> region : finished) {
                        splitOut.writeChars("- " + splitAlgo.rowToStr(region.getFirst()) + " "
                                + splitAlgo.rowToStr(region.getSecond()) + "\n");
                    }
                }
            }
        }
        LOG.debug("All regions have been successfully split!");
    } finally {
        long tDiff = System.currentTimeMillis() - startTime;
        LOG.debug("TOTAL TIME = " + org.apache.hadoop.util.StringUtils.formatTime(tDiff));
        LOG.debug("Splits = " + splitCount);
        LOG.debug("Avg Time / Split = "
                + org.apache.hadoop.util.StringUtils.formatTime(tDiff / splitCount));

        splitOut.close();
        if (table != null) {
            table.close();
        }
    }
    fs.delete(splitFile, false);
}
From source file:ANNFileDetect.EncogTestClass.java
private void createReport(TreeMap<Double, Integer> ht, String file) throws IOException {
    // Invert the map: group the values (counts) to the list of keys sharing them
    TreeMap<Integer, ArrayList<Double>> tm = new TreeMap<Integer, ArrayList<Double>>();
    for (Map.Entry<Double, Integer> entry : ht.entrySet()) {
        if (tm.containsKey(entry.getValue())) {
            ArrayList<Double> al = tm.get(entry.getValue());
            al.add(entry.getKey());
            tm.put(entry.getValue(), al);
        } else {
            ArrayList<Double> al = new ArrayList<Double>();
            al.add(entry.getKey());
            tm.put(entry.getValue(), al);
        }
    }
    String[] tmpfl = file.split("/");
    if (tmpfl.length < 2)
        tmpfl = file.split("\\\\");
    String crp = tmpfl[tmpfl.length - 1];
    String[] actfl = crp.split("\\.");
    FileWriter fstream = new FileWriter("tempTrainingFiles/" + actfl[1].toUpperCase() + actfl[0] + ".txt");
    BufferedWriter fileto = new BufferedWriter(fstream);
    int size = tm.size();
    int cnt = 0;
    for (Map.Entry<Integer, ArrayList<Double>> entry : tm.entrySet()) {
        if (cnt > (size - 10) && entry.getKey() > 2 && entry.getValue().size() < 20) {
            double tmpval = ((double) entry.getKey()) / filebytes;
            fileto.write("Times: " + tmpval + " Values: ");
            for (Double dbl : entry.getValue()) {
                fileto.write(dbl + " ");
            }
            fileto.write("\n");
        }
        cnt++;
    }
    fileto.close();
}
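The inversion loop above re-puts the list even when it is already in the map; since the map holds a reference, mutating the fetched list is enough, and computeIfAbsent (Java 8) collapses the whole containsKey branch. A sketch, assuming the same Double-to-Integer input shape:

    import java.util.ArrayList;
    import java.util.Map;
    import java.util.TreeMap;

    public class InvertByValue {
        public static void main(String[] args) {
            TreeMap<Double, Integer> ht = new TreeMap<>();
            ht.put(0.25, 3);
            ht.put(0.50, 3);
            ht.put(0.75, 1);

            TreeMap<Integer, ArrayList<Double>> tm = new TreeMap<>();
            for (Map.Entry<Double, Integer> e : ht.entrySet()) {
                // create the bucket on first sight of the count, then append
                tm.computeIfAbsent(e.getValue(), k -> new ArrayList<>()).add(e.getKey());
            }
            System.out.println(tm); // {1=[0.75], 3=[0.25, 0.5]}
        }
    }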
From source file:com.michellemay.mappings.LanguageTagsMapping.java
/**
 * Instantiates a new language tags mapping.
 */
public LanguageTagsMapping() {
    super(NAME);
    this.withCaseSensitive(false);

    // Build the reverse map. Use a TreeMap to offer case insensitivity while
    // preserving the keys' case (useful for extending).
    TreeMap<String, Locale> map = new TreeMap<String, Locale>(
            this.getCaseSensitive() ? null : String.CASE_INSENSITIVE_ORDER);
    for (Locale loc : LocaleUtils.availableLocaleList()) {
        String isoCode = loc.getLanguage();
        if (isoCode.length() > 0) {
            String displayValue = loc.toLanguageTag();
            if (!map.containsKey(displayValue)) {
                map.put(displayValue, loc);
                // Also add the variant with underscores
                map.put(displayValue.replace('-', '_'), loc);
            }
        }
    }
    this.withMapping(map);
}
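Because containsKey consults the map's comparator, a TreeMap built with String.CASE_INSENSITIVE_ORDER (as above) matches keys regardless of case while the stored keys keep their original form. A minimal sketch of that behavior:

    import java.util.Locale;
    import java.util.TreeMap;

    public class CaseInsensitiveLookup {
        public static void main(String[] args) {
            TreeMap<String, Locale> map = new TreeMap<>(String.CASE_INSENSITIVE_ORDER);
            map.put("en-US", Locale.forLanguageTag("en-US"));

            // containsKey delegates to the comparator, so case does not matter...
            System.out.println(map.containsKey("EN-us")); // true
            // ...while the stored key preserves its original case
            System.out.println(map.firstKey());           // en-US
        }
    }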
From source file:agendavital.modelo.data.Noticia.java
public static TreeMap<LocalDate, ArrayList<Noticia>> buscar(String _parametro)
        throws ConexionBDIncorrecta, SQLException {
    final DateTimeFormatter dateFormatter = DateTimeFormatter.ofPattern("dd-MM-yyyy");
    ArrayList<String> _tags = UtilidadesBusqueda.separarPalabras(_parametro);
    TreeMap<LocalDate, ArrayList<Noticia>> busqueda = null;
    try (Connection conexion = ConfigBD.conectar()) {
        busqueda = new TreeMap<>();
        for (String _tag : _tags) {
            String tag = ConfigBD.String2Sql(_tag, true);
            String buscar = String.format("SELECT id_Noticia, fecha FROM noticias "
                    + "WHERE id_noticia IN (SELECT id_noticia FROM momentos_noticias_etiquetas "
                    + "WHERE id_etiqueta IN (SELECT id_etiqueta FROM etiquetas WHERE nombre LIKE %s)) "
                    + "OR titulo LIKE %s "
                    + "OR cuerpo LIKE %s "
                    + "OR categoria LIKE %s "
                    + "OR fecha LIKE %s;", tag, tag, tag, tag, tag);
            ResultSet rs = conexion.createStatement().executeQuery(buscar);
            while (rs.next()) {
                LocalDate date = LocalDate.parse(rs.getString("fecha"), dateFormatter);
                Noticia insertarNoticia = new Noticia(rs.getInt("id_noticia"));
                if (busqueda.containsKey(date)) {
                    // only add the noticia if it is not already in the day's list
                    boolean encontrado = false;
                    for (int i = 0; i < busqueda.get(date).size() && !encontrado; i++)
                        if (busqueda.get(date).get(i).getId() == insertarNoticia.getId())
                            encontrado = true;
                    if (!encontrado)
                        busqueda.get(date).add(insertarNoticia);
                } else {
                    busqueda.put(date, new ArrayList<>());
                    busqueda.get(date).add(insertarNoticia);
                }
            }
        }
    } catch (SQLException e) {
        e.printStackTrace();
    }
    return busqueda;
}
From source file:net.anthonypoon.ngram.correlation.CorrelationReducer.java
@Override
protected void reduce(Text key, Iterable<Text> values, Context context)
        throws IOException, InterruptedException {
    TreeMap<String, Double> currElement = new TreeMap<>();
    for (Text val : values) {
        String[] strArray = val.toString().split("\t");
        currElement.put(strArray[0], Double.valueOf(strArray[1]));
    }
    // densify the sparse year->value series, shifting by lag and filling gaps with 0
    double[] currElementPrimitive = new double[upbound - lowbound + 1];
    for (int i = 0; i <= upbound - lowbound; i++) {
        if (currElement.containsKey(String.valueOf(lowbound + i - lag))) {
            currElementPrimitive[i] = currElement.get(String.valueOf(lowbound + i - lag));
        } else {
            currElementPrimitive[i] = 0;
        }
    }
    for (Map.Entry<String, TreeMap<String, Double>> pair : corrTargetArray.entrySet()) {
        double[] targetElementPrimitive = new double[upbound - lowbound + 1];
        for (int i = 0; i <= upbound - lowbound; i++) {
            if (pair.getValue().containsKey(String.valueOf(lowbound + i))) {
                targetElementPrimitive[i] = pair.getValue().get(String.valueOf(lowbound + i));
            } else {
                targetElementPrimitive[i] = 0;
            }
        }
        double correlation = new PearsonsCorrelation().correlation(targetElementPrimitive,
                currElementPrimitive);
        if (correlation > threshold) {
            NumberFormat formatter = new DecimalFormat("#0.000");
            context.write(key, new Text(pair.getKey() + "\t" + formatter.format(correlation)));
        }
    }
}
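Both containsKey-or-zero branches above can be collapsed with Map.getOrDefault (Java 8). A sketch of just the densification step, assuming the same year-keyed TreeMap (the class and method names are illustrative):

    import java.util.TreeMap;

    public class SeriesToArray {
        // Convert a sparse year->value map into a dense array over [lowbound, upbound],
        // shifting by lag and filling missing years with 0.
        static double[] toDense(TreeMap<String, Double> series, int lowbound, int upbound, int lag) {
            double[] dense = new double[upbound - lowbound + 1];
            for (int i = 0; i < dense.length; i++) {
                // getOrDefault replaces the containsKey/else-0 branch
                dense[i] = series.getOrDefault(String.valueOf(lowbound + i - lag), 0.0);
            }
            return dense;
        }

        public static void main(String[] args) {
            TreeMap<String, Double> series = new TreeMap<>();
            series.put("1901", 0.5);
            System.out.println(java.util.Arrays.toString(
                    toDense(series, 1900, 1902, 0))); // [0.0, 0.5, 0.0]
        }
    }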
From source file:opendap.hai.BesControlApi.java
private String getValidLoggerName(BES bes, String loggerName) throws BesAdminFail {
    TreeMap<String, BES.BesLogger> validLoggers = bes.getBesLoggers();
    if (validLoggers.containsKey(loggerName)) {
        BES.BesLogger besLogger = validLoggers.get(loggerName);
        return besLogger.getName();
    }
    log.debug("User requested unknown BES logger: '{}'", loggerName);
    return null;
}
From source file:org.apache.storm.scheduler.IsolationScheduler.java
private Map<Integer, Integer> machineDistribution(TopologyDetails topology) {
    int machineNum = isoMachines.get(topology.getName()).intValue();
    int workerNum = topology.getNumWorkers();
    TreeMap<Integer, Integer> distribution = Utils.integerDivided(workerNum, machineNum);

    // drop the entry for machines that would receive zero workers
    if (distribution.containsKey(0)) {
        distribution.remove(0);
    }
    return distribution;
}
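Utils.integerDivided appears to map each workers-per-machine count to the number of machines receiving that count, which is why the containsKey(0) check prunes machines left with no workers. A hypothetical sketch of that distribution logic under this assumption (not Storm's actual implementation):

    import java.util.TreeMap;

    public class IntegerDivided {
        // Split `sum` workers across `numPieces` machines as evenly as possible and
        // report how many machines get each worker count, e.g. 7 over 3 -> {2=2, 3=1}.
        static TreeMap<Integer, Integer> integerDivided(int sum, int numPieces) {
            int base = sum / numPieces;
            int numInc = sum % numPieces;      // this many machines get one extra worker
            TreeMap<Integer, Integer> result = new TreeMap<>();
            if (numPieces - numInc > 0) {
                result.put(base, numPieces - numInc);
            }
            if (numInc > 0) {
                result.put(base + 1, numInc);
            }
            return result;
        }

        public static void main(String[] args) {
            TreeMap<Integer, Integer> distribution = integerDivided(2, 5); // {0=3, 1=2}
            distribution.remove(0);            // as above: drop machines with zero workers
            System.out.println(distribution);  // {1=2}
        }
    }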
From source file:pt.webdetails.di.baserver.utils.inspector.Inspector.java
private boolean inspectEndpoints(final String moduleName) {
    String endpointUrl;
    if (moduleName.equals("platform")) {
        endpointUrl = getBaseUrl(this.serverUrl) + "/application.wadl";
    } else {
        endpointUrl = getBaseUrl(this.serverUrl, moduleName) + "/application.wadl";
    }
    URI uri = null;
    try {
        uri = new URI(endpointUrl);
    } catch (URISyntaxException e) {
        // do nothing
    }
    if (uri != null) {
        Response response = null;
        try {
            response = HttpConnectionHelper.callHttp(uri.toASCIIString(), this.userName, this.password);
        } catch (IOException e) {
            // do nothing
        } catch (KettleStepException e) {
            // do nothing
        }
        if (response != null && response.getStatusCode() == HttpStatus.SC_OK) {
            SAXReader reader = new SAXReader();
            InputStream inputStream = new ByteArrayInputStream(response.getResult().getBytes());
            WadlParser parser = new WadlParser();
            try {
                Document doc = reader.read(inputStream);
                // group the parsed endpoints by path
                TreeMap<String, LinkedList<Endpoint>> endpointMap = new TreeMap<String, LinkedList<Endpoint>>();
                for (Endpoint endpoint : parser.getEndpoints(doc)) {
                    final String path = endpoint.getPath();
                    if (!endpointMap.containsKey(path)) {
                        endpointMap.put(path, new LinkedList<Endpoint>());
                    }
                    endpointMap.get(path).add(endpoint);
                }
                this.endpoints.put(moduleName, endpointMap);
                return true;
            } catch (DocumentException e) {
                // do nothing
            }
        }
    }
    return false;
}