List of usage examples for java.util Scanner hasNext
public boolean hasNext()
From source file:com.tesora.dve.tools.CLIBuilder.java
/**
 * Drains the scanner through {@link #scanFile(Scanner)}, collecting every non-null result.
 *
 * @param scanner token source; consumed until exhausted
 * @return the files that were successfully scanned (possibly empty, never null)
 * @throws PEException propagated from scanFile
 */
private List<File> scanFilesOptional(Scanner scanner) throws PEException {
    final List<File> result = new ArrayList<File>();
    for (; scanner.hasNext(); ) {
        final File next = scanFile(scanner);
        if (next == null) {
            // scanFile signalled "nothing usable here" — skip it
            continue;
        }
        result.add(next);
    }
    return result;
}
From source file:com.gatf.executor.core.AcceptanceTestContext.java
/**
 * Seeds the shared http-header lookup map from the String constants declared on
 * {@link HttpHeaders}, then — when a WSDL location file is configured — parses each listed
 * WSDL and caches per-operation SOAP request templates, SOAP actions and service endpoints.
 *
 * @throws Exception on reflection, I/O, WSDL-parsing or XML-parsing failure
 */
private void initSoapContextAndHttpHeaders() throws Exception {
    // Collect every static String constant of HttpHeaders; the lower-cased form is the
    // lookup key, so header-name matching is case-insensitive.
    Field[] declaredFields = HttpHeaders.class.getDeclaredFields();
    for (Field field : declaredFields) {
        if (java.lang.reflect.Modifier.isStatic(field.getModifiers())
                && field.getType().equals(String.class)) {
            httpHeaders.put(field.get(null).toString().toLowerCase(), field.get(null).toString());
        }
    }
    File file = null;
    if (gatfExecutorConfig.getWsdlLocFile() != null
            && !gatfExecutorConfig.getWsdlLocFile().trim().isEmpty())
        file = getResourceFile(gatfExecutorConfig.getWsdlLocFile());
    if (file != null) {
        // One WSDL entry per line; '\r' is stripped to tolerate Windows line endings.
        Scanner s = new Scanner(file);
        s.useDelimiter("\n");
        List<String> list = new ArrayList<String>();
        while (s.hasNext()) {
            list.add(s.next().replace("\r", ""));
        }
        s.close();
        for (String wsdlLoc : list) {
            if (!wsdlLoc.trim().isEmpty()) {
                // Each line is "<prefix>,<wsdl-location>"; the prefix namespaces the cache keys.
                String[] wsdlLocParts = wsdlLoc.split(",");
                logger.info("Started Parsing WSDL location - " + wsdlLocParts[1]);
                Wsdl wsdl = Wsdl.parse(wsdlLocParts[1]);
                for (QName bindingName : wsdl.getBindings()) {
                    SoapBuilder builder = wsdl.getBuilder(bindingName);
                    for (SoapOperation operation : builder.getOperations()) {
                        // Build a skeleton request for the operation and cache it as a DOM
                        // Document; for distributed load tests also keep the raw string form.
                        String request = builder.buildInputMessage(operation);
                        DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
                        DocumentBuilder db = dbf.newDocumentBuilder();
                        Document soapMessage = db.parse(new ByteArrayInputStream(request.getBytes()));
                        if (gatfExecutorConfig.isDistributedLoadTests()) {
                            soapStrMessages.put(wsdlLocParts[0] + operation.getOperationName(), request);
                        }
                        soapMessages.put(wsdlLocParts[0] + operation.getOperationName(), soapMessage);
                        if (operation.getSoapAction() != null) {
                            soapActions.put(wsdlLocParts[0] + operation.getOperationName(),
                                    operation.getSoapAction());
                        }
                        logger.info("Adding message for SOAP operation - " + operation.getOperationName());
                    }
                    // NOTE(review): only the first service URL is cached as the endpoint —
                    // presumably one endpoint per binding; confirm for multi-port services.
                    soapEndpoints.put(wsdlLocParts[0], builder.getServiceUrls().get(0));
                    logger.info("Adding SOAP Service endpoint - " + builder.getServiceUrls().get(0));
                }
                logger.info("Done Parsing WSDL location - " + wsdlLocParts[1]);
            }
        }
    }
}
From source file:org.fao.geonet.kernel.csw.services.GetRecords.java
/** * If the request contains a Query element, it must have attribute typeNames. * * The OGC 07-045 spec is more restrictive than OGC 07-006. * * OGC 07-006 10.8.4.8:/* w w w . j a va 2s.c om*/ * The typeNames parameter is a list of one or more names of queryable entities in the catalogue's information model * that may be constrained in the predicate of the query. In the case of XML realization of the OGC core metadata * properties (Subclause 10.2.5), the element csw:Record is the only queryable entity. Other information models may * include more than one queryable component. For example, queryable components for the XML realization of the ebRIM * include rim:Service, rim:ExtrinsicObject and rim:Association. In such cases the application profile shall * describe how multiple typeNames values should be processed. * In addition, all or some of the these queryable entity names may be specified in the query to define which * metadata record elements the query should present in the response to the GetRecords operation. * * OGC 07-045 8.2.2.1.1: * Mandatory: Must support *one* of csw:Record? or gmd:MD_Metadata? in a query. Default value is csw:Record?. * * (note how OGC 07-045 mixes up a mandatory parameter that has a default value !!) * * We'll go for the default value option rather than the mandatory-ness. So: if typeNames is not present or empty, * "csw:Record" is used. * * If the request does not contain exactly one (or comma-separated, both) of the values specified in OGC 07-045, * an exception is thrown. If both are present "gmd:MD_Metadata" is preferred. 
* * @param query query element * @return typeName * @throws MissingParameterValueEx if typeNames is missing * @throws InvalidParameterValueEx if typeNames does not have one of the mandated values */ private String checkTypenames(Element query) throws MissingParameterValueEx, InvalidParameterValueEx { if (Log.isDebugEnabled(Geonet.CSW_SEARCH)) { Log.debug(Geonet.CSW_SEARCH, "checking typenames in query:\n" + Xml.getString(query)); } // // get the prefix used for CSW namespace used in this input document // String cswPrefix = getPrefixForNamespace(query, Csw.NAMESPACE_CSW); if (cswPrefix == null) { if (Log.isDebugEnabled(Geonet.CSW_SEARCH)) { Log.debug(Geonet.CSW_SEARCH, "checktypenames: csw prefix not found, using " + Csw.NAMESPACE_CSW.getPrefix()); } cswPrefix = Csw.NAMESPACE_CSW.getPrefix(); } // // get the prefix used for GMD namespace used in this input document // String gmdPrefix = getPrefixForNamespace(query, Csw.NAMESPACE_GMD); if (gmdPrefix == null) { if (Log.isDebugEnabled(Geonet.CSW_SEARCH)) { Log.debug(Geonet.CSW_SEARCH, "checktypenames: gmd prefix not found, using " + Csw.NAMESPACE_GMD.getPrefix()); } gmdPrefix = Csw.NAMESPACE_GMD.getPrefix(); } if (Log.isDebugEnabled(Geonet.CSW_SEARCH)) { Log.debug(Geonet.CSW_SEARCH, "checktypenames: csw prefix set to " + cswPrefix + ", gmd prefix set to " + gmdPrefix); } Attribute typeNames = query.getAttribute("typeNames", query.getNamespace()); typeNames = query.getAttribute("typeNames"); if (typeNames != null) { String typeNamesValue = typeNames.getValue(); // empty typenames element if (StringUtils.isEmpty(typeNamesValue)) { return cswPrefix + ":Record"; } // not empty: scan comma-separated string Scanner commaSeparator = new Scanner(typeNamesValue); commaSeparator.useDelimiter(","); String result = cswPrefix + ":Record"; while (commaSeparator.hasNext()) { String typeName = commaSeparator.next(); typeName = typeName.trim(); if (Log.isDebugEnabled(Geonet.CSW_SEARCH)) { Log.debug(Geonet.CSW_SEARCH, "checking 
typename in query:" + typeName); } if (!(typeName.equals(cswPrefix + ":Record") || typeName.equals(gmdPrefix + ":MD_Metadata"))) { throw new InvalidParameterValueEx("typeNames", "invalid value"); } if (typeName.equals(gmdPrefix + ":MD_Metadata")) { return typeName; } } return result; } // missing typeNames element else { return cswPrefix + ":Record"; } }
From source file:com.concursive.connect.web.modules.wiki.utils.WikiPDFUtils.java
/**
 * Renders a run of wiki-table markup lines (lines starting with '|' or '!') into a PdfPTable
 * added to the document. Continuation lines beginning with '!' are folded into the current
 * row until the row ends with '|'. The first row also determines the column count and may
 * carry "{width: ...}" style cells, which are consumed rather than rendered.
 *
 * @param line the first line of the table (caller has already read it)
 * @param in   source of subsequent lines; read until a non-table line is found
 * @return the first line that is NOT part of the table (for the caller to continue with),
 *         or null on end of input / malformed continuation
 * @throws Exception propagated from PDF building and nested wiki parsing
 */
private static String parseTable(WikiPDFContext context, Wiki wiki, String line, Document document,
        Connection db, ArrayList<Integer> wikiListTodo, ArrayList<Integer> wikiListDone, BufferedReader in)
        throws Exception {
    if (line == null) {
        return null;
    }
    PdfPTable pdfTable = null;
    int columnCount = 0;
    int rowCount = 0;
    // Keep track of the table's custom styles
    HashMap<Integer, String> cStyle = new HashMap<Integer, String>();
    while (line != null && (line.startsWith("|") || line.startsWith("!"))) {
        // Build a complete line: a row may span several physical lines until one ends in '|'
        String lineToParse = line;
        while (!line.endsWith("|")) {
            line = in.readLine();
            if (line == null) {
                // there is an error in the line to process
                return null;
            }
            if (line.startsWith("!")) {
                lineToParse += CRLF + line.substring(1);
            }
        }
        line = lineToParse;
        // Determine if the row can output
        boolean canOutput = true;
        ++rowCount;
        String cellType = null;
        Scanner sc = null;
        // '||...||' rows are header cells; '|...|' rows are data cells split on pipes that
        // are not inside wiki link markup ('[...]').
        if (line.startsWith("||") && line.endsWith("||")) {
            cellType = "th";
            sc = new Scanner(line).useDelimiter("[|][|]");
            // sc = new Scanner(line.substring(2, line.length() - 2)).useDelimiter("[|][|]");
        } else if (line.startsWith("|")) {
            cellType = "td";
            sc = new Scanner(line.substring(1, line.length() - 1)).useDelimiter("\\|(?=[^\\]]*(?:\\[|$))");
        }
        if (sc != null) {
            if (rowCount == 1) {
                // Count the columns, get the specified widths too...
                while (sc.hasNext()) {
                    ++columnCount;
                    sc.next();
                }
                // Reset the scanner now that the columns have been counted
                if (line.startsWith("||") && line.endsWith("||")) {
                    sc = new Scanner(line).useDelimiter("[|][|]");
                } else if (line.startsWith("|")) {
                    sc = new Scanner(line.substring(1, line.length() - 1))
                            .useDelimiter("\\|(?=[^\\]]*(?:\\[|$))");
                }
                // Start the table
                pdfTable = new PdfPTable(columnCount);
                //pdfTable.setWidthPercentage(100);
                pdfTable.setHorizontalAlignment(Element.ALIGN_LEFT);
                pdfTable.setSpacingBefore(10);
                pdfTable.setWidthPercentage(100);
                pdfTable.setKeepTogether(true);
            }
            // Determine the column span (empty cells between pipes extend the previous cell)
            int colSpan = 1;
            // Determine the cell being output
            int cellCount = 0;
            while (sc.hasNext()) {
                String cellData = sc.next();
                if (cellData.length() == 0) {
                    ++colSpan;
                    continue;
                }
                // Track the cell count being output
                ++cellCount;
                if (rowCount == 1) {
                    // Parse and validate the style input, e.g. "{width: 30%}"
                    LOG.debug("Checking style value: " + cellData);
                    if (cellData.startsWith("{") && cellData.endsWith("}")) {
                        String[] style = cellData.substring(1, cellData.length() - 1).split(":");
                        String attribute = style[0].trim();
                        String value = style[1].trim();
                        // Determine the width of each column and store it
                        if ("width".equals(attribute)) {
                            // Validate the width style
                            if (StringUtils.hasAllowedOnly("0123456789%.", value)) {
                                cStyle.put(cellCount, attribute + ": " + value + ";");
                            }
                        } else {
                            LOG.debug("Unsupported style: " + cellData);
                        }
                        // A style-only first row is consumed, not rendered
                        canOutput = false;
                    }
                }
                // Output the header
                if (canOutput) {
                    PdfPCell cell = new PdfPCell();
                    cell.setPadding(10);
                    cell.setBorderColor(new Color(100, 100, 100));
                    if ("th".equals(cellType)) {
                        cell.setHorizontalAlignment(Element.ALIGN_CENTER);
                        cell.setBackgroundColor(new Color(0xC0, 0xC0, 0xC0));
                    }
                    if (colSpan > 1) {
                        cell.setColspan(colSpan);
                    }
                    // Output the data
                    if (" ".equals(cellData) || "".equals(cellData)) {
                        // Put a blank space in blank cells for output consistency
                        cell.addElement(new Chunk(" "));
                        LOG.debug(" OUTPUTTING A BLANK");
                    } else {
                        // Output the cell as a complete wiki
                        float cellWidth = (100.0f / columnCount);
                        parseContent(context, wiki, cellData, document, cell, db, wikiListTodo, wikiListDone,
                                cellWidth);
                        LOG.debug(" OUTPUTTING CONTENT");
                    }
                    pdfTable.addCell(cell);
                }
            }
        }
        // read another line to see if it's part of the table
        line = in.readLine();
    }
    if (pdfTable != null) {
        LOG.debug("document.add(pdfTable)");
        document.add(pdfTable);
        // document.add(Chunk.NEWLINE);
    }
    return line;
}
From source file:ml.shifu.shifu.core.ConfusionMatrix.java
/**
 * Computes and logs the multi-class confusion matrix over the evaluation score files.
 *
 * Predictions are derived per algorithm: RF native classification takes a majority vote over
 * per-tree class outputs; one-vs-all models compare each class score against a prior-derived
 * threshold (preferring the positive class with the highest prior); otherwise scores are
 * averaged across models and argmax'ed. Rows of the matrix are actual tags, columns are
 * predicted tags.
 *
 * @param records record count hint from the caller (not read in this body)
 * @throws IOException on score-file access failure
 */
public void computeConfusionMatixForMultipleClassification(long records) throws IOException {
    SourceType sourceType = evalConfig.getDataSet().getSource();
    List<Scanner> scanners = ShifuFileUtils.getDataScanners(pathFinder.getEvalScorePath(evalConfig, sourceType),
            sourceType);
    boolean isDir = ShifuFileUtils.isDir(pathFinder.getEvalScorePath(evalConfig, sourceType), sourceType);
    Set<String> tagSet = new HashSet<String>(
            modelConfig.getFlattenTags(modelConfig.getPosTags(evalConfig), modelConfig.getNegTags(evalConfig)));
    List<Set<String>> tags = modelConfig.getSetTags(modelConfig.getPosTags(evalConfig),
            modelConfig.getNegTags(evalConfig));
    int classes = tags.size();
    long cnt = 0, invalidTargetCnt = 0;
    ColumnConfig targetColumn = CommonUtils.findTargetColumn(columnConfigList);
    List<Integer> binCountNeg = targetColumn.getBinCountNeg();
    List<Integer> binCountPos = targetColumn.getBinCountPos();
    // Per-class prior ratios from training bin counts; used as one-vs-all score thresholds.
    long[] binCount = new long[classes];
    double[] binRatio = new double[classes];
    long sumCnt = 0L;
    for (int i = 0; i < binCount.length; i++) {
        binCount[i] = binCountNeg.get(i) + binCountPos.get(i);
        sumCnt += binCount[i];
    }
    for (int i = 0; i < binCount.length; i++) {
        binRatio[i] = (binCount[i] * 1d) / sumCnt;
    }
    long[][] confusionMatrix = new long[classes][classes];
    for (Scanner scanner : scanners) {
        while (scanner.hasNext()) {
            if ((++cnt) % 100000 == 0) {
                LOG.info("Loaded " + cnt + " records.");
            }
            if (!isDir && cnt == 1) {
                // if the evaluation score file is the local file, skip the first line since we add header in
                continue;
            }
            // score is separated by default delimiter in our pig output format
            String[] raw = scanner.nextLine().split(Constants.DEFAULT_ESCAPE_DELIMITER);
            String tag = raw[targetColumnIndex];
            if (StringUtils.isBlank(tag) || !tagSet.contains(tag)) {
                invalidTargetCnt += 1;
                continue;
            }
            double[] scores = new double[classes];
            int predictIndex = -1;
            double maxScore = Double.NEGATIVE_INFINITY;
            if (CommonUtils.isTreeModel(modelConfig.getAlgorithm()) && !modelConfig.getTrain().isOneVsAll()) {
                // for RF native classification: majority vote over per-tree class outputs
                double[] tagCounts = new double[tags.size()];
                for (int i = this.multiClassScore1Index; i < (raw.length - this.metaColumns); i++) {
                    double dd = NumberFormatUtils.getDouble(raw[i], 0d);
                    tagCounts[(int) dd] += 1d;
                }
                double maxVotes = -1d;
                for (int i = 0; i < tagCounts.length; i++) {
                    if (tagCounts[i] > maxVotes) {
                        predictIndex = i;
                        maxScore = maxVotes = tagCounts[i];
                    }
                }
            } else if ((CommonUtils.isTreeModel(modelConfig.getAlgorithm())
                    || NNConstants.NN_ALG_NAME.equalsIgnoreCase(modelConfig.getAlgorithm()))
                    && modelConfig.getTrain().isOneVsAll()) {
                // for RF, GBT & NN OneVsAll classification
                if (classes == 2) {
                    // for binary classification, only one model is needed.
                    for (int i = this.multiClassScore1Index; i < (1 + this.multiClassScore1Index); i++) {
                        double dd = NumberFormatUtils.getDouble(raw[i], 0d);
                        if (dd > ((1d - binRatio[i - this.multiClassScore1Index]) * scoreScale)) {
                            predictIndex = 0;
                        } else {
                            predictIndex = 1;
                        }
                    }
                } else {
                    // logic is here, per each onevsrest, it may be im-banlanced. for example, class a, b, c,
                    // first is a(1) vs b and c(0), ratio is 10:1, then to compare score, if score > 1/11 it is
                    // positive, check other models to see if still positive in b or c, take the largest one
                    // with ratio for final prediction
                    int[] predClasses = new int[classes];
                    double[] scoress = new double[classes];
                    double[] threhs = new double[classes];
                    for (int i = this.multiClassScore1Index; i < (classes + this.multiClassScore1Index); i++) {
                        double dd = NumberFormatUtils.getDouble(raw[i], 0d);
                        scoress[i - this.multiClassScore1Index] = dd;
                        threhs[i - this.multiClassScore1Index] = (1d - binRatio[i - this.multiClassScore1Index])
                                * scoreScale;
                        if (dd > ((1d - binRatio[i - this.multiClassScore1Index]) * scoreScale)) {
                            predClasses[i - this.multiClassScore1Index] = 1;
                        }
                    }
                    double maxRatio = -1d;
                    double maxPositiveRatio = -1d;
                    int maxRatioIndex = -1;
                    for (int i = 0; i < binCount.length; i++) {
                        if (binRatio[i] > maxRatio) {
                            maxRatio = binRatio[i];
                            maxRatioIndex = i;
                        }
                        // if has positive, choose one with highest ratio
                        if (predClasses[i] == 1) {
                            if (binRatio[i] > maxPositiveRatio) {
                                maxPositiveRatio = binRatio[i];
                                predictIndex = i;
                            }
                        }
                    }
                    // no any positive, take the largest one
                    if (maxPositiveRatio < 0d) {
                        predictIndex = maxRatioIndex;
                    }
                }
            } else {
                if (classes == 2) {
                    // for binary classification, only one model is needed.
                    for (int i = this.multiClassScore1Index; i < (1 + this.multiClassScore1Index); i++) {
                        double dd = NumberFormatUtils.getDouble(raw[i], 0d);
                        if (dd > ((1d - binRatio[i - this.multiClassScore1Index]) * scoreScale)) {
                            predictIndex = 0;
                        } else {
                            predictIndex = 1;
                        }
                    }
                } else {
                    // only for NN & Native Multiple classification
                    // 1,2,3 4,5,6: 1,2,3 is model 0, 4,5,6 is model 1 — average per-class over models, argmax
                    for (int i = 0; i < classes; i++) {
                        for (int j = 0; j < multiClassModelCnt; j++) {
                            double dd = NumberFormatUtils
                                    .getDouble(raw[this.multiClassScore1Index + j * classes + i], 0d);
                            scores[i] += dd;
                        }
                        scores[i] /= multiClassModelCnt;
                        if (scores[i] > maxScore) {
                            predictIndex = i;
                            maxScore = scores[i];
                        }
                    }
                }
            }
            // Map the actual tag back to its class index (tags is a list of tag-sets per class)
            int tagIndex = -1;
            for (int i = 0; i < tags.size(); i++) {
                if (tags.get(i).contains(tag)) {
                    tagIndex = i;
                    break;
                }
            }
            confusionMatrix[tagIndex][predictIndex] += 1L;
        }
        scanner.close();
    }
    LOG.info("Totally loading {} records with invalid target records {} in eval {}.", cnt, invalidTargetCnt,
            evalConfig.getName());
    writeToConfMatrixFile(tags, confusionMatrix);
    // print conf matrix
    LOG.info("Multiple classification confustion matrix:");
    LOG.info(String.format("%15s: %20s", " ", tags.toString()));
    for (int i = 0; i < confusionMatrix.length; i++) {
        LOG.info(String.format("%15s: %20s", tags.get(i), Arrays.toString(confusionMatrix[i])));
    }
}
From source file:org.fao.geonet.component.csw.GetRecords.java
/** * If the request contains a Query element, it must have attribute typeNames. * * The OGC 07-045 spec is more restrictive than OGC 07-006. * * OGC 07-006 10.8.4.8: The typeNames parameter is a list of one or more names of queryable * entities in the catalogue's information model that may be constrained in the predicate of the * query. In the case of XML realization of the OGC core metadata properties (Subclause 10.2.5), * the element csw:Record is the only queryable entity. Other information models may include * more than one queryable component. For example, queryable components for the XML realization * of the ebRIM include rim:Service, rim:ExtrinsicObject and rim:Association. In such cases the * application profile shall describe how multiple typeNames values should be processed. In * addition, all or some of the these queryable entity names may be specified in the query to * define which metadata record elements the query should present in the response to the * GetRecords operation.// ww w . j ava 2s.c o m * * OGC 07-045 8.2.2.1.1: Mandatory: Must support *one* of csw:Record or * gmd:MD_Metadata in a query. Default value is csw:Record. * * (note how OGC 07-045 mixes up a mandatory parameter that has a default value !!) * * We'll go for the default value option rather than the mandatory-ness. So: if typeNames is not * present or empty, "csw:Record" is used. * * If the request does not contain exactly one (or comma-separated, both) of the values * specified in OGC 07-045, an exception is thrown. If both are present "gmd:MD_Metadata" is * preferred. 
* * @param query query element * @param isStrict enable strict error message to comply with GDI-DE Testsuite test * csw:InterfaceBindings.GetRecords-InvalidRequest * @return typeName * @throws MissingParameterValueEx if typeNames is missing * @throws InvalidParameterValueEx if typeNames does not have one of the mandated values */ private String checkTypenames(Element query, boolean isStrict) throws MissingParameterValueEx, InvalidParameterValueEx { if (Log.isDebugEnabled(Geonet.CSW_SEARCH)) { Log.debug(Geonet.CSW_SEARCH, "checking typenames in query:\n" + Xml.getString(query)); } // // get the prefix used for CSW namespace used in this input document // String cswPrefix = getPrefixForNamespace(query, Csw.NAMESPACE_CSW); if (cswPrefix == null) { if (Log.isDebugEnabled(Geonet.CSW_SEARCH)) { Log.debug(Geonet.CSW_SEARCH, "checktypenames: csw prefix not found, using " + Csw.NAMESPACE_CSW.getPrefix()); } cswPrefix = Csw.NAMESPACE_CSW.getPrefix(); } // // get the prefix used for GMD namespace used in this input document // String gmdPrefix = getPrefixForNamespace(query, Csw.NAMESPACE_GMD); if (gmdPrefix == null) { if (Log.isDebugEnabled(Geonet.CSW_SEARCH)) { Log.debug(Geonet.CSW_SEARCH, "checktypenames: gmd prefix not found, using " + Csw.NAMESPACE_GMD.getPrefix()); } gmdPrefix = Csw.NAMESPACE_GMD.getPrefix(); } if (Log.isDebugEnabled(Geonet.CSW_SEARCH)) { Log.debug(Geonet.CSW_SEARCH, "checktypenames: csw prefix set to " + cswPrefix + ", gmd prefix set to " + gmdPrefix); } Attribute typeNames = query.getAttribute("typeNames", query.getNamespace()); typeNames = query.getAttribute("typeNames"); if (typeNames != null) { String typeNamesValue = typeNames.getValue(); // empty typenames element if (StringUtils.isEmpty(typeNamesValue)) { return cswPrefix + ":Record"; } // not empty: scan space-separated string @SuppressWarnings("resource") Scanner spaceScanner = new Scanner(typeNamesValue); spaceScanner.useDelimiter(" "); String result = cswPrefix + ":Record"; while 
(spaceScanner.hasNext()) { String typeName = spaceScanner.next(); typeName = typeName.trim(); if (Log.isDebugEnabled(Geonet.CSW_SEARCH)) { Log.debug(Geonet.CSW_SEARCH, "checking typename in query:" + typeName); } if (!_schemaManager.getListOfTypeNames().contains(typeName)) { throw new InvalidParameterValueEx("typeNames", String.format("'%s' typename is not valid. Supported values are: %s", typeName, _schemaManager.getListOfTypeNames())); } if (typeName.equals(gmdPrefix + ":MD_Metadata")) { return typeName; } } return result; } // missing typeNames element else { if (isStrict) { //Mandatory check if strict. throw new MissingParameterValueEx("typeNames", String.format( "Attribute 'typeNames' is missing. Supported values are: %s. Default is csw:Record according to OGC 07-045.", _schemaManager.getListOfTypeNames())); } else { //Return default value according to OGC 07-045. return cswPrefix + ":Record"; } } }
From source file:com.github.pffy.chinese.freq.ChineseFrequency.java
/**
 * Analyzes {@code this.input}: strips non-Hanzi characters, counts per-character frequency,
 * maps each character to pinyin via the {@code hpdx} index, and materializes CSV/TSV/TXT
 * frequency reports (sorted by zero-padded frequency descending) plus a summary string.
 * Results and counters are written back to instance fields.
 */
private void analyze() {
    int inputCount = 0;
    int removedCount = 0;
    int hanziCount = 0;
    int uniqueHanziCount = 0;
    int processedCount = 0;
    int freq = 0;
    String csvOutput = this.HEADER_ROW_CSV;
    String tsvOutput = this.HEADER_ROW_TSV;
    String txtOutput = this.HEADER_ROW_TXT;
    String csv, tsv, txt;
    String str, input, pinyin, hanzi;
    Scanner sc;
    List<String> hanziList;
    Map<String, Integer> freqMap;
    JSONObject hpdx;
    String[] arr;
    Set<String> unmappedCharacters;
    hpdx = this.hpdx;
    input = this.input;
    inputCount = input.length();
    // Drop everything that is not a Hanzi character; the removed count is reported.
    input = retainHanzi(input);
    removedCount = inputCount - input.length();
    hanziCount = input.length();
    // Empty delimiter makes the Scanner yield one character per token.
    sc = new Scanner(input);
    sc.useDelimiter("");
    hanziList = new ArrayList<String>();
    freqMap = new HashMap<String, Integer>();
    // counts occurrences
    while (sc.hasNext()) {
        str = sc.next();
        hanziList.add(str);
        if (freqMap.containsKey(str)) {
            freqMap.put(str, (Integer) freqMap.get(str).intValue() + 1);
        } else {
            freqMap.put(str, 1);
        }
    }
    // done with Scanner
    sc.close();
    uniqueHanziCount = freqMap.keySet().size();
    // Reverse-ordered keys "0000freq-hanzi-pinyin" sort entries by frequency descending;
    // the zero padding keeps the lexicographic order numeric.
    SortedMap<String, String> freqTreeMap = new TreeMap<String, String>(Collections.reverseOrder());
    unmappedCharacters = new HashSet<String>();
    for (Entry<String, Integer> counts : freqMap.entrySet()) {
        try {
            hanzi = counts.getKey();
            pinyin = hpdx.getString(hanzi);
        } catch (JSONException je) {
            // add this unmapped character to the list
            unmappedCharacters.add(counts.getKey());
            // not idx mapped yet. that's ok. move on.
            continue;
        }
        if (pinyin.isEmpty()) {
            // if character is unmapped in idx, do not process.
            continue;
        }
        freq = counts.getValue();
        freqTreeMap.put(
                String.format("%" + this.PADSIZE_FREQ + "s", freq).replace(' ', '0') + "-" + hanzi + "-"
                        + pinyin,
                hanzi + "," + pinyin + "," + freq);
        processedCount++;
    }
    // outputs
    for (Entry<String, String> outputs : freqTreeMap.entrySet()) {
        csv = this.CRLF + outputs.getValue();
        csvOutput += csv;
        tsv = csv.replaceAll(",", "\t");
        tsvOutput += tsv;
        arr = csv.split(",");
        // arr[0] is hanzi. arr[1] is pinyin. arr[2] is freq.
        txt = padSummary(arr[0] + " [" + arr[1] + "]", this.PADSIZE_SUMMARY + 1) + arr[2];
        txtOutput += txt;
    }
    // cleanup
    csvOutput = csvOutput.trim();
    tsvOutput = tsvOutput.trim();
    txtOutput = txtOutput.trim();
    // post-process
    this.csvOutput = csvOutput;
    this.tsvOutput = tsvOutput;
    this.txtOutput = txtOutput;
    // counts
    this.inputCount = inputCount;
    this.removedCount = removedCount;
    this.hanziCount = hanziCount;
    this.uniqueHanziCount = uniqueHanziCount;
    this.processedCount = processedCount;
    this.unmappedCharacters = unmappedCharacters;
    // summary
    String summaryString = "";
    summaryString += padSummary(this.MSG_TOTAL_COUNT, this.PADSIZE_SUMMARY) + inputCount;
    summaryString += this.CRLF + padSummary(this.MSG_REMOVED_COUNT, this.PADSIZE_SUMMARY) + removedCount;
    summaryString += this.CRLF + padSummary(this.MSG_HANZI_COUNT, this.PADSIZE_SUMMARY) + hanziCount;
    summaryString += this.CRLF + padSummary(this.MSG_UNIQUE_COUNT, this.PADSIZE_SUMMARY) + uniqueHanziCount;
    summaryString += this.CRLF + padSummary(this.MSG_PROCESSED_COUNT, this.PADSIZE_SUMMARY) + processedCount;
    this.summary = summaryString;
}
From source file:com.inmobi.conduit.AbstractService.java
/**
 * Reads the audit-counter part files produced by the mapred job and aggregates them into a
 * table: row key is "&lt;stream&gt;&lt;delimiter&gt;&lt;fileName&gt;", column key is the publish-time
 * window, value is the message count.
 *
 * Each counter token must have at least four delimiter-separated fields
 * (stream, file name, publish time window, message count). Malformed tokens and unreadable
 * part files are logged and skipped — processing is deliberately best-effort.
 *
 * @param fs file system holding {@code tmpCounterOutputPath}
 * @return the aggregated table, or null when the job produced no part files
 */
protected Table<String, Long, Long> parseCountersFile(FileSystem fs) {
    List<Path> partFiles = listPartFiles(tmpCounterOutputPath, fs);
    if (partFiles == null || partFiles.size() == 0) {
        LOG.warn("No counters files generated by mapred job");
        return null;
    }
    Table<String, Long, Long> result = HashBasedTable.create();
    for (Path filePath : partFiles) {
        FSDataInputStream fin = null;
        Scanner scanner = null;
        try {
            fin = fs.open(filePath);
            scanner = new Scanner(fin);
            while (scanner.hasNext()) {
                String counterNameValue = null;
                try {
                    counterNameValue = scanner.next();
                    String tmp[] = counterNameValue.split(ConduitConstants.AUDIT_COUNTER_NAME_DELIMITER);
                    if (tmp.length < 4) {
                        LOG.error("Malformed counter name,skipping " + counterNameValue);
                        continue;
                    }
                    String streamFileNameCombo = tmp[0] + ConduitConstants.AUDIT_COUNTER_NAME_DELIMITER
                            + tmp[1];
                    Long publishTimeWindow = Long.parseLong(tmp[2]);
                    Long numOfMsgs = Long.parseLong(tmp[3]);
                    // NOTE(review): Table.put overwrites on duplicate (row, column) keys —
                    // presumably each part file contributes distinct keys; confirm.
                    result.put(streamFileNameCombo, publishTimeWindow, numOfMsgs);
                } catch (Exception e) {
                    // Broad catch is intentional: one bad line (e.g. NumberFormatException)
                    // must not abort the whole file.
                    LOG.error("Counters file has malformed line with counter name = " + counterNameValue
                            + " ..skipping the line", e);
                }
            }
        } catch (IOException e1) {
            LOG.error("Error while opening file " + filePath + " Skipping");
            continue;
        } finally {
            // Close both handles; failures here are only logged.
            try {
                if (fin != null) {
                    fin.close();
                }
                if (scanner != null) {
                    scanner.close();
                }
            } catch (Exception e) {
                LOG.warn("Error while closing file " + filePath + " or scanner");
            }
        }
    }
    return result;
}
From source file:ml.shifu.shifu.core.ConfusionMatrix.java
/**
 * Streams the evaluation score files once, maintaining a running confusion matrix (scores are
 * assumed sorted so each record flips one instance from FN→TP or TN→FP), and samples
 * performance points into bucketed lists: FPR, catch rate (recall), gain/action rate, model
 * score — each in raw and weighted form. The result is written to file, optionally logged and
 * charted.
 *
 * @param pigPosTags        total positive count from the scoring job
 * @param pigNegTags        total negative count from the scoring job
 * @param pigPosWeightTags  total positive weight
 * @param pigNegWeightTags  total negative weight
 * @param records           record count hint from the caller (not read in this body)
 * @param maxPScore         maximum raw score observed (used for GBT rescaling)
 * @param minPScore         minimum raw score observed (used for GBT rescaling)
 * @param scoreDataPath     path of the score files to read
 * @param evalPerformancePath where the performance result is persisted
 * @param isPrint           whether to log bucketed results
 * @param isGenerateChart   whether to emit chart/JSON artifacts
 * @param targetColumnIndex column index of the target tag
 * @param scoreColumnIndex  column index of the model score
 * @param weightColumnIndex column index of the weight (&lt;= 0 means unweighted)
 * @param isUseMaxMinScore  use observed max/min instead of the [0, scoreScale] default
 * @return the assembled performance result
 * @throws IOException on score-file access failure
 */
public PerformanceResult bufferedComputeConfusionMatrixAndPerformance(long pigPosTags, long pigNegTags,
        double pigPosWeightTags, double pigNegWeightTags, long records, double maxPScore, double minPScore,
        String scoreDataPath, String evalPerformancePath, boolean isPrint, boolean isGenerateChart,
        int targetColumnIndex, int scoreColumnIndex, int weightColumnIndex, boolean isUseMaxMinScore)
        throws IOException {
    // 1. compute maxScore and minScore in case some cases score are not in [0, 1]
    double maxScore = 1d * scoreScale, minScore = 0d;
    if (isGBTNeedConvertScore()) {
        // if need convert to [0, 1], just keep max score to 1 and min score to 0 without doing anything
    } else {
        if (isUseMaxMinScore) {
            // TODO some cases maxPScore is already scaled, how to fix that issue
            maxScore = maxPScore;
            minScore = minPScore;
        } else {
            // otherwise, keep [0, 1]
        }
    }
    LOG.info("{} Transformed (scale included) max score is {}, transformed min score is {}",
            evalConfig.getGbtScoreConvertStrategy(), maxScore, minScore);
    SourceType sourceType = evalConfig.getDataSet().getSource();
    List<Scanner> scanners = ShifuFileUtils.getDataScanners(scoreDataPath, sourceType);
    LOG.info("Number of score files is {} in eval {}.", scanners.size(), evalConfig.getName());
    int numBucket = evalConfig.getPerformanceBucketNum();
    boolean hasWeight = StringUtils.isNotBlank(evalConfig.getDataSet().getWeightColumnName());
    boolean isDir = ShifuFileUtils.isDir(pathFinder.getEvalScorePath(evalConfig, sourceType), sourceType);
    List<PerformanceObject> FPRList = new ArrayList<PerformanceObject>(numBucket + 1);
    List<PerformanceObject> catchRateList = new ArrayList<PerformanceObject>(numBucket + 1);
    List<PerformanceObject> gainList = new ArrayList<PerformanceObject>(numBucket + 1);
    List<PerformanceObject> modelScoreList = new ArrayList<PerformanceObject>(numBucket + 1);
    List<PerformanceObject> FPRWeightList = new ArrayList<PerformanceObject>(numBucket + 1);
    List<PerformanceObject> catchRateWeightList = new ArrayList<PerformanceObject>(numBucket + 1);
    List<PerformanceObject> gainWeightList = new ArrayList<PerformanceObject>(numBucket + 1);
    // Per-bucket thresholds: binScore for the model-score axis, binCapacity for rate axes.
    double binScore = (maxScore - minScore) * 1d / numBucket, binCapacity = 1.0 / numBucket, scoreBinCount = 0,
            scoreBinWeigthedCount = 0;
    int fpBin = 1, tpBin = 1, gainBin = 1, fpWeightBin = 1, tpWeightBin = 1, gainWeightBin = 1,
            modelScoreBin = 1;
    long index = 0, cnt = 0, invalidTargetCnt = 0, invalidWgtCnt = 0;
    // Initial matrix: everything predicted negative (all positives are FN, all negatives TN).
    ConfusionMatrixObject prevCmo = buildInitalCmo(pigPosTags, pigNegTags, pigPosWeightTags, pigNegWeightTags,
            maxScore);
    PerformanceObject po = buildFirstPO(prevCmo);
    FPRList.add(po);
    catchRateList.add(po);
    gainList.add(po);
    FPRWeightList.add(po);
    catchRateWeightList.add(po);
    gainWeightList.add(po);
    modelScoreList.add(po);
    boolean isGBTScoreHalfCutoffStreategy = isGBTScoreHalfCutoffStreategy();
    boolean isGBTScoreMaxMinScaleStreategy = isGBTScoreMaxMinScaleStreategy();
    Splitter splitter = Splitter.on(delimiter).trimResults();
    for (Scanner scanner : scanners) {
        while (scanner.hasNext()) {
            if ((++cnt) % 100000L == 0L) {
                LOG.info("Loaded {} records.", cnt);
            }
            if ((!isDir) && cnt == 1) {
                // if the evaluation score file is the local file, skip the first line since we add
                continue;
            }
            // score is separated by default delimiter in our pig output format
            String[] raw = Lists.newArrayList(splitter.split(scanner.nextLine())).toArray(new String[0]);
            // tag check
            String tag = raw[targetColumnIndex];
            if (StringUtils.isBlank(tag) || (!posTags.contains(tag) && !negTags.contains(tag))) {
                invalidTargetCnt += 1;
                continue;
            }
            double weight = 1d;
            // if has weight
            if (weightColumnIndex > 0) {
                try {
                    weight = Double.parseDouble(raw[weightColumnIndex]);
                } catch (NumberFormatException e) {
                    invalidWgtCnt += 1;
                }
                if (weight < 0d) {
                    invalidWgtCnt += 1;
                    weight = 1d;
                }
            }
            double score = 0.0;
            try {
                score = Double.parseDouble(raw[scoreColumnIndex]);
            } catch (NumberFormatException e) {
                // user set the score column wrong ? Sample the warning to avoid log flooding.
                if (Math.random() < 0.05) {
                    LOG.warn("The score column - {} is not number. Is score column set correctly?",
                            raw[scoreColumnIndex]);
                }
                continue;
            }
            scoreBinCount += 1;
            scoreBinWeigthedCount += weight;
            ConfusionMatrixObject cmo = new ConfusionMatrixObject(prevCmo);
            if (posTags.contains(tag)) {
                // Positive Instance
                cmo.setTp(cmo.getTp() + 1);
                cmo.setFn(cmo.getFn() - 1);
                cmo.setWeightedTp(cmo.getWeightedTp() + weight * 1.0);
                cmo.setWeightedFn(cmo.getWeightedFn() - weight * 1.0);
            } else {
                // Negative Instance
                cmo.setFp(cmo.getFp() + 1);
                cmo.setTn(cmo.getTn() - 1);
                cmo.setWeightedFp(cmo.getWeightedFp() + weight * 1.0);
                cmo.setWeightedTn(cmo.getWeightedTn() - weight * 1.0);
            }
            if (isGBTScoreHalfCutoffStreategy) {
                // half cut off means score <0 then set to 0 and then min score is 0, max score is raw max
                // score, use max min scale to rescale to [0, 1]
                if (score < 0d) {
                    score = 0d;
                }
                score = ((score - 0) * scoreScale) / (maxPScore - 0);
            } else if (isGBTScoreMaxMinScaleStreategy) {
                // use max min scaler to make score in [0, 1], don't foget to time scoreScale
                score = ((score - minPScore) * scoreScale) / (maxPScore - minPScore);
            } else {
                // do nothing, use current score
            }
            cmo.setScore(Double.parseDouble(SCORE_FORMAT.format(score)));
            ConfusionMatrixObject object = cmo;
            po = PerformanceEvaluator.setPerformanceObject(object);
            // Sample a performance point whenever a metric crosses into its next bucket.
            if (po.fpr >= fpBin * binCapacity) {
                po.binNum = fpBin++;
                FPRList.add(po);
            }
            if (po.recall >= tpBin * binCapacity) {
                po.binNum = tpBin++;
                catchRateList.add(po);
            }
            // prevent 99%
            double validRecordCnt = (double) (index + 1);
            if (validRecordCnt / (pigPosTags + pigNegTags) >= gainBin * binCapacity) {
                po.binNum = gainBin++;
                gainList.add(po);
            }
            if (po.weightedFpr >= fpWeightBin * binCapacity) {
                po.binNum = fpWeightBin++;
                FPRWeightList.add(po);
            }
            if (po.weightedRecall >= tpWeightBin * binCapacity) {
                po.binNum = tpWeightBin++;
                catchRateWeightList.add(po);
            }
            if ((object.getWeightedTp() + object.getWeightedFp()) / object.getWeightedTotal() >= gainWeightBin
                    * binCapacity) {
                po.binNum = gainWeightBin++;
                gainWeightList.add(po);
            }
            if ((maxScore - (modelScoreBin * binScore)) >= score) {
                po.binNum = modelScoreBin++;
                po.scoreCount = scoreBinCount;
                po.scoreWgtCount = scoreBinWeigthedCount;
                // System.out.println("score count is " + scoreBinCount);
                // reset to 0 for next bin score cnt stats
                scoreBinCount = scoreBinWeigthedCount = 0;
                modelScoreList.add(po);
            }
            index += 1;
            prevCmo = cmo;
        }
        scanner.close();
    }
    LOG.info(
            "Totally loading {} records with invalid target records {} and invalid weight records {} in eval {}.",
            cnt, invalidTargetCnt, invalidWgtCnt, evalConfig.getName());
    PerformanceResult result = buildPerfResult(FPRList, catchRateList, gainList, modelScoreList, FPRWeightList,
            catchRateWeightList, gainWeightList);
    // Serialize logging/writing/charting across concurrent evaluations.
    synchronized (this.lock) {
        if (isPrint) {
            PerformanceEvaluator.logResult(FPRList, "Bucketing False Positive Rate");
            if (hasWeight) {
                PerformanceEvaluator.logResult(FPRWeightList, "Bucketing Weighted False Positive Rate");
            }
            PerformanceEvaluator.logResult(catchRateList, "Bucketing Catch Rate");
            if (hasWeight) {
                PerformanceEvaluator.logResult(catchRateWeightList, "Bucketing Weighted Catch Rate");
            }
            PerformanceEvaluator.logResult(gainList, "Bucketing Action Rate");
            if (hasWeight) {
                PerformanceEvaluator.logResult(gainWeightList, "Bucketing Weighted Action Rate");
            }
            PerformanceEvaluator.logAucResult(result, hasWeight);
        }
        writePerResult2File(evalPerformancePath, result);
        if (isGenerateChart) {
            generateChartAndJsonPerfFiles(hasWeight, result);
        }
    }
    if (cnt == 0) {
        LOG.error("No score read, the EvalScore did not genernate or is null file");
        throw new ShifuException(ShifuErrorCode.ERROR_EVALSCORE);
    }
    return result;
}
From source file:org.soyatec.windowsazure.table.internal.CloudTableRest.java
/** * Make call to table service to send the batch operations. */// w w w . ja v a2 s.co m private void performBatch() { getRetryPolicy().execute(new Callable<Object>() { public Object call() throws Exception { String path = IBatchExecutor.BATCH_PATH; String queryString = ""; ResourceUriComponents uriComponents = new ResourceUriComponents(getAccountName(), path, queryString); String batchBoundary = IBatchExecutor.BATCH_BOUNDARY_PREFIX + Utilities.computeMD5(String.valueOf(new Date().getTime())); String changesetBoundary = IBatchExecutor.CHANGESET_BOUNDARY_PREFIX + Utilities.computeMD5(String.valueOf(new Date().getTime())); URI uri = HttpUtilities.createRequestUri(getBaseUri(), isUsePathStyleUris(), getAccountName(), path, null, getTimeout(), null, uriComponents); HttpRequest request = HttpUtilities.createHttpRequest(uri, HttpMethod.Post); request.setHeader(HeaderNames.ApiVersion, XmsVersion.VERSION_2009_07_17); request.addHeader(HeaderNames.ContentType, "multipart/mixed; boundary=" + batchBoundary); String body = assambleBatchOperationBody(batch, batchBoundary, changesetBoundary); ((HttpEntityEnclosingRequest) request).setEntity(new ByteArrayEntity(body.getBytes())); credentials.signRequestForSharedKeyLite(request, uriComponents); try { HttpWebResponse response = HttpUtilities.getResponse(request); if (response.getStatusCode() == HttpStatus.SC_ACCEPTED) { StringBuilder buf = new StringBuilder(1024); Scanner in = new Scanner(response.getStream()); while (in.hasNext()) { buf.append(in.nextLine()).append("\n"); } String errorMessage = Utilities.retrieveErrorMessages(buf.toString()); if (errorMessage != null) { throw new StorageServerException(StorageErrorCode.BatchOperationError, errorMessage, response.getStatusCode(), null); } else { Logger.log(buf.toString()); } response.close(); } else { HttpUtilities.processUnexpectedStatusCode(response); } } catch (StorageException we) { throw HttpUtilities.translateWebException(we); } return null; } }); }