List of usage examples for java.util Scanner hasNext
public boolean hasNext()
From source file:com.tesora.dve.tools.CLIBuilder.java
/**
 * Drains the scanner through {@link #scanFile(Scanner)}, collecting every non-null result.
 *
 * @param scanner token source; consumed until exhausted
 * @return the files that were successfully scanned (possibly empty, never null)
 * @throws PEException propagated from scanFile
 */
private List<File> scanFilesOptional(Scanner scanner) throws PEException {
    final List<File> result = new ArrayList<File>();
    for (; scanner.hasNext(); ) {
        final File next = scanFile(scanner);
        if (next == null) {
            // scanFile signalled "nothing usable here" — skip it
            continue;
        }
        result.add(next);
    }
    return result;
}
From source file:com.gatf.executor.core.AcceptanceTestContext.java
/**
 * Seeds the shared http-header lookup map from the String constants declared on
 * {@link HttpHeaders}, then — when a WSDL location file is configured — parses each listed
 * WSDL and caches per-operation SOAP request templates, SOAP actions and service endpoints.
 *
 * @throws Exception on reflection, I/O, WSDL-parsing or XML-parsing failure
 */
private void initSoapContextAndHttpHeaders() throws Exception {
    // Collect every static String constant of HttpHeaders; the lower-cased form is the
    // lookup key, so header-name matching is case-insensitive.
    Field[] declaredFields = HttpHeaders.class.getDeclaredFields();
    for (Field field : declaredFields) {
        if (java.lang.reflect.Modifier.isStatic(field.getModifiers())
                && field.getType().equals(String.class)) {
            httpHeaders.put(field.get(null).toString().toLowerCase(), field.get(null).toString());
        }
    }
    File file = null;
    if (gatfExecutorConfig.getWsdlLocFile() != null
            && !gatfExecutorConfig.getWsdlLocFile().trim().isEmpty())
        file = getResourceFile(gatfExecutorConfig.getWsdlLocFile());
    if (file != null) {
        // One WSDL entry per line; '\r' is stripped to tolerate Windows line endings.
        Scanner s = new Scanner(file);
        s.useDelimiter("\n");
        List<String> list = new ArrayList<String>();
        while (s.hasNext()) {
            list.add(s.next().replace("\r", ""));
        }
        s.close();
        for (String wsdlLoc : list) {
            if (!wsdlLoc.trim().isEmpty()) {
                // Each line is "<prefix>,<wsdl-location>"; the prefix namespaces the cache keys.
                String[] wsdlLocParts = wsdlLoc.split(",");
                logger.info("Started Parsing WSDL location - " + wsdlLocParts[1]);
                Wsdl wsdl = Wsdl.parse(wsdlLocParts[1]);
                for (QName bindingName : wsdl.getBindings()) {
                    SoapBuilder builder = wsdl.getBuilder(bindingName);
                    for (SoapOperation operation : builder.getOperations()) {
                        // Build a skeleton request for the operation and cache it as a DOM
                        // Document; for distributed load tests also keep the raw string form.
                        String request = builder.buildInputMessage(operation);
                        DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
                        DocumentBuilder db = dbf.newDocumentBuilder();
                        Document soapMessage = db.parse(new ByteArrayInputStream(request.getBytes()));
                        if (gatfExecutorConfig.isDistributedLoadTests()) {
                            soapStrMessages.put(wsdlLocParts[0] + operation.getOperationName(), request);
                        }
                        soapMessages.put(wsdlLocParts[0] + operation.getOperationName(), soapMessage);
                        if (operation.getSoapAction() != null) {
                            soapActions.put(wsdlLocParts[0] + operation.getOperationName(),
                                    operation.getSoapAction());
                        }
                        logger.info("Adding message for SOAP operation - " + operation.getOperationName());
                    }
                    // NOTE(review): only the first service URL is cached as the endpoint —
                    // presumably one endpoint per binding; confirm for multi-port services.
                    soapEndpoints.put(wsdlLocParts[0], builder.getServiceUrls().get(0));
                    logger.info("Adding SOAP Service endpoint - " + builder.getServiceUrls().get(0));
                }
                logger.info("Done Parsing WSDL location - " + wsdlLocParts[1]);
            }
        }
    }
}
From source file:org.fao.geonet.kernel.csw.services.GetRecords.java
/** * If the request contains a Query element, it must have attribute typeNames. * * The OGC 07-045 spec is more restrictive than OGC 07-006. * * OGC 07-006 10.8.4.8:/* w w w . j a va 2s.c om*/ * The typeNames parameter is a list of one or more names of queryable entities in the catalogue's information model * that may be constrained in the predicate of the query. In the case of XML realization of the OGC core metadata * properties (Subclause 10.2.5), the element csw:Record is the only queryable entity. Other information models may * include more than one queryable component. For example, queryable components for the XML realization of the ebRIM * include rim:Service, rim:ExtrinsicObject and rim:Association. In such cases the application profile shall * describe how multiple typeNames values should be processed. * In addition, all or some of the these queryable entity names may be specified in the query to define which * metadata record elements the query should present in the response to the GetRecords operation. * * OGC 07-045 8.2.2.1.1: * Mandatory: Must support *one* of csw:Record? or gmd:MD_Metadata? in a query. Default value is csw:Record?. * * (note how OGC 07-045 mixes up a mandatory parameter that has a default value !!) * * We'll go for the default value option rather than the mandatory-ness. So: if typeNames is not present or empty, * "csw:Record" is used. * * If the request does not contain exactly one (or comma-separated, both) of the values specified in OGC 07-045, * an exception is thrown. If both are present "gmd:MD_Metadata" is preferred. 
* * @param query query element * @return typeName * @throws MissingParameterValueEx if typeNames is missing * @throws InvalidParameterValueEx if typeNames does not have one of the mandated values */ private String checkTypenames(Element query) throws MissingParameterValueEx, InvalidParameterValueEx { if (Log.isDebugEnabled(Geonet.CSW_SEARCH)) { Log.debug(Geonet.CSW_SEARCH, "checking typenames in query:\n" + Xml.getString(query)); } // // get the prefix used for CSW namespace used in this input document // String cswPrefix = getPrefixForNamespace(query, Csw.NAMESPACE_CSW); if (cswPrefix == null) { if (Log.isDebugEnabled(Geonet.CSW_SEARCH)) { Log.debug(Geonet.CSW_SEARCH, "checktypenames: csw prefix not found, using " + Csw.NAMESPACE_CSW.getPrefix()); } cswPrefix = Csw.NAMESPACE_CSW.getPrefix(); } // // get the prefix used for GMD namespace used in this input document // String gmdPrefix = getPrefixForNamespace(query, Csw.NAMESPACE_GMD); if (gmdPrefix == null) { if (Log.isDebugEnabled(Geonet.CSW_SEARCH)) { Log.debug(Geonet.CSW_SEARCH, "checktypenames: gmd prefix not found, using " + Csw.NAMESPACE_GMD.getPrefix()); } gmdPrefix = Csw.NAMESPACE_GMD.getPrefix(); } if (Log.isDebugEnabled(Geonet.CSW_SEARCH)) { Log.debug(Geonet.CSW_SEARCH, "checktypenames: csw prefix set to " + cswPrefix + ", gmd prefix set to " + gmdPrefix); } Attribute typeNames = query.getAttribute("typeNames", query.getNamespace()); typeNames = query.getAttribute("typeNames"); if (typeNames != null) { String typeNamesValue = typeNames.getValue(); // empty typenames element if (StringUtils.isEmpty(typeNamesValue)) { return cswPrefix + ":Record"; } // not empty: scan comma-separated string Scanner commaSeparator = new Scanner(typeNamesValue); commaSeparator.useDelimiter(","); String result = cswPrefix + ":Record"; while (commaSeparator.hasNext()) { String typeName = commaSeparator.next(); typeName = typeName.trim(); if (Log.isDebugEnabled(Geonet.CSW_SEARCH)) { Log.debug(Geonet.CSW_SEARCH, "checking 
typename in query:" + typeName); } if (!(typeName.equals(cswPrefix + ":Record") || typeName.equals(gmdPrefix + ":MD_Metadata"))) { throw new InvalidParameterValueEx("typeNames", "invalid value"); } if (typeName.equals(gmdPrefix + ":MD_Metadata")) { return typeName; } } return result; } // missing typeNames element else { return cswPrefix + ":Record"; } }
From source file:com.concursive.connect.web.modules.wiki.utils.WikiPDFUtils.java
/**
 * Renders a run of wiki-table markup lines (lines starting with '|' or '!') into a PdfPTable
 * added to the document. Continuation lines beginning with '!' are folded into the current
 * row until the row ends with '|'. The first row also determines the column count and may
 * carry "{width: ...}" style cells, which are consumed rather than rendered.
 *
 * @param line the first line of the table (caller has already read it)
 * @param in   source of subsequent lines; read until a non-table line is found
 * @return the first line that is NOT part of the table (for the caller to continue with),
 *         or null on end of input / malformed continuation
 * @throws Exception propagated from PDF building and nested wiki parsing
 */
private static String parseTable(WikiPDFContext context, Wiki wiki, String line, Document document,
        Connection db, ArrayList<Integer> wikiListTodo, ArrayList<Integer> wikiListDone, BufferedReader in)
        throws Exception {
    if (line == null) {
        return null;
    }
    PdfPTable pdfTable = null;
    int columnCount = 0;
    int rowCount = 0;
    // Keep track of the table's custom styles
    HashMap<Integer, String> cStyle = new HashMap<Integer, String>();
    while (line != null && (line.startsWith("|") || line.startsWith("!"))) {
        // Build a complete line: a row may span several physical lines until one ends in '|'
        String lineToParse = line;
        while (!line.endsWith("|")) {
            line = in.readLine();
            if (line == null) {
                // there is an error in the line to process
                return null;
            }
            if (line.startsWith("!")) {
                lineToParse += CRLF + line.substring(1);
            }
        }
        line = lineToParse;
        // Determine if the row can output
        boolean canOutput = true;
        ++rowCount;
        String cellType = null;
        Scanner sc = null;
        // '||...||' rows are header cells; '|...|' rows are data cells split on pipes that
        // are not inside wiki link markup ('[...]').
        if (line.startsWith("||") && line.endsWith("||")) {
            cellType = "th";
            sc = new Scanner(line).useDelimiter("[|][|]");
            // sc = new Scanner(line.substring(2, line.length() - 2)).useDelimiter("[|][|]");
        } else if (line.startsWith("|")) {
            cellType = "td";
            sc = new Scanner(line.substring(1, line.length() - 1)).useDelimiter("\\|(?=[^\\]]*(?:\\[|$))");
        }
        if (sc != null) {
            if (rowCount == 1) {
                // Count the columns, get the specified widths too...
                while (sc.hasNext()) {
                    ++columnCount;
                    sc.next();
                }
                // Reset the scanner now that the columns have been counted
                if (line.startsWith("||") && line.endsWith("||")) {
                    sc = new Scanner(line).useDelimiter("[|][|]");
                } else if (line.startsWith("|")) {
                    sc = new Scanner(line.substring(1, line.length() - 1))
                            .useDelimiter("\\|(?=[^\\]]*(?:\\[|$))");
                }
                // Start the table
                pdfTable = new PdfPTable(columnCount);
                //pdfTable.setWidthPercentage(100);
                pdfTable.setHorizontalAlignment(Element.ALIGN_LEFT);
                pdfTable.setSpacingBefore(10);
                pdfTable.setWidthPercentage(100);
                pdfTable.setKeepTogether(true);
            }
            // Determine the column span (empty cells between pipes extend the previous cell)
            int colSpan = 1;
            // Determine the cell being output
            int cellCount = 0;
            while (sc.hasNext()) {
                String cellData = sc.next();
                if (cellData.length() == 0) {
                    ++colSpan;
                    continue;
                }
                // Track the cell count being output
                ++cellCount;
                if (rowCount == 1) {
                    // Parse and validate the style input, e.g. "{width: 30%}"
                    LOG.debug("Checking style value: " + cellData);
                    if (cellData.startsWith("{") && cellData.endsWith("}")) {
                        String[] style = cellData.substring(1, cellData.length() - 1).split(":");
                        String attribute = style[0].trim();
                        String value = style[1].trim();
                        // Determine the width of each column and store it
                        if ("width".equals(attribute)) {
                            // Validate the width style
                            if (StringUtils.hasAllowedOnly("0123456789%.", value)) {
                                cStyle.put(cellCount, attribute + ": " + value + ";");
                            }
                        } else {
                            LOG.debug("Unsupported style: " + cellData);
                        }
                        // A style-only first row is consumed, not rendered
                        canOutput = false;
                    }
                }
                // Output the header
                if (canOutput) {
                    PdfPCell cell = new PdfPCell();
                    cell.setPadding(10);
                    cell.setBorderColor(new Color(100, 100, 100));
                    if ("th".equals(cellType)) {
                        cell.setHorizontalAlignment(Element.ALIGN_CENTER);
                        cell.setBackgroundColor(new Color(0xC0, 0xC0, 0xC0));
                    }
                    if (colSpan > 1) {
                        cell.setColspan(colSpan);
                    }
                    // Output the data
                    if (" ".equals(cellData) || "".equals(cellData)) {
                        // Put a blank space in blank cells for output consistency
                        cell.addElement(new Chunk(" "));
                        LOG.debug(" OUTPUTTING A BLANK");
                    } else {
                        // Output the cell as a complete wiki
                        float cellWidth = (100.0f / columnCount);
                        parseContent(context, wiki, cellData, document, cell, db, wikiListTodo, wikiListDone,
                                cellWidth);
                        LOG.debug(" OUTPUTTING CONTENT");
                    }
                    pdfTable.addCell(cell);
                }
            }
        }
        // read another line to see if it's part of the table
        line = in.readLine();
    }
    if (pdfTable != null) {
        LOG.debug("document.add(pdfTable)");
        document.add(pdfTable);
        // document.add(Chunk.NEWLINE);
    }
    return line;
}
From source file:ml.shifu.shifu.core.ConfusionMatrix.java
/**
 * Computes and logs the multi-class confusion matrix over the evaluation score files.
 *
 * Predictions are derived per algorithm: RF native classification takes a majority vote over
 * per-tree class outputs; one-vs-all models compare each class score against a prior-derived
 * threshold (preferring the positive class with the highest prior); otherwise scores are
 * averaged across models and argmax'ed. Rows of the matrix are actual tags, columns are
 * predicted tags.
 *
 * @param records record count hint from the caller (not read in this body)
 * @throws IOException on score-file access failure
 */
public void computeConfusionMatixForMultipleClassification(long records) throws IOException {
    SourceType sourceType = evalConfig.getDataSet().getSource();
    List<Scanner> scanners = ShifuFileUtils.getDataScanners(pathFinder.getEvalScorePath(evalConfig, sourceType),
            sourceType);
    boolean isDir = ShifuFileUtils.isDir(pathFinder.getEvalScorePath(evalConfig, sourceType), sourceType);
    Set<String> tagSet = new HashSet<String>(
            modelConfig.getFlattenTags(modelConfig.getPosTags(evalConfig), modelConfig.getNegTags(evalConfig)));
    List<Set<String>> tags = modelConfig.getSetTags(modelConfig.getPosTags(evalConfig),
            modelConfig.getNegTags(evalConfig));
    int classes = tags.size();
    long cnt = 0, invalidTargetCnt = 0;
    ColumnConfig targetColumn = CommonUtils.findTargetColumn(columnConfigList);
    List<Integer> binCountNeg = targetColumn.getBinCountNeg();
    List<Integer> binCountPos = targetColumn.getBinCountPos();
    // Per-class prior ratios from training bin counts; used as one-vs-all score thresholds.
    long[] binCount = new long[classes];
    double[] binRatio = new double[classes];
    long sumCnt = 0L;
    for (int i = 0; i < binCount.length; i++) {
        binCount[i] = binCountNeg.get(i) + binCountPos.get(i);
        sumCnt += binCount[i];
    }
    for (int i = 0; i < binCount.length; i++) {
        binRatio[i] = (binCount[i] * 1d) / sumCnt;
    }
    long[][] confusionMatrix = new long[classes][classes];
    for (Scanner scanner : scanners) {
        while (scanner.hasNext()) {
            if ((++cnt) % 100000 == 0) {
                LOG.info("Loaded " + cnt + " records.");
            }
            if (!isDir && cnt == 1) {
                // if the evaluation score file is the local file, skip the first line since we add header in
                continue;
            }
            // score is separated by default delimiter in our pig output format
            String[] raw = scanner.nextLine().split(Constants.DEFAULT_ESCAPE_DELIMITER);
            String tag = raw[targetColumnIndex];
            if (StringUtils.isBlank(tag) || !tagSet.contains(tag)) {
                invalidTargetCnt += 1;
                continue;
            }
            double[] scores = new double[classes];
            int predictIndex = -1;
            double maxScore = Double.NEGATIVE_INFINITY;
            if (CommonUtils.isTreeModel(modelConfig.getAlgorithm()) && !modelConfig.getTrain().isOneVsAll()) {
                // for RF native classification: majority vote over per-tree class outputs
                double[] tagCounts = new double[tags.size()];
                for (int i = this.multiClassScore1Index; i < (raw.length - this.metaColumns); i++) {
                    double dd = NumberFormatUtils.getDouble(raw[i], 0d);
                    tagCounts[(int) dd] += 1d;
                }
                double maxVotes = -1d;
                for (int i = 0; i < tagCounts.length; i++) {
                    if (tagCounts[i] > maxVotes) {
                        predictIndex = i;
                        maxScore = maxVotes = tagCounts[i];
                    }
                }
            } else if ((CommonUtils.isTreeModel(modelConfig.getAlgorithm())
                    || NNConstants.NN_ALG_NAME.equalsIgnoreCase(modelConfig.getAlgorithm()))
                    && modelConfig.getTrain().isOneVsAll()) {
                // for RF, GBT & NN OneVsAll classification
                if (classes == 2) {
                    // for binary classification, only one model is needed.
                    for (int i = this.multiClassScore1Index; i < (1 + this.multiClassScore1Index); i++) {
                        double dd = NumberFormatUtils.getDouble(raw[i], 0d);
                        if (dd > ((1d - binRatio[i - this.multiClassScore1Index]) * scoreScale)) {
                            predictIndex = 0;
                        } else {
                            predictIndex = 1;
                        }
                    }
                } else {
                    // logic is here, per each onevsrest, it may be im-banlanced. for example, class a, b, c,
                    // first is a(1) vs b and c(0), ratio is 10:1, then to compare score, if score > 1/11 it is
                    // positive, check other models to see if still positive in b or c, take the largest one
                    // with ratio for final prediction
                    int[] predClasses = new int[classes];
                    double[] scoress = new double[classes];
                    double[] threhs = new double[classes];
                    for (int i = this.multiClassScore1Index; i < (classes + this.multiClassScore1Index); i++) {
                        double dd = NumberFormatUtils.getDouble(raw[i], 0d);
                        scoress[i - this.multiClassScore1Index] = dd;
                        threhs[i - this.multiClassScore1Index] = (1d - binRatio[i - this.multiClassScore1Index])
                                * scoreScale;
                        if (dd > ((1d - binRatio[i - this.multiClassScore1Index]) * scoreScale)) {
                            predClasses[i - this.multiClassScore1Index] = 1;
                        }
                    }
                    double maxRatio = -1d;
                    double maxPositiveRatio = -1d;
                    int maxRatioIndex = -1;
                    for (int i = 0; i < binCount.length; i++) {
                        if (binRatio[i] > maxRatio) {
                            maxRatio = binRatio[i];
                            maxRatioIndex = i;
                        }
                        // if has positive, choose one with highest ratio
                        if (predClasses[i] == 1) {
                            if (binRatio[i] > maxPositiveRatio) {
                                maxPositiveRatio = binRatio[i];
                                predictIndex = i;
                            }
                        }
                    }
                    // no any positive, take the largest one
                    if (maxPositiveRatio < 0d) {
                        predictIndex = maxRatioIndex;
                    }
                }
            } else {
                if (classes == 2) {
                    // for binary classification, only one model is needed.
                    for (int i = this.multiClassScore1Index; i < (1 + this.multiClassScore1Index); i++) {
                        double dd = NumberFormatUtils.getDouble(raw[i], 0d);
                        if (dd > ((1d - binRatio[i - this.multiClassScore1Index]) * scoreScale)) {
                            predictIndex = 0;
                        } else {
                            predictIndex = 1;
                        }
                    }
                } else {
                    // only for NN & Native Multiple classification
                    // 1,2,3 4,5,6: 1,2,3 is model 0, 4,5,6 is model 1 — average per-class over models, argmax
                    for (int i = 0; i < classes; i++) {
                        for (int j = 0; j < multiClassModelCnt; j++) {
                            double dd = NumberFormatUtils
                                    .getDouble(raw[this.multiClassScore1Index + j * classes + i], 0d);
                            scores[i] += dd;
                        }
                        scores[i] /= multiClassModelCnt;
                        if (scores[i] > maxScore) {
                            predictIndex = i;
                            maxScore = scores[i];
                        }
                    }
                }
            }
            // Map the actual tag back to its class index (tags is a list of tag-sets per class)
            int tagIndex = -1;
            for (int i = 0; i < tags.size(); i++) {
                if (tags.get(i).contains(tag)) {
                    tagIndex = i;
                    break;
                }
            }
            confusionMatrix[tagIndex][predictIndex] += 1L;
        }
        scanner.close();
    }
    LOG.info("Totally loading {} records with invalid target records {} in eval {}.", cnt, invalidTargetCnt,
            evalConfig.getName());
    writeToConfMatrixFile(tags, confusionMatrix);
    // print conf matrix
    LOG.info("Multiple classification confustion matrix:");
    LOG.info(String.format("%15s: %20s", " ", tags.toString()));
    for (int i = 0; i < confusionMatrix.length; i++) {
        LOG.info(String.format("%15s: %20s", tags.get(i), Arrays.toString(confusionMatrix[i])));
    }
}
From source file:org.fao.geonet.component.csw.GetRecords.java
/** * If the request contains a Query element, it must have attribute typeNames. * * The OGC 07-045 spec is more restrictive than OGC 07-006. * * OGC 07-006 10.8.4.8: The typeNames parameter is a list of one or more names of queryable * entities in the catalogue's information model that may be constrained in the predicate of the * query. In the case of XML realization of the OGC core metadata properties (Subclause 10.2.5), * the element csw:Record is the only queryable entity. Other information models may include * more than one queryable component. For example, queryable components for the XML realization * of the ebRIM include rim:Service, rim:ExtrinsicObject and rim:Association. In such cases the * application profile shall describe how multiple typeNames values should be processed. In * addition, all or some of the these queryable entity names may be specified in the query to * define which metadata record elements the query should present in the response to the * GetRecords operation.// ww w . j ava 2s.c o m * * OGC 07-045 8.2.2.1.1: Mandatory: Must support *one* of csw:Record or * gmd:MD_Metadata in a query. Default value is csw:Record. * * (note how OGC 07-045 mixes up a mandatory parameter that has a default value !!) * * We'll go for the default value option rather than the mandatory-ness. So: if typeNames is not * present or empty, "csw:Record" is used. * * If the request does not contain exactly one (or comma-separated, both) of the values * specified in OGC 07-045, an exception is thrown. If both are present "gmd:MD_Metadata" is * preferred. 
* * @param query query element * @param isStrict enable strict error message to comply with GDI-DE Testsuite test * csw:InterfaceBindings.GetRecords-InvalidRequest * @return typeName * @throws MissingParameterValueEx if typeNames is missing * @throws InvalidParameterValueEx if typeNames does not have one of the mandated values */ private String checkTypenames(Element query, boolean isStrict) throws MissingParameterValueEx, InvalidParameterValueEx { if (Log.isDebugEnabled(Geonet.CSW_SEARCH)) { Log.debug(Geonet.CSW_SEARCH, "checking typenames in query:\n" + Xml.getString(query)); } // // get the prefix used for CSW namespace used in this input document // String cswPrefix = getPrefixForNamespace(query, Csw.NAMESPACE_CSW); if (cswPrefix == null) { if (Log.isDebugEnabled(Geonet.CSW_SEARCH)) { Log.debug(Geonet.CSW_SEARCH, "checktypenames: csw prefix not found, using " + Csw.NAMESPACE_CSW.getPrefix()); } cswPrefix = Csw.NAMESPACE_CSW.getPrefix(); } // // get the prefix used for GMD namespace used in this input document // String gmdPrefix = getPrefixForNamespace(query, Csw.NAMESPACE_GMD); if (gmdPrefix == null) { if (Log.isDebugEnabled(Geonet.CSW_SEARCH)) { Log.debug(Geonet.CSW_SEARCH, "checktypenames: gmd prefix not found, using " + Csw.NAMESPACE_GMD.getPrefix()); } gmdPrefix = Csw.NAMESPACE_GMD.getPrefix(); } if (Log.isDebugEnabled(Geonet.CSW_SEARCH)) { Log.debug(Geonet.CSW_SEARCH, "checktypenames: csw prefix set to " + cswPrefix + ", gmd prefix set to " + gmdPrefix); } Attribute typeNames = query.getAttribute("typeNames", query.getNamespace()); typeNames = query.getAttribute("typeNames"); if (typeNames != null) { String typeNamesValue = typeNames.getValue(); // empty typenames element if (StringUtils.isEmpty(typeNamesValue)) { return cswPrefix + ":Record"; } // not empty: scan space-separated string @SuppressWarnings("resource") Scanner spaceScanner = new Scanner(typeNamesValue); spaceScanner.useDelimiter(" "); String result = cswPrefix + ":Record"; while 
(spaceScanner.hasNext()) { String typeName = spaceScanner.next(); typeName = typeName.trim(); if (Log.isDebugEnabled(Geonet.CSW_SEARCH)) { Log.debug(Geonet.CSW_SEARCH, "checking typename in query:" + typeName); } if (!_schemaManager.getListOfTypeNames().contains(typeName)) { throw new InvalidParameterValueEx("typeNames", String.format("'%s' typename is not valid. Supported values are: %s", typeName, _schemaManager.getListOfTypeNames())); } if (typeName.equals(gmdPrefix + ":MD_Metadata")) { return typeName; } } return result; } // missing typeNames element else { if (isStrict) { //Mandatory check if strict. throw new MissingParameterValueEx("typeNames", String.format( "Attribute 'typeNames' is missing. Supported values are: %s. Default is csw:Record according to OGC 07-045.", _schemaManager.getListOfTypeNames())); } else { //Return default value according to OGC 07-045. return cswPrefix + ":Record"; } } }
From source file:com.github.pffy.chinese.freq.ChineseFrequency.java
/**
 * Analyzes {@code this.input}: strips non-Hanzi characters, counts per-character frequency,
 * maps each character to pinyin via the {@code hpdx} index, and materializes CSV/TSV/TXT
 * frequency reports (sorted by zero-padded frequency descending) plus a summary string.
 * Results and counters are written back to instance fields.
 */
private void analyze() {
    int inputCount = 0;
    int removedCount = 0;
    int hanziCount = 0;
    int uniqueHanziCount = 0;
    int processedCount = 0;
    int freq = 0;
    String csvOutput = this.HEADER_ROW_CSV;
    String tsvOutput = this.HEADER_ROW_TSV;
    String txtOutput = this.HEADER_ROW_TXT;
    String csv, tsv, txt;
    String str, input, pinyin, hanzi;
    Scanner sc;
    List<String> hanziList;
    Map<String, Integer> freqMap;
    JSONObject hpdx;
    String[] arr;
    Set<String> unmappedCharacters;
    hpdx = this.hpdx;
    input = this.input;
    inputCount = input.length();
    // Drop everything that is not a Hanzi character; the removed count is reported.
    input = retainHanzi(input);
    removedCount = inputCount - input.length();
    hanziCount = input.length();
    // Empty delimiter makes the Scanner yield one character per token.
    sc = new Scanner(input);
    sc.useDelimiter("");
    hanziList = new ArrayList<String>();
    freqMap = new HashMap<String, Integer>();
    // counts occurrences
    while (sc.hasNext()) {
        str = sc.next();
        hanziList.add(str);
        if (freqMap.containsKey(str)) {
            freqMap.put(str, (Integer) freqMap.get(str).intValue() + 1);
        } else {
            freqMap.put(str, 1);
        }
    }
    // done with Scanner
    sc.close();
    uniqueHanziCount = freqMap.keySet().size();
    // Reverse-ordered keys "0000freq-hanzi-pinyin" sort entries by frequency descending;
    // the zero padding keeps the lexicographic order numeric.
    SortedMap<String, String> freqTreeMap = new TreeMap<String, String>(Collections.reverseOrder());
    unmappedCharacters = new HashSet<String>();
    for (Entry<String, Integer> counts : freqMap.entrySet()) {
        try {
            hanzi = counts.getKey();
            pinyin = hpdx.getString(hanzi);
        } catch (JSONException je) {
            // add this unmapped character to the list
            unmappedCharacters.add(counts.getKey());
            // not idx mapped yet. that's ok. move on.
            continue;
        }
        if (pinyin.isEmpty()) {
            // if character is unmapped in idx, do not process.
            continue;
        }
        freq = counts.getValue();
        freqTreeMap.put(
                String.format("%" + this.PADSIZE_FREQ + "s", freq).replace(' ', '0') + "-" + hanzi + "-"
                        + pinyin,
                hanzi + "," + pinyin + "," + freq);
        processedCount++;
    }
    // outputs
    for (Entry<String, String> outputs : freqTreeMap.entrySet()) {
        csv = this.CRLF + outputs.getValue();
        csvOutput += csv;
        tsv = csv.replaceAll(",", "\t");
        tsvOutput += tsv;
        arr = csv.split(",");
        // arr[0] is hanzi. arr[1] is pinyin. arr[2] is freq.
        txt = padSummary(arr[0] + " [" + arr[1] + "]", this.PADSIZE_SUMMARY + 1) + arr[2];
        txtOutput += txt;
    }
    // cleanup
    csvOutput = csvOutput.trim();
    tsvOutput = tsvOutput.trim();
    txtOutput = txtOutput.trim();
    // post-process
    this.csvOutput = csvOutput;
    this.tsvOutput = tsvOutput;
    this.txtOutput = txtOutput;
    // counts
    this.inputCount = inputCount;
    this.removedCount = removedCount;
    this.hanziCount = hanziCount;
    this.uniqueHanziCount = uniqueHanziCount;
    this.processedCount = processedCount;
    this.unmappedCharacters = unmappedCharacters;
    // summary
    String summaryString = "";
    summaryString += padSummary(this.MSG_TOTAL_COUNT, this.PADSIZE_SUMMARY) + inputCount;
    summaryString += this.CRLF + padSummary(this.MSG_REMOVED_COUNT, this.PADSIZE_SUMMARY) + removedCount;
    summaryString += this.CRLF + padSummary(this.MSG_HANZI_COUNT, this.PADSIZE_SUMMARY) + hanziCount;
    summaryString += this.CRLF + padSummary(this.MSG_UNIQUE_COUNT, this.PADSIZE_SUMMARY) + uniqueHanziCount;
    summaryString += this.CRLF + padSummary(this.MSG_PROCESSED_COUNT, this.PADSIZE_SUMMARY) + processedCount;
    this.summary = summaryString;
}
From source file:com.inmobi.conduit.AbstractService.java
/**
 * Reads the audit-counter part files produced by the mapred job and aggregates them into a
 * table: row key is "&lt;stream&gt;&lt;delimiter&gt;&lt;fileName&gt;", column key is the publish-time
 * window, value is the message count.
 *
 * Each counter token must have at least four delimiter-separated fields
 * (stream, file name, publish time window, message count). Malformed tokens and unreadable
 * part files are logged and skipped — processing is deliberately best-effort.
 *
 * @param fs file system holding {@code tmpCounterOutputPath}
 * @return the aggregated table, or null when the job produced no part files
 */
protected Table<String, Long, Long> parseCountersFile(FileSystem fs) {
    List<Path> partFiles = listPartFiles(tmpCounterOutputPath, fs);
    if (partFiles == null || partFiles.size() == 0) {
        LOG.warn("No counters files generated by mapred job");
        return null;
    }
    Table<String, Long, Long> result = HashBasedTable.create();
    for (Path filePath : partFiles) {
        FSDataInputStream fin = null;
        Scanner scanner = null;
        try {
            fin = fs.open(filePath);
            scanner = new Scanner(fin);
            while (scanner.hasNext()) {
                String counterNameValue = null;
                try {
                    counterNameValue = scanner.next();
                    String tmp[] = counterNameValue.split(ConduitConstants.AUDIT_COUNTER_NAME_DELIMITER);
                    if (tmp.length < 4) {
                        LOG.error("Malformed counter name,skipping " + counterNameValue);
                        continue;
                    }
                    String streamFileNameCombo = tmp[0] + ConduitConstants.AUDIT_COUNTER_NAME_DELIMITER
                            + tmp[1];
                    Long publishTimeWindow = Long.parseLong(tmp[2]);
                    Long numOfMsgs = Long.parseLong(tmp[3]);
                    // NOTE(review): Table.put overwrites on duplicate (row, column) keys —
                    // presumably each part file contributes distinct keys; confirm.
                    result.put(streamFileNameCombo, publishTimeWindow, numOfMsgs);
                } catch (Exception e) {
                    // Broad catch is intentional: one bad line (e.g. NumberFormatException)
                    // must not abort the whole file.
                    LOG.error("Counters file has malformed line with counter name = " + counterNameValue
                            + " ..skipping the line", e);
                }
            }
        } catch (IOException e1) {
            LOG.error("Error while opening file " + filePath + " Skipping");
            continue;
        } finally {
            // Close both handles; failures here are only logged.
            try {
                if (fin != null) {
                    fin.close();
                }
                if (scanner != null) {
                    scanner.close();
                }
            } catch (Exception e) {
                LOG.warn("Error while closing file " + filePath + " or scanner");
            }
        }
    }
    return result;
}
From source file:ml.shifu.shifu.core.ConfusionMatrix.java
/**
 * Streams the evaluation score files once, maintaining a running confusion matrix (scores are
 * assumed sorted so each record flips one instance from FN→TP or TN→FP), and samples
 * performance points into bucketed lists: FPR, catch rate (recall), gain/action rate, model
 * score — each in raw and weighted form. The result is written to file, optionally logged and
 * charted.
 *
 * @param pigPosTags        total positive count from the scoring job
 * @param pigNegTags        total negative count from the scoring job
 * @param pigPosWeightTags  total positive weight
 * @param pigNegWeightTags  total negative weight
 * @param records           record count hint from the caller (not read in this body)
 * @param maxPScore         maximum raw score observed (used for GBT rescaling)
 * @param minPScore         minimum raw score observed (used for GBT rescaling)
 * @param scoreDataPath     path of the score files to read
 * @param evalPerformancePath where the performance result is persisted
 * @param isPrint           whether to log bucketed results
 * @param isGenerateChart   whether to emit chart/JSON artifacts
 * @param targetColumnIndex column index of the target tag
 * @param scoreColumnIndex  column index of the model score
 * @param weightColumnIndex column index of the weight (&lt;= 0 means unweighted)
 * @param isUseMaxMinScore  use observed max/min instead of the [0, scoreScale] default
 * @return the assembled performance result
 * @throws IOException on score-file access failure
 */
public PerformanceResult bufferedComputeConfusionMatrixAndPerformance(long pigPosTags, long pigNegTags,
        double pigPosWeightTags, double pigNegWeightTags, long records, double maxPScore, double minPScore,
        String scoreDataPath, String evalPerformancePath, boolean isPrint, boolean isGenerateChart,
        int targetColumnIndex, int scoreColumnIndex, int weightColumnIndex, boolean isUseMaxMinScore)
        throws IOException {
    // 1. compute maxScore and minScore in case some cases score are not in [0, 1]
    double maxScore = 1d * scoreScale, minScore = 0d;
    if (isGBTNeedConvertScore()) {
        // if need convert to [0, 1], just keep max score to 1 and min score to 0 without doing anything
    } else {
        if (isUseMaxMinScore) {
            // TODO some cases maxPScore is already scaled, how to fix that issue
            maxScore = maxPScore;
            minScore = minPScore;
        } else {
            // otherwise, keep [0, 1]
        }
    }
    LOG.info("{} Transformed (scale included) max score is {}, transformed min score is {}",
            evalConfig.getGbtScoreConvertStrategy(), maxScore, minScore);
    SourceType sourceType = evalConfig.getDataSet().getSource();
    List<Scanner> scanners = ShifuFileUtils.getDataScanners(scoreDataPath, sourceType);
    LOG.info("Number of score files is {} in eval {}.", scanners.size(), evalConfig.getName());
    int numBucket = evalConfig.getPerformanceBucketNum();
    boolean hasWeight = StringUtils.isNotBlank(evalConfig.getDataSet().getWeightColumnName());
    boolean isDir = ShifuFileUtils.isDir(pathFinder.getEvalScorePath(evalConfig, sourceType), sourceType);
    List<PerformanceObject> FPRList = new ArrayList<PerformanceObject>(numBucket + 1);
    List<PerformanceObject> catchRateList = new ArrayList<PerformanceObject>(numBucket + 1);
    List<PerformanceObject> gainList = new ArrayList<PerformanceObject>(numBucket + 1);
    List<PerformanceObject> modelScoreList = new ArrayList<PerformanceObject>(numBucket + 1);
    List<PerformanceObject> FPRWeightList = new ArrayList<PerformanceObject>(numBucket + 1);
    List<PerformanceObject> catchRateWeightList = new ArrayList<PerformanceObject>(numBucket + 1);
    List<PerformanceObject> gainWeightList = new ArrayList<PerformanceObject>(numBucket + 1);
    // Per-bucket thresholds: binScore for the model-score axis, binCapacity for rate axes.
    double binScore = (maxScore - minScore) * 1d / numBucket, binCapacity = 1.0 / numBucket, scoreBinCount = 0,
            scoreBinWeigthedCount = 0;
    int fpBin = 1, tpBin = 1, gainBin = 1, fpWeightBin = 1, tpWeightBin = 1, gainWeightBin = 1,
            modelScoreBin = 1;
    long index = 0, cnt = 0, invalidTargetCnt = 0, invalidWgtCnt = 0;
    // Initial matrix: everything predicted negative (all positives are FN, all negatives TN).
    ConfusionMatrixObject prevCmo = buildInitalCmo(pigPosTags, pigNegTags, pigPosWeightTags, pigNegWeightTags,
            maxScore);
    PerformanceObject po = buildFirstPO(prevCmo);
    FPRList.add(po);
    catchRateList.add(po);
    gainList.add(po);
    FPRWeightList.add(po);
    catchRateWeightList.add(po);
    gainWeightList.add(po);
    modelScoreList.add(po);
    boolean isGBTScoreHalfCutoffStreategy = isGBTScoreHalfCutoffStreategy();
    boolean isGBTScoreMaxMinScaleStreategy = isGBTScoreMaxMinScaleStreategy();
    Splitter splitter = Splitter.on(delimiter).trimResults();
    for (Scanner scanner : scanners) {
        while (scanner.hasNext()) {
            if ((++cnt) % 100000L == 0L) {
                LOG.info("Loaded {} records.", cnt);
            }
            if ((!isDir) && cnt == 1) {
                // if the evaluation score file is the local file, skip the first line since we add
                continue;
            }
            // score is separated by default delimiter in our pig output format
            String[] raw = Lists.newArrayList(splitter.split(scanner.nextLine())).toArray(new String[0]);
            // tag check
            String tag = raw[targetColumnIndex];
            if (StringUtils.isBlank(tag) || (!posTags.contains(tag) && !negTags.contains(tag))) {
                invalidTargetCnt += 1;
                continue;
            }
            double weight = 1d;
            // if has weight
            if (weightColumnIndex > 0) {
                try {
                    weight = Double.parseDouble(raw[weightColumnIndex]);
                } catch (NumberFormatException e) {
                    invalidWgtCnt += 1;
                }
                if (weight < 0d) {
                    invalidWgtCnt += 1;
                    weight = 1d;
                }
            }
            double score = 0.0;
            try {
                score = Double.parseDouble(raw[scoreColumnIndex]);
            } catch (NumberFormatException e) {
                // user set the score column wrong ? Sample the warning to avoid log flooding.
                if (Math.random() < 0.05) {
                    LOG.warn("The score column - {} is not number. Is score column set correctly?",
                            raw[scoreColumnIndex]);
                }
                continue;
            }
            scoreBinCount += 1;
            scoreBinWeigthedCount += weight;
            ConfusionMatrixObject cmo = new ConfusionMatrixObject(prevCmo);
            if (posTags.contains(tag)) {
                // Positive Instance
                cmo.setTp(cmo.getTp() + 1);
                cmo.setFn(cmo.getFn() - 1);
                cmo.setWeightedTp(cmo.getWeightedTp() + weight * 1.0);
                cmo.setWeightedFn(cmo.getWeightedFn() - weight * 1.0);
            } else {
                // Negative Instance
                cmo.setFp(cmo.getFp() + 1);
                cmo.setTn(cmo.getTn() - 1);
                cmo.setWeightedFp(cmo.getWeightedFp() + weight * 1.0);
                cmo.setWeightedTn(cmo.getWeightedTn() - weight * 1.0);
            }
            if (isGBTScoreHalfCutoffStreategy) {
                // half cut off means score <0 then set to 0 and then min score is 0, max score is raw max
                // score, use max min scale to rescale to [0, 1]
                if (score < 0d) {
                    score = 0d;
                }
                score = ((score - 0) * scoreScale) / (maxPScore - 0);
            } else if (isGBTScoreMaxMinScaleStreategy) {
                // use max min scaler to make score in [0, 1], don't foget to time scoreScale
                score = ((score - minPScore) * scoreScale) / (maxPScore - minPScore);
            } else {
                // do nothing, use current score
            }
            cmo.setScore(Double.parseDouble(SCORE_FORMAT.format(score)));
            ConfusionMatrixObject object = cmo;
            po = PerformanceEvaluator.setPerformanceObject(object);
            // Sample a performance point whenever a metric crosses into its next bucket.
            if (po.fpr >= fpBin * binCapacity) {
                po.binNum = fpBin++;
                FPRList.add(po);
            }
            if (po.recall >= tpBin * binCapacity) {
                po.binNum = tpBin++;
                catchRateList.add(po);
            }
            // prevent 99%
            double validRecordCnt = (double) (index + 1);
            if (validRecordCnt / (pigPosTags + pigNegTags) >= gainBin * binCapacity) {
                po.binNum = gainBin++;
                gainList.add(po);
            }
            if (po.weightedFpr >= fpWeightBin * binCapacity) {
                po.binNum = fpWeightBin++;
                FPRWeightList.add(po);
            }
            if (po.weightedRecall >= tpWeightBin * binCapacity) {
                po.binNum = tpWeightBin++;
                catchRateWeightList.add(po);
            }
            if ((object.getWeightedTp() + object.getWeightedFp()) / object.getWeightedTotal() >= gainWeightBin
                    * binCapacity) {
                po.binNum = gainWeightBin++;
                gainWeightList.add(po);
            }
            if ((maxScore - (modelScoreBin * binScore)) >= score) {
                po.binNum = modelScoreBin++;
                po.scoreCount = scoreBinCount;
                po.scoreWgtCount = scoreBinWeigthedCount;
                // System.out.println("score count is " + scoreBinCount);
                // reset to 0 for next bin score cnt stats
                scoreBinCount = scoreBinWeigthedCount = 0;
                modelScoreList.add(po);
            }
            index += 1;
            prevCmo = cmo;
        }
        scanner.close();
    }
    LOG.info(
            "Totally loading {} records with invalid target records {} and invalid weight records {} in eval {}.",
            cnt, invalidTargetCnt, invalidWgtCnt, evalConfig.getName());
    PerformanceResult result = buildPerfResult(FPRList, catchRateList, gainList, modelScoreList, FPRWeightList,
            catchRateWeightList, gainWeightList);
    // Serialize logging/writing/charting across concurrent evaluations.
    synchronized (this.lock) {
        if (isPrint) {
            PerformanceEvaluator.logResult(FPRList, "Bucketing False Positive Rate");
            if (hasWeight) {
                PerformanceEvaluator.logResult(FPRWeightList, "Bucketing Weighted False Positive Rate");
            }
            PerformanceEvaluator.logResult(catchRateList, "Bucketing Catch Rate");
            if (hasWeight) {
                PerformanceEvaluator.logResult(catchRateWeightList, "Bucketing Weighted Catch Rate");
            }
            PerformanceEvaluator.logResult(gainList, "Bucketing Action Rate");
            if (hasWeight) {
                PerformanceEvaluator.logResult(gainWeightList, "Bucketing Weighted Action Rate");
            }
            PerformanceEvaluator.logAucResult(result, hasWeight);
        }
        writePerResult2File(evalPerformancePath, result);
        if (isGenerateChart) {
            generateChartAndJsonPerfFiles(hasWeight, result);
        }
    }
    if (cnt == 0) {
        LOG.error("No score read, the EvalScore did not genernate or is null file");
        throw new ShifuException(ShifuErrorCode.ERROR_EVALSCORE);
    }
    return result;
}
From source file:org.soyatec.windowsazure.table.internal.CloudTableRest.java
/** * Make call to table service to send the batch operations. */// w w w . ja v a2 s.co m private void performBatch() { getRetryPolicy().execute(new Callable<Object>() { public Object call() throws Exception { String path = IBatchExecutor.BATCH_PATH; String queryString = ""; ResourceUriComponents uriComponents = new ResourceUriComponents(getAccountName(), path, queryString); String batchBoundary = IBatchExecutor.BATCH_BOUNDARY_PREFIX + Utilities.computeMD5(String.valueOf(new Date().getTime())); String changesetBoundary = IBatchExecutor.CHANGESET_BOUNDARY_PREFIX + Utilities.computeMD5(String.valueOf(new Date().getTime())); URI uri = HttpUtilities.createRequestUri(getBaseUri(), isUsePathStyleUris(), getAccountName(), path, null, getTimeout(), null, uriComponents); HttpRequest request = HttpUtilities.createHttpRequest(uri, HttpMethod.Post); request.setHeader(HeaderNames.ApiVersion, XmsVersion.VERSION_2009_07_17); request.addHeader(HeaderNames.ContentType, "multipart/mixed; boundary=" + batchBoundary); String body = assambleBatchOperationBody(batch, batchBoundary, changesetBoundary); ((HttpEntityEnclosingRequest) request).setEntity(new ByteArrayEntity(body.getBytes())); credentials.signRequestForSharedKeyLite(request, uriComponents); try { HttpWebResponse response = HttpUtilities.getResponse(request); if (response.getStatusCode() == HttpStatus.SC_ACCEPTED) { StringBuilder buf = new StringBuilder(1024); Scanner in = new Scanner(response.getStream()); while (in.hasNext()) { buf.append(in.nextLine()).append("\n"); } String errorMessage = Utilities.retrieveErrorMessages(buf.toString()); if (errorMessage != null) { throw new StorageServerException(StorageErrorCode.BatchOperationError, errorMessage, response.getStatusCode(), null); } else { Logger.log(buf.toString()); } response.close(); } else { HttpUtilities.processUnexpectedStatusCode(response); } } catch (StorageException we) { throw HttpUtilities.translateWebException(we); } return null; } }); }