Example usage for org.apache.commons.lang StringUtils splitPreserveAllTokens

List of usage examples for org.apache.commons.lang StringUtils splitPreserveAllTokens

Introduction

On this page you can find usage examples for org.apache.commons.lang StringUtils splitPreserveAllTokens.

Prototype

public static String[] splitPreserveAllTokens(String str, String separatorChars) 

Source Link

Document

Splits the provided text into an array, separators specified, preserving all tokens, including empty tokens created by adjacent separators.

Usage

From source file:org.talend.dataquality.record.linkage.analyzer.StringsClusterAnalyzerTest.java

/**
 * Feeds the first column of every line of {@code cluster10000.txt} to the analyser, configured with a
 * block size threshold of 10 and a Jaro-Winkler post-merge, then checks the first result entry with
 * {@code assertElementResult}.
 *
 * @throws IOException if the resource cannot be read or closed
 */
@Test
public void testCluster10000WithThreshold() throws IOException {
    analyser.init();
    analyser.setBlockSizeThreshold(10);
    analyser.withPostMerges(new PostMerge(AttributeMatcherType.JARO_WINKLER, 0.8f));
    String columnDelimiter = "|";
    InputStream in = this.getClass().getResourceAsStream("cluster10000.txt"); //$NON-NLS-1$
    BufferedReader bfr = new BufferedReader(new InputStreamReader(in));
    try {
        List<String> listOfLines = IOUtils.readLines(bfr);
        for (String line : listOfLines) {
            // Only the first column of each delimited line is analysed.
            String[] fields = StringUtils.splitPreserveAllTokens(line, columnDelimiter);
            analyser.analyze(fields[0]);
        }
    } finally {
        // Closing the reader also releases the underlying resource stream.
        bfr.close();
    }
    analyser.end();
    List<StringClusters> results = analyser.getResult();
    assertElementResult(results.get(0));
}

From source file:org.talend.dataquality.record.linkage.analyzer.StringsClusterAnalyzerTest.java

/**
 * Feeds the first column of every line of {@code tshirts.txt} to the analyser with its default
 * configuration, then checks the first result entry with {@code assertTShirtsResult}.
 *
 * @throws IOException if the resource cannot be read or closed
 */
@Test
public void testTShirtsLogic() throws IOException {
    analyser.init();
    String columnDelimiter = "|";
    InputStream in = this.getClass().getResourceAsStream("tshirts.txt"); //$NON-NLS-1$
    BufferedReader bfr = new BufferedReader(new InputStreamReader(in));
    try {
        List<String> listOfLines = IOUtils.readLines(bfr);
        for (String line : listOfLines) {
            // Only the first column of each delimited line is analysed.
            String[] fields = StringUtils.splitPreserveAllTokens(line, columnDelimiter);
            analyser.analyze(fields[0]);
        }
    } finally {
        // Closing the reader also releases the underlying resource stream.
        bfr.close();
    }
    analyser.end();

    assertTShirtsResult(analyser.getResult().get(0));
}

From source file:org.talend.dataquality.record.linkage.analyzer.StringsClusterAnalyzerTest.java

/**
 * Same scenario as the plain t-shirts test, but with a block size threshold of 2 and a Soundex
 * post-merge; the first result entry is checked with {@code assertTShirtsResult}.
 *
 * @throws IOException if the resource cannot be read or closed
 */
@Test
public void testTShirtsLogicWithThreshold() throws IOException {
    analyser.init();
    analyser.setBlockSizeThreshold(2); // Holds at most 2 records in memory for each block
    analyser.withPostMerges(new PostMerge(AttributeMatcherType.SOUNDEX, 0.8f));
    String columnDelimiter = "|";
    InputStream in = this.getClass().getResourceAsStream("tshirts.txt"); //$NON-NLS-1$
    BufferedReader bfr = new BufferedReader(new InputStreamReader(in));
    try {
        List<String> listOfLines = IOUtils.readLines(bfr);
        for (String line : listOfLines) {
            // Only the first column of each delimited line is analysed.
            String[] fields = StringUtils.splitPreserveAllTokens(line, columnDelimiter);
            analyser.analyze(fields[0]);
        }
    } finally {
        // Closing the reader also releases the underlying resource stream.
        bfr.close();
    }

    analyser.end();
    assertTShirtsResult(analyser.getResult().get(0));
}

From source file:org.talend.dataquality.record.linkage.grouping.StringClusteringWithSwooshTest.java

/**
 * Groups the second column of {@code incoming_customers_swoosh_fingerprintkey.txt} with the swoosh
 * algorithm and a dummy matcher, after blocking the rows by fingerprint key, and checks the merged
 * value produced by the "most common" survivorship function.
 *
 * @throws IOException if the resource cannot be read or closed
 */
@Test
public void testDoGroupMergeValues() throws IOException {
    InputStream in = this.getClass().getResourceAsStream("incoming_customers_swoosh_fingerprintkey.txt"); //$NON-NLS-1$
    BufferedReader bfr = new BufferedReader(new InputStreamReader(in));
    inputList = new ArrayList<Object[]>();
    try {
        List<String> listOfLines = IOUtils.readLines(bfr);
        for (String line : listOfLines) {
            // Only the second column of each delimited line is used as input.
            String[] fields = StringUtils.splitPreserveAllTokens(line, columnDelimiter);
            inputList.add(new Object[] { fields[1] });
        }
    } finally {
        // Closing the reader also releases the underlying resource stream.
        bfr.close();
    }
    String columnName = "NAME";
    // Blocking the data given fingerprint key
    List<Map<String, String>> blockKeySchema = new ArrayList<Map<String, String>>();
    Map<String, String> blockKeyDefMap = new HashMap<String, String>();

    blockKeyDefMap.put(MatchAnalysisConstant.PRECOLUMN, columnName);
    blockKeyDefMap.put(MatchAnalysisConstant.KEY_ALGO, BlockingKeyAlgorithmEnum.FINGERPRINTKEY.getValue());
    blockKeySchema.add(blockKeyDefMap);

    Map<String, String> colName2IndexMap = new HashMap<String, String>();
    colName2IndexMap.put(columnName, String.valueOf(0));
    BlockingKeyHandler blockKeyHandler = new BlockingKeyHandler(blockKeySchema, colName2IndexMap);
    blockKeyHandler.setInputData(inputList);
    blockKeyHandler.run();
    Map<String, List<String[]>> resultData = blockKeyHandler.getResultDatas();

    // Do grouping given swoosh algorithm with Dummy matcher.
    JunitResultConsumer resultConsumer = new JunitResultConsumer();
    recordGroup = new AnalysisSwooshMatchRecordGrouping(resultConsumer);
    ((AnalysisSwooshMatchRecordGrouping) recordGroup).setOrginalInputColumnSize(2);
    recordGroup.setRecordLinkAlgorithm(RecordMatcherType.T_SwooshAlgorithm);

    SurvivorShipAlgorithmParams survivorShipAlgorithmParams = new SurvivorShipAlgorithmParams();
    SurvivorshipFunction func = survivorShipAlgorithmParams.new SurvivorshipFunction();
    func.setParameter(""); //$NON-NLS-1$
    func.setSurvivorShipAlgoEnum(SurvivorShipAlgorithmEnum.MOST_COMMON);

    survivorShipAlgorithmParams.setSurviorShipAlgos(new SurvivorshipFunction[] { func });
    recordGroup.setSurvivorShipAlgorithmParams(survivorShipAlgorithmParams);

    // Set default survivorship functions.
    Map<Integer, SurvivorshipFunction> defaultSurvRules = new HashMap<Integer, SurvivorshipFunction>();
    SurvivorshipFunction survFunc = survivorShipAlgorithmParams.new SurvivorshipFunction();
    survFunc.setParameter(StringUtils.EMPTY);
    survFunc.setSurvivorShipAlgoEnum(SurvivorShipAlgorithmEnum.MOST_COMMON);
    defaultSurvRules.put(0, survFunc);

    survivorShipAlgorithmParams.setDefaultSurviorshipRules(defaultSurvRules);

    // recordGroup.setColumnDelimiter(columnDelimiter);
    recordGroup.setIsLinkToPrevious(Boolean.FALSE);
    List<Map<String, String>> matchingRule = new ArrayList<Map<String, String>>();

    Map<String, String> lnameRecords = new HashMap<String, String>();
    lnameRecords.put(IRecordGrouping.COLUMN_IDX, String.valueOf(0));
    lnameRecords.put(IRecordGrouping.ATTRIBUTE_NAME, columnName);
    lnameRecords.put(IRecordGrouping.MATCHING_TYPE, AttributeMatcherType.DUMMY.name());
    lnameRecords.put(IRecordGrouping.TOKENIZATION_TYPE, TokenizedResolutionMethod.NO.toString());
    lnameRecords.put(IRecordGrouping.CONFIDENCE_WEIGHT, String.valueOf(1));
    lnameRecords.put(IRecordGrouping.ATTRIBUTE_THRESHOLD, String.valueOf(0.9));

    matchingRule.add(lnameRecords);

    recordGroup.setIsOutputDistDetails(false);
    recordGroup.setAcceptableThreshold(0.95f);
    try {

        // loop on all input rows.
        Iterator<List<String[]>> values = resultData.values().iterator();
        while (values.hasNext()) {
            recordGroup.addMatchRule(matchingRule);
            recordGroup.initialize();
            // for each block
            for (Object[] inputRow : values.next()) {
                recordGroup.doGroup(inputRow);
            }
            recordGroup.end();
        }
    } catch (Throwable e) {
        log.error(e.getMessage(), e);
        // Report the cause in the failure itself so it is visible without digging through the log.
        Assert.fail("Record grouping failed: " + e); //$NON-NLS-1$
    }
    // Assertions

    Object[] rds = resultConsumer.getResult();
    // The checks only apply when the field five positions from the end equals "5".
    if (rds[rds.length - 5].equals("5")) { //$NON-NLS-1$
        // Group quality.
        Assert.assertEquals(1, Double.valueOf(rds[rds.length - 2].toString()).doubleValue(), 0d);
        // Assert the merged value is the "most common" value.
        Assert.assertEquals("lment", rds[0].toString());
    }

}

From source file:org.talend.mdm.webapp.browserecords.server.actions.BrowseRecordsAction.java

/**
 * Looks up a single item by its dot-separated id and returns the first match.
 *
 * @param dataClusterPK passed through to {@code getItemBeans}
 * @param viewBean passed through to {@code getItemBeans}
 * @param entityModel supplies the key definitions used to build the search criteria
 * @param ids the item id, with its parts separated by {@code '.'}
 * @param language locale code used for the search and for localized error messages
 * @return the first matching item, or {@code null} when nothing matches
 * @throws ServiceException wrapping any failure, with a localized message where one is available
 */
@Override
public ItemBean queryItemBeanById(String dataClusterPK, ViewBean viewBean, EntityModel entityModel, String ids,
        String language) throws ServiceException {
    try {
        // splitPreserveAllTokens keeps a trailing empty part when ids ends with the delimiter,
        // which String.split() would silently drop.
        final String[] keyParts = StringUtils.splitPreserveAllTokens(ids, '.');
        final String criteria = CommonUtil.buildCriteriaByIds(entityModel.getKeys(), keyParts);
        final Object[] searchResult = getItemBeans(dataClusterPK, viewBean, entityModel, criteria, -1, 20,
                ItemHelper.SEARCH_DIRECTION_ASC, null, language);
        @SuppressWarnings("unchecked")
        final List<ItemBean> matches = (List<ItemBean>) searchResult[0];
        return matches.isEmpty() ? null : matches.get(0);
    } catch (WebBaseException e) {
        throw new ServiceException(BASEMESSAGE.getMessage(new Locale(language), e.getMessage(), e.getArgs()));
    } catch (Exception exception) {
        // Prefer the localized message of a wrapped CoreException when there is one.
        final Throwable cause = exception.getCause();
        final String errorMessage = cause instanceof CoreException
                ? getErrorMessageFromWebCoreException((CoreException) cause, "", null, //$NON-NLS-1$
                        new Locale(language))
                : exception.getLocalizedMessage();
        LOG.error(exception.getMessage(), exception);
        throw new ServiceException(errorMessage);
    }
}

From source file:org.talend.mdm.webapp.browserecords.server.actions.BrowseRecordsAction.java

/**
 * Fetches one item per id from the current data cluster.
 *
 * @param concept the concept the items belong to
 * @param idsList item ids, each with its parts separated by {@code '.'}
 * @return one bean per id, in the order of {@code idsList}, carrying the item's XML content and task id
 * @throws ServiceException if any lookup fails; the original exception is logged
 */
@Override
public List<ItemBean> getRecords(String concept, List<String> idsList) throws ServiceException {
    final List<ItemBean> records = new ArrayList<ItemBean>();
    try {
        for (final String compositeId : idsList) {
            final String[] keyParts = StringUtils.splitPreserveAllTokens(compositeId, '.');
            final WSItemPK itemPK = new WSItemPK(new WSDataClusterPK(this.getCurrentDataCluster()), concept,
                    keyParts);
            final WSItem wsItem = CommonUtil.getPort().getItem(new WSGetItem(itemPK));

            final ItemBean bean = new ItemBean();
            bean.setItemXml(wsItem.getContent());
            bean.setTaskId(wsItem.getTaskId());
            records.add(bean);
        }
        return records;
    } catch (Exception exception) {
        LOG.error(exception.getMessage(), exception);
        throw new ServiceException(exception.getLocalizedMessage());
    }
}

From source file:org.xwiki.platform.patchservice.impl.PositionImpl.java

/**
 * {@inheritDoc}
 * <p>
 * A position inside the text (or at column 0 of the row just past the last one) is valid when the text
 * before it ends with the expected {@code before} context and the text after it starts with the expected
 * {@code after} context; an empty expected context always matches. Any other position is only valid when
 * it is column 0 of row 0 or 1 and no context is expected.
 * </p>
 */
public boolean checkPosition(String text) {
    final String[] lines = StringUtils.splitPreserveAllTokens(text, SEPARATOR);

    final boolean positionReachable;
    if (lines == null) {
        positionReachable = false;
    } else if (lines.length > this.row) {
        // The row exists: the column must not lie beyond the end of that row.
        positionReachable = lines[this.row].length() >= this.column;
    } else {
        // Just past the last row: only the very start of that row is addressable.
        positionReachable = lines.length == this.row && this.column == 0;
    }

    if (!positionReachable) {
        return (this.row == 0 || this.row == 1) && this.column == 0 && StringUtils.isEmpty(this.before)
                && StringUtils.isEmpty(this.after);
    }

    final boolean beforeMatches = StringUtils.isEmpty(this.before)
            || getTextBeforePosition(text).endsWith(this.before);
    return beforeMatches
            && (StringUtils.isEmpty(this.after) || getTextAfterPosition(text).startsWith(this.after));
}

From source file:org.xwiki.platform.patchservice.impl.PositionImpl.java

/**
 * {@inheritDoc}
 * <p>
 * When the row lies beyond the last row of the text, the whole text is returned, followed by a newline
 * unless the row is 0. Otherwise the result is every row above the position plus the part of the
 * position's row that precedes the column.
 * </p>
 */
public String getTextBeforePosition(String text) {
    final String[] lines = StringUtils.splitPreserveAllTokens(text, SEPARATOR);

    if (ArrayUtils.getLength(lines) > this.row) {
        final String rowsAbove = StringUtils.join(ArrayUtils.subarray(lines, 0, this.row), SEPARATOR);
        final String rowPrefix = StringUtils.substring(lines[this.row], 0, this.column);
        // A separator is only needed when there is at least one row above the position.
        return rowsAbove + ((this.row > 0) ? SEPARATOR : "") + rowPrefix;
    }

    final String wholeText = StringUtils.defaultString(StringUtils.join(lines, SEPARATOR));
    return wholeText + (this.row == 0 ? "" : "\n");
}

From source file:org.xwiki.platform.patchservice.impl.PositionImpl.java

/**
 * {@inheritDoc}
 * <p>
 * Returns the empty string when the row lies beyond the last row of the text. Otherwise the result is the
 * rest of the position's row from the column onwards plus every following row; when {@code span} is
 * positive, that many leading characters are dropped from the result.
 * </p>
 */
public String getTextAfterPosition(String text) {
    final String[] lines = StringUtils.splitPreserveAllTokens(text, SEPARATOR);
    if (ArrayUtils.getLength(lines) <= this.row) {
        return "";
    }

    final String restOfRow = StringUtils.substring(lines[this.row], this.column);
    final boolean hasRowsBelow = this.row + 1 < lines.length;
    final String rowsBelow = StringUtils.join(ArrayUtils.subarray(lines, this.row + 1, lines.length),
            SEPARATOR);
    final String remainder = restOfRow + (hasRowsBelow ? SEPARATOR : "") + rowsBelow;

    if (this.span > 0) {
        return StringUtils.substring(remainder, this.span);
    }
    return remainder;
}

From source file:org.yes.cart.web.support.util.HttpUtil.java

/**
 * Get all request parameters as map./*  w ww .  ja v  a2 s  .  c  o m*/
 *
 * @param requestURL request URL {@link HttpServletRequest#getRequestURL()}
 * @param pathVariables path markers that should be identified as extra parameters
 *
 * @return map of parameters (with preserved other)
 */
public static Map<String, List<String>> getParameters(final String requestURL,
        final Set<String> pathVariables) {

    final Map<String, List<String>> parameters = new LinkedHashMap<String, List<String>>();

    try {
        final String[] request = StringUtils.splitPreserveAllTokens(requestURL, '?');

        String key = null;

        if (request != null && request.length > 0) {
            final String[] pathPairs = StringUtils.splitPreserveAllTokens(request[0], '/');

            for (String pathItem : pathPairs) {
                if (key != null) {
                    if (!parameters.containsKey(key)) {
                        parameters.put(key, new LinkedList<String>());
                    }
                    final String value = URLDecoder.decode(pathItem, "UTF-8");
                    parameters.get(key).add(value);
                    key = null;
                } else if (pathVariables.contains(pathItem)) {
                    key = pathItem; // next path is value
                }
            }
        }

        if (request != null && request.length > 1) {

            final String[] parameterPairs = StringUtils.splitPreserveAllTokens(request[1], '&');
            for (String parameterPair : parameterPairs) {
                final int idx = parameterPair.indexOf("=");
                key = idx > 0 ? URLDecoder.decode(parameterPair.substring(0, idx), "UTF-8") : parameterPair;
                if (!parameters.containsKey(key)) {
                    parameters.put(key, new LinkedList<String>());
                }
                final String value = idx > 0 && parameterPair.length() > idx + 1
                        ? URLDecoder.decode(parameterPair.substring(idx + 1), "UTF-8")
                        : null;
                parameters.get(key).add(value);
            }
        }

    } catch (UnsupportedEncodingException uee) {
        throw new RuntimeException(uee);
    }
    return parameters;

}