Usage examples for org.apache.commons.lang.StringUtils.splitPreserveAllTokens
public static String[] splitPreserveAllTokens(String str, String separatorChars)
Splits the provided text into an array, separators specified, preserving all tokens, including empty tokens created by adjacent separators.
From source file:org.talend.dataquality.record.linkage.analyzer.StringsClusterAnalyzerTest.java
@Test public void testCluster10000WithThreshold() throws IOException { analyser.init();/*from w w w .j a v a 2 s .co m*/ analyser.setBlockSizeThreshold(10); analyser.withPostMerges(new PostMerge(AttributeMatcherType.JARO_WINKLER, 0.8f)); String columnDelimiter = "|"; InputStream in = this.getClass().getResourceAsStream("cluster10000.txt"); //$NON-NLS-1$ BufferedReader bfr = new BufferedReader(new InputStreamReader(in)); List<String> listOfLines = IOUtils.readLines(bfr); for (String line : listOfLines) { String[] fields = StringUtils.splitPreserveAllTokens(line, columnDelimiter); analyser.analyze(fields[0]); } analyser.end(); List<StringClusters> results = analyser.getResult(); assertElementResult(results.get(0)); }
From source file:org.talend.dataquality.record.linkage.analyzer.StringsClusterAnalyzerTest.java
@Test public void testTShirtsLogic() throws IOException { analyser.init();/*from www .j a v a 2 s . c o m*/ String columnDelimiter = "|"; InputStream in = this.getClass().getResourceAsStream("tshirts.txt"); //$NON-NLS-1$ BufferedReader bfr = new BufferedReader(new InputStreamReader(in)); List<String> listOfLines = IOUtils.readLines(bfr); for (String line : listOfLines) { String[] fields = StringUtils.splitPreserveAllTokens(line, columnDelimiter); analyser.analyze(fields[0]); } analyser.end(); assertTShirtsResult(analyser.getResult().get(0)); }
From source file:org.talend.dataquality.record.linkage.analyzer.StringsClusterAnalyzerTest.java
@Test public void testTShirtsLogicWithThreshold() throws IOException { analyser.init();//from w w w. j a va 2 s. c om analyser.setBlockSizeThreshold(2); // Holds at most 2 records in memory for each block analyser.withPostMerges(new PostMerge(AttributeMatcherType.SOUNDEX, 0.8f)); String columnDelimiter = "|"; InputStream in = this.getClass().getResourceAsStream("tshirts.txt"); //$NON-NLS-1$ BufferedReader bfr = new BufferedReader(new InputStreamReader(in)); List<String> listOfLines = IOUtils.readLines(bfr); for (String line : listOfLines) { String[] fields = StringUtils.splitPreserveAllTokens(line, columnDelimiter); analyser.analyze(fields[0]); } analyser.end(); assertTShirtsResult(analyser.getResult().get(0)); }
From source file:org.talend.dataquality.record.linkage.grouping.StringClusteringWithSwooshTest.java
/**
 * End-to-end check of blocking followed by swoosh grouping with a "most common" survivorship rule.
 * <ol>
 * <li>Reads {@code incoming_customers_swoosh_fingerprintkey.txt}, splits each line on
 * {@code columnDelimiter} and keeps only the second token ({@code fields[1]}) as a one-column
 * input row.</li>
 * <li>Runs a {@code BlockingKeyHandler} configured with the {@code FINGERPRINTKEY} algorithm on
 * column {@code "NAME"}, mapped to index 0.</li>
 * <li>Configures an {@code AnalysisSwooshMatchRecordGrouping} with {@code T_SwooshAlgorithm},
 * {@code MOST_COMMON} survivorship functions, one {@code DUMMY} matching rule on column 0 and an
 * acceptable threshold of {@code 0.95f}.</li>
 * <li>For every block returned by the handler: adds the match rule, initializes the grouping,
 * passes each row to {@code doGroup} and calls {@code end()}. Any {@code Throwable} is logged
 * and fails the test.</li>
 * <li>If the element five positions from the end of the consumer's result equals {@code "5"},
 * asserts that the element two positions from the end parses to {@code 1} and that element 0 is
 * {@code "lment"}.</li>
 * </ol>
 * NOTE(review): in this single-line rendering every {@code //} comment runs to the end of the
 * physical line and therefore comments out the statements that follow it; the original line
 * breaks must be restored before this compiles. It is not possible to tell from here whether
 * statements directly following a {@code //} (e.g. {@code recordGroup.setColumnDelimiter(...)})
 * were live code or commented out in the original - confirm against the upstream file.
 * NOTE(review): {@code bfr} is never closed in the visible code.
 *
 * @throws IOException if reading the fixture fails
 */
@Test public void testDoGroupMergeValues() throws IOException { InputStream in = this.getClass().getResourceAsStream("incoming_customers_swoosh_fingerprintkey.txt"); //$NON-NLS-1$ BufferedReader bfr = new BufferedReader(new InputStreamReader(in)); List<String> listOfLines = IOUtils.readLines(bfr); inputList = new ArrayList<Object[]>(); for (String line : listOfLines) { String[] fields = StringUtils.splitPreserveAllTokens(line, columnDelimiter); inputList.add(new Object[] { fields[1] }); }/* www. jav a 2 s . c om*/ String columnName = "NAME"; // Blocking the data given fingerprint key List<Map<String, String>> blockKeySchema = new ArrayList<Map<String, String>>(); Map<String, String> blockKeyDefMap = new HashMap<String, String>(); blockKeyDefMap.put(MatchAnalysisConstant.PRECOLUMN, columnName); blockKeyDefMap.put(MatchAnalysisConstant.KEY_ALGO, BlockingKeyAlgorithmEnum.FINGERPRINTKEY.getValue()); blockKeySchema.add(blockKeyDefMap); Map<String, String> colName2IndexMap = new HashMap<String, String>(); colName2IndexMap.put(columnName, String.valueOf(0)); BlockingKeyHandler blockKeyHandler = new BlockingKeyHandler(blockKeySchema, colName2IndexMap); blockKeyHandler.setInputData(inputList); blockKeyHandler.run(); Map<String, List<String[]>> resultData = blockKeyHandler.getResultDatas(); // Do grouping given swoosh algorithm with Dummy matcher. 
JunitResultConsumer resultConsumer = new JunitResultConsumer(); recordGroup = new AnalysisSwooshMatchRecordGrouping(resultConsumer); ((AnalysisSwooshMatchRecordGrouping) recordGroup).setOrginalInputColumnSize(2); recordGroup.setRecordLinkAlgorithm(RecordMatcherType.T_SwooshAlgorithm); SurvivorShipAlgorithmParams survivorShipAlgorithmParams = new SurvivorShipAlgorithmParams(); SurvivorshipFunction func = survivorShipAlgorithmParams.new SurvivorshipFunction(); func.setParameter(""); //$NON-NLS-1$ func.setSurvivorShipAlgoEnum(SurvivorShipAlgorithmEnum.MOST_COMMON); survivorShipAlgorithmParams.setSurviorShipAlgos(new SurvivorshipFunction[] { func }); recordGroup.setSurvivorShipAlgorithmParams(survivorShipAlgorithmParams); // // Set default survivorship functions. Map<Integer, SurvivorshipFunction> defaultSurvRules = new HashMap<Integer, SurvivorshipFunction>(); SurvivorshipFunction survFunc = survivorShipAlgorithmParams.new SurvivorshipFunction(); survFunc.setParameter(StringUtils.EMPTY); survFunc.setSurvivorShipAlgoEnum(SurvivorShipAlgorithmEnum.MOST_COMMON); defaultSurvRules.put(0, survFunc); survivorShipAlgorithmParams.setDefaultSurviorshipRules(defaultSurvRules); // recordGroup.setColumnDelimiter(columnDelimiter); recordGroup.setIsLinkToPrevious(Boolean.FALSE); List<Map<String, String>> matchingRule = new ArrayList<Map<String, String>>(); Map<String, String> lnameRecords = new HashMap<String, String>(); lnameRecords.put(IRecordGrouping.COLUMN_IDX, String.valueOf(0)); lnameRecords.put(IRecordGrouping.ATTRIBUTE_NAME, columnName); lnameRecords.put(IRecordGrouping.MATCHING_TYPE, AttributeMatcherType.DUMMY.name()); lnameRecords.put(IRecordGrouping.TOKENIZATION_TYPE, TokenizedResolutionMethod.NO.toString()); lnameRecords.put(IRecordGrouping.CONFIDENCE_WEIGHT, String.valueOf(1)); lnameRecords.put(IRecordGrouping.ATTRIBUTE_THRESHOLD, String.valueOf(0.9)); matchingRule.add(lnameRecords); recordGroup.setIsOutputDistDetails(false); recordGroup.setAcceptableThreshold(0.95f); 
try { // loop on all input rows. Iterator<List<String[]>> values = resultData.values().iterator(); while (values.hasNext()) { recordGroup.addMatchRule(matchingRule); recordGroup.initialize(); // for each block for (Object[] inputRow : values.next()) { recordGroup.doGroup(inputRow); } recordGroup.end(); } } catch (Throwable e) { log.error(e.getMessage(), e); Assert.fail(); } // Assertions Object[] rds = resultConsumer.getResult(); // for (Object[] rds : ) { if (rds[rds.length - 5].equals("5")) { //$NON-NLS-1$ // Group quality. Assert.assertEquals(1, Double.valueOf(rds[rds.length - 2].toString()).doubleValue(), 0d); // Assert the merged value is the "most common" value. Assert.assertEquals("lment", rds[0].toString()); } // } }
From source file:org.talend.mdm.webapp.browserecords.server.actions.BrowseRecordsAction.java
@Override public ItemBean queryItemBeanById(String dataClusterPK, ViewBean viewBean, EntityModel entityModel, String ids, String language) throws ServiceException { try {/*from ww w . j a v a 2 s. c om*/ String[] idArr = StringUtils.splitPreserveAllTokens(ids, '.'); // String.split() omits the last '' if ends // with delimiter String criteria = CommonUtil.buildCriteriaByIds(entityModel.getKeys(), idArr); Object[] result = getItemBeans(dataClusterPK, viewBean, entityModel, criteria, -1, 20, ItemHelper.SEARCH_DIRECTION_ASC, null, language); @SuppressWarnings("unchecked") List<ItemBean> itemBeans = (List<ItemBean>) result[0]; if (itemBeans.size() > 0) { return itemBeans.get(0); } else { return null; } } catch (WebBaseException e) { throw new ServiceException(BASEMESSAGE.getMessage(new Locale(language), e.getMessage(), e.getArgs())); } catch (Exception exception) { String errorMessage; if (CoreException.class.isInstance(exception.getCause())) { CoreException webCoreException = (CoreException) exception.getCause(); errorMessage = getErrorMessageFromWebCoreException(webCoreException, "", null, //$NON-NLS-1$ new Locale(language)); } else { errorMessage = exception.getLocalizedMessage(); } LOG.error(exception.getMessage(), exception); throw new ServiceException(errorMessage); } }
From source file:org.talend.mdm.webapp.browserecords.server.actions.BrowseRecordsAction.java
/**
 * Loads one item per composite id from the current data cluster.
 * <p>
 * Each entry of {@code idsList} is split on {@code '.'} (empty tokens preserved) and used as the
 * key of a {@code WSItemPK} for {@code concept}; the returned item's XML content and task id are
 * copied into a new {@code ItemBean}.
 *
 * @return the loaded beans, in the iteration order of {@code idsList}
 * @throws ServiceException wrapping the localized message of any failure, after logging it
 */
@Override
public List<ItemBean> getRecords(String concept, List<String> idsList) throws ServiceException {
    final List<ItemBean> records = new ArrayList<ItemBean>();
    try {
        for (String compositeId : idsList) {
            final String[] ids = StringUtils.splitPreserveAllTokens(compositeId, '.');
            final WSItem wsItem = CommonUtil.getPort().getItem(
                    new WSGetItem(new WSItemPK(new WSDataClusterPK(this.getCurrentDataCluster()), concept, ids)));

            final ItemBean bean = new ItemBean();
            bean.setItemXml(wsItem.getContent());
            bean.setTaskId(wsItem.getTaskId());
            records.add(bean);
        }
        return records;
    } catch (Exception exception) {
        LOG.error(exception.getMessage(), exception);
        throw new ServiceException(exception.getLocalizedMessage());
    }
}
From source file:org.xwiki.platform.patchservice.impl.PositionImpl.java
/**
 * {@inheritDoc}
 * <p>
 * The text is split into rows on {@code SEPARATOR}. When the position falls inside an existing
 * row (the row has at least {@code column} characters) or at column 0 of the row just past the
 * last one, the non-empty {@code before} / {@code after} contexts must match the text around the
 * position. Otherwise the position is accepted only for row 0 or 1, column 0, with both contexts
 * empty.
 */
public boolean checkPosition(String text) {
    final String[] lines = StringUtils.splitPreserveAllTokens(text, SEPARATOR);
    final boolean noBefore = StringUtils.isEmpty(this.before);
    final boolean noAfter = StringUtils.isEmpty(this.after);

    if (lines != null) {
        final boolean insideExistingRow = lines.length > this.row && lines[this.row].length() >= this.column;
        final boolean atStartOfNextRow = lines.length == this.row && this.column == 0;
        if (insideExistingRow || atStartOfNextRow) {
            // The "before" context is checked first; "after" is only looked at when it passes.
            if (!noBefore && !getTextBeforePosition(text).endsWith(this.before)) {
                return false;
            }
            return noAfter || getTextAfterPosition(text).startsWith(this.after);
        }
    }

    // Position is outside the split text: only the very start with no context is acceptable.
    return (this.row == 0 || this.row == 1) && this.column == 0 && noBefore && noAfter;
}
From source file:org.xwiki.platform.patchservice.impl.PositionImpl.java
/**
 * {@inheritDoc}
 * <p>
 * If the text has no row at index {@code row}, the whole text is returned (rows re-joined with
 * {@code SEPARATOR}), followed by a newline unless {@code row} is 0. Otherwise the result is all
 * rows before {@code row}, a separator when {@code row > 0}, and the first {@code column}
 * characters of the target row.
 */
public String getTextBeforePosition(String text) {
    final String[] lines = StringUtils.splitPreserveAllTokens(text, SEPARATOR);

    if (ArrayUtils.getLength(lines) <= this.row) {
        // NOTE(review): this branch appends a literal "\n" while the rest of the method uses
        // SEPARATOR - confirm the two are meant to be the same.
        String wholeText = StringUtils.defaultString(StringUtils.join(lines, SEPARATOR));
        if (this.row != 0) {
            wholeText += "\n";
        }
        return wholeText;
    }

    String before = StringUtils.join(ArrayUtils.subarray(lines, 0, this.row), SEPARATOR);
    if (this.row > 0) {
        before += SEPARATOR;
    }
    return before + StringUtils.substring(lines[this.row], 0, this.column);
}
From source file:org.xwiki.platform.patchservice.impl.PositionImpl.java
/**
 * {@inheritDoc}
 * <p>
 * Returns the empty string when the text has no row at index {@code row}. Otherwise the result
 * is the target row from {@code column} onwards, a separator when more rows follow, and the
 * remaining rows joined with {@code SEPARATOR}; when {@code span} is positive, the first
 * {@code span} characters of that result are skipped.
 */
public String getTextAfterPosition(String text) {
    final String[] lines = StringUtils.splitPreserveAllTokens(text, SEPARATOR);
    if (ArrayUtils.getLength(lines) <= this.row) {
        return "";
    }

    final int nextRow = this.row + 1;
    String remainder = StringUtils.substring(lines[this.row], this.column);
    if (nextRow < lines.length) {
        remainder += SEPARATOR;
    }
    remainder += StringUtils.join(ArrayUtils.subarray(lines, nextRow, lines.length), SEPARATOR);

    if (this.span <= 0) {
        return remainder;
    }
    return StringUtils.substring(remainder, this.span);
}
From source file:org.yes.cart.web.support.util.HttpUtil.java
/** * Get all request parameters as map./* w ww . ja v a2 s . c o m*/ * * @param requestURL request URL {@link HttpServletRequest#getRequestURL()} * @param pathVariables path markers that should be identified as extra parameters * * @return map of parameters (with preserved other) */ public static Map<String, List<String>> getParameters(final String requestURL, final Set<String> pathVariables) { final Map<String, List<String>> parameters = new LinkedHashMap<String, List<String>>(); try { final String[] request = StringUtils.splitPreserveAllTokens(requestURL, '?'); String key = null; if (request != null && request.length > 0) { final String[] pathPairs = StringUtils.splitPreserveAllTokens(request[0], '/'); for (String pathItem : pathPairs) { if (key != null) { if (!parameters.containsKey(key)) { parameters.put(key, new LinkedList<String>()); } final String value = URLDecoder.decode(pathItem, "UTF-8"); parameters.get(key).add(value); key = null; } else if (pathVariables.contains(pathItem)) { key = pathItem; // next path is value } } } if (request != null && request.length > 1) { final String[] parameterPairs = StringUtils.splitPreserveAllTokens(request[1], '&'); for (String parameterPair : parameterPairs) { final int idx = parameterPair.indexOf("="); key = idx > 0 ? URLDecoder.decode(parameterPair.substring(0, idx), "UTF-8") : parameterPair; if (!parameters.containsKey(key)) { parameters.put(key, new LinkedList<String>()); } final String value = idx > 0 && parameterPair.length() > idx + 1 ? URLDecoder.decode(parameterPair.substring(idx + 1), "UTF-8") : null; parameters.get(key).add(value); } } } catch (UnsupportedEncodingException uee) { throw new RuntimeException(uee); } return parameters; }