List of usage examples for java.util.regex.Matcher#appendReplacement
public Matcher appendReplacement(StringBuilder sb, String replacement)
From source file:com.ikanow.aleph2.harvest.logstash.utils.LogstashConfigUtils.java
public static String validateLogstashInput(LogstashHarvesterConfigBean globals, String sourceKey, String config, StringBuffer errorMessage, boolean isAdmin) { _allowedInputs.addAll(Arrays.asList(globals.non_admin_inputs().toLowerCase().split("\\s*,\\s*"))); _allowedFilters.addAll(Arrays.asList(globals.non_admin_filters().toLowerCase().split("\\s*,\\s*"))); _allowedOutputs.addAll(Arrays.asList(globals.non_admin_outputs().toLowerCase().split("\\s*,\\s*"))); // Configuration validation, phase 1 errorMessage.append("Validation error:"); ObjectNode jsonifiedConfig = parseLogstashConfig(config, errorMessage); if (null == jsonifiedConfig) { return null; }// w ww . j a v a 2s. c o m errorMessage.setLength(0); // Configuration validation, phase 2 - very basic checks on the structure of the object Object input = jsonifiedConfig.get("input"); if ((null == input) || !(input instanceof ObjectNode)) { // Does input exist? errorMessage.append( "Invalid input format, should be 'input { INPUT_TYPE { ... } }' (only one INPUT_TYPE) and also contain a filter, no \"s around them. (0)"); return null; } //TESTED (3_1d) else { // Check there's only one input type and (unless admin) it's one of the allowed types ObjectNode inputDbo = (ObjectNode) input; if (1 != inputDbo.size()) { errorMessage.append( "Invalid input format, should be 'input { INPUT_TYPE { ... } }' (only one INPUT_TYPE) and also contain a filter, no \"s around them. (1)"); return null; } //TESTED if (!isAdmin) { for (String key : (Iterable<String>) () -> inputDbo.fieldNames()) { if (!_allowedInputs.contains(key.toLowerCase())) { errorMessage.append("Security error, non-admin not allowed input type " + key + ", allowed options: " + _allowedInputs.toString()); return null; } //TESTED } } //TESTED (3_1abc) } Object filter = jsonifiedConfig.get("filter"); if ((null == filter) || !(filter instanceof ObjectNode)) { // Does filter exist? errorMessage.append( "Invalid input format, should be 'input { INPUT_TYPE { ... 
} }' (only one INPUT_TYPE) and also contain a filter, no \"s around them. (2)"); return null; } //TESTED (3_2d) else { // Check there's only one input type and (unless admin) it's one of the allowed types if (!isAdmin) { ObjectNode filterDbo = (ObjectNode) filter; for (String key : (Iterable<String>) () -> filterDbo.fieldNames()) { if (!_allowedFilters.contains(key.toLowerCase())) { errorMessage.append("Security error, non-admin not allowed filter type " + key + ", allowed options: " + _allowedFilters.toString()); return null; } //TESTED } } //TESTED (3_2abc) } //TODO: same for output // Configuration validation, phase 3 Matcher m = null; m = _validationRegexInputReplace.matcher(config); if (!m.find()) { errorMessage.append( "Invalid input format, should be 'input { INPUT_TYPE { ... } }' (only one INPUT_TYPE) and also contain a filter, no \"s around them. (3)"); return null; } //TESTED (see above) else { // If admin check on allowed types String inputType = m.group(2).toLowerCase(); // If it's a file-based plugin then replace sincedb_path (check that it's not used during the JSON-ification): if (inputType.equalsIgnoreCase("file")) { config = _validationRegexInputReplace.matcher(config) .replaceFirst("$1\n sincedb_path => \"_XXX_DOTSINCEDB_XXX_\"\n"); } else if (inputType.equalsIgnoreCase("s3")) { config = _validationRegexInputReplace.matcher(config).replaceFirst( "$1\n sincedb_path => \"_XXX_DOTSINCEDB_XXX_\"\n temporary_directory => \"_XXX_LSTEMPDIR_XXX_\""); } } //TESTED m = _validationRegexNoSourceKey.matcher(config); // (this won't help malicious changes to source key, but will let people know they're not supposed to) if (m.find()) { errorMessage.append( "Not allowed to reference sourceKey - this is automatically appended by the logstash harvester"); return null; } //TESTED // OK now need to append the sourceKey at each stage of the pipeline to really really ensure that nobody sets sourceKey to be different m = _validationRegexAppendFields.matcher(config); 
StringBuffer newConfig = new StringBuffer(); if (m.find()) { m.appendReplacement(newConfig, "add_field => [ \"[@metadata][sourceKey]\", \"" + sourceKey + "\"] \n\n" + m.group() + " \n if [@metadata][sourceKey] == \"" + sourceKey + "\" { \n\n "); } else { errorMessage.append( "Invalid input format, should be 'input { INPUT_TYPE { ... } }' (only one INPUT_TYPE) and also contain a filter, no \"s around them. (4)"); return null; } m.appendTail(newConfig); config = newConfig.toString(); config = config.replaceAll("}[^}]*$", ""); // (remove the last }) config += "\n\n mutate { update => [ \"[@metadata][sourceKey]\", \"" + sourceKey + "\"] } \n}\n}\n"; // double check the sourceKey hasn't been overwritten and close the if from above //TESTED (syntactically correct and does overwrite sourceKey everywhere - success_2_2) return config; }
From source file:org.codelibs.fess.helper.ViewHelper.java
protected String replaceHighlightQueries(final String cache, final String[] queries) { final StringBuffer buf = new StringBuffer(cache.length() + 100); final StringBuffer segBuf = new StringBuffer(1000); final Pattern p = Pattern.compile("<[^>]+>"); final Matcher m = p.matcher(cache); final String[] regexQueries = new String[queries.length]; final String[] hlQueries = new String[queries.length]; for (int i = 0; i < queries.length; i++) { regexQueries[i] = Pattern.quote(queries[i]); hlQueries[i] = highlightTagPre + queries[i] + highlightTagPost; }//from ww w. ja v a 2 s. co m while (m.find()) { segBuf.setLength(0); m.appendReplacement(segBuf, StringUtil.EMPTY); String segment = segBuf.toString(); for (int i = 0; i < queries.length; i++) { segment = Pattern.compile(regexQueries[i], Pattern.CASE_INSENSITIVE).matcher(segment) .replaceAll(hlQueries[i]); } buf.append(segment); buf.append(m.group(0)); } segBuf.setLength(0); m.appendTail(segBuf); String segment = segBuf.toString(); for (int i = 0; i < queries.length; i++) { segment = Pattern.compile(regexQueries[i], Pattern.CASE_INSENSITIVE).matcher(segment) .replaceAll(hlQueries[i]); } buf.append(segment); return buf.toString(); }
From source file:net.duckling.ddl.web.controller.LynxDDocController.java
/**
 * Rewrites the {@code src="..."} attributes in {@code html} that point at this team's
 * {@code downloadResource/<rid>} URIs so they reference the path returned by
 * {@code createCacheImage}, and records each such path in {@code imagePathList}.
 *
 * @param html          the HTML to process
 * @param teamName      team name segment expected in the resource URI
 * @param imagePathList receives the path of every image that was rewritten
 * @param request       current request; supplies the context path and the image cache path
 * @return the HTML with the matching image sources replaced
 */
private String processImagePath(String html, String teamName, List<String> imagePathList,
        HttpServletRequest request) {
    String cachePath = getImageCachePath(request);
    String baseAddress = config.getProperty("duckling.baseAddress");
    // The address, context path and team name are literal text, so quote them: otherwise '.'
    // in a host name matches any character and other metacharacters could break the pattern.
    String patternString = "src=\"((" + Pattern.quote(baseAddress + request.getContextPath() + "/") + "|/)"
            + Pattern.quote(teamName + "/downloadResource/") + "(\\d+))\"";
    Pattern pattern = Pattern.compile(patternString);
    Matcher matcher = pattern.matcher(html);
    StringBuffer sb = new StringBuffer();
    while (matcher.find()) {
        String imageUri = matcher.group(1);
        int rid;
        try {
            rid = Integer.valueOf(matcher.group(3));
        } catch (NumberFormatException e) {
            // digits-only but out of int range: leave this src attribute as it was
            LOG.warn("parse rid error.{uri:" + imageUri + "} :" + e.getMessage());
            continue;
        }
        String path = createCacheImage(rid, cachePath);
        // quoteReplacement: a '$' in the cache path must not be read as a group reference
        matcher.appendReplacement(sb, Matcher.quoteReplacement("src=\"" + path.replace("\\", "/") + "\""));
        imagePathList.add(path);
    }
    matcher.appendTail(sb);
    return sb.toString();
}
From source file:gtu._work.etc.SqlReplacerUI.java
private void initGUI() { try {//w w w. j a va2 s . c o m BorderLayout thisLayout = new BorderLayout(); setDefaultCloseOperation(WindowConstants.DISPOSE_ON_CLOSE); getContentPane().setLayout(thisLayout); { jTabbedPane1 = new JTabbedPane(); getContentPane().add(jTabbedPane1, BorderLayout.CENTER); { jPanel1 = new JPanel(); BorderLayout jPanel1Layout = new BorderLayout(); jPanel1.setLayout(jPanel1Layout); jTabbedPane1.addTab("jPanel1", null, jPanel1, null); { jScrollPane1 = new JScrollPane(); jPanel1.add(jScrollPane1, BorderLayout.CENTER); jScrollPane1.setPreferredSize(new java.awt.Dimension(387, 246)); { jTextArea1 = new JTextArea(); jScrollPane1.setViewportView(jTextArea1); jTextArea1.setText(""); } } } { jPanel2 = new JPanel(); FlowLayout jPanel2Layout = new FlowLayout(); jPanel2.setLayout(jPanel2Layout); jTabbedPane1.addTab("jPanel2", null, jPanel2, null); { replaceFromText = new JTextField(); jPanel2.add(replaceFromText); replaceFromText.setPreferredSize(new java.awt.Dimension(266, 22)); } { replaceToText = new JTextField(); jPanel2.add(replaceToText); replaceToText.setPreferredSize(new java.awt.Dimension(266, 22)); } { execute = new JButton(); jPanel2.add(execute); execute.setText("execute"); execute.setPreferredSize(new java.awt.Dimension(149, 42)); execute.addActionListener(new ActionListener() { public void actionPerformed(ActionEvent evt) { try { String text = jTextArea1.getText(); if (StringUtils.isBlank(text)) { JCommonUtil._jOptionPane_showMessageDialog_error("area empty!"); return; } String fromTxt = replaceFromText.getText(); String toTxt = StringUtils.defaultString(replaceToText.getText()); if (StringUtils.isBlank(fromTxt)) { JCommonUtil._jOptionPane_showMessageDialog_error("fromTxt empty!"); return; } StringBuffer sb = new StringBuffer(); Pattern ptn = Pattern.compile("(.){0,1}" + fromTxt + "(.){0,1}", Pattern.CASE_INSENSITIVE); Matcher mth = null; String[] scopeStrs = { ",", " ", "(", ")", "[", "]" }; BufferedReader reader = new BufferedReader(new 
StringReader(text)); for (String line = null; (line = reader.readLine()) != null;) { mth = ptn.matcher(line); while (mth.find()) { String scope1 = mth.group(1); String scope2 = mth.group(2); boolean ok1 = scope1.length() == 0 || StringUtils.indexOfAny(scope1, scopeStrs) != -1; boolean ok2 = scope2.length() == 0 || StringUtils.indexOfAny(scope2, scopeStrs) != -1; if (ok1 && ok2) { mth.appendReplacement(sb, scope1 + toTxt + scope2); } } mth.appendTail(sb); sb.append("\n"); } reader.close(); jTextArea1.setText(sb.toString()); } catch (Exception e) { JCommonUtil.handleException(e); } } }); } } } pack(); setSize(400, 300); } catch (Exception e) { //add your error handling code here e.printStackTrace(); } }
From source file:gtu._work.ui.EstoreCodeGenerateUI.java
private void initGUI() { try {//from w ww .ja va 2 s .c o m BorderLayout thisLayout = new BorderLayout(); setDefaultCloseOperation(WindowConstants.DISPOSE_ON_CLOSE); getContentPane().setLayout(thisLayout); { jTabbedPane1 = new JTabbedPane(); getContentPane().add(jTabbedPane1, BorderLayout.CENTER); jTabbedPane1.setPreferredSize(new java.awt.Dimension(717, 582)); { jPanel1 = new JPanel(); GridLayout jPanel1Layout = new GridLayout(15, 1); jPanel1.setLayout(jPanel1Layout); jTabbedPane1.addTab("", null, jPanel1, null); { jLabel1 = new JLabel(); jPanel1.add(jLabel1); jLabel1.setText("\u985e\u5225\u540d\u7a31"); } { classNameText = new JTextField(); jPanel1.add(classNameText); } { jLabel6 = new JLabel(); jPanel1.add(jLabel6); jLabel6.setText("package\u4e2d\u9593\u540d"); } { packageMiddleNameText = new JTextField(); jPanel1.add(packageMiddleNameText); } { jLabel2 = new JLabel(); jPanel1.add(jLabel2); jLabel2.setText("jsp\u8def\u5f91"); } { jspPathText = new JTextField(); jPanel1.add(jspPathText); jspPathText.addFocusListener(new FocusAdapter() { public void focusLost(FocusEvent evt) { try { String xmlConfigMessage = xmlConfigArea.getText(); if (StringUtils.isBlank(xmlConfigMessage)) { return; } StringBuilder sb = new StringBuilder(); String actionClassPath = jspPathText.getText(); actionClassPath = actionClassPath.replaceFirst("/src/main/webapp", ""); Pattern pattern = Pattern.compile("value=\"[\\w\\/]+\\.jsp\""); Matcher matcher = null; BufferedReader reader = new BufferedReader(new StringReader(xmlConfigMessage)); for (String line = null; (line = reader.readLine()) != null;) { matcher = pattern.matcher(line); if (matcher.find()) { StringBuffer sb2 = new StringBuffer(); matcher.appendReplacement(sb2, "value=\"" + actionClassPath + "\""); matcher.appendTail(sb2); sb.append(sb2 + "\n"); } else { sb.append(line + "\n"); } } xmlConfigArea.setText(sb.toString()); } catch (Exception e) { JCommonUtil.handleException(e); } } }); } { jLabel3 = new JLabel(); jPanel1.add(jLabel3); 
jLabel3.setText("action\u8def\u5f91"); } { actionPathText = new JTextField(); jPanel1.add(actionPathText); actionPathText.addFocusListener(new FocusAdapter() { public void focusLost(FocusEvent evt) { try { String xmlConfigMessage = xmlConfigArea.getText(); if (StringUtils.isBlank(xmlConfigMessage)) { return; } StringBuilder sb = new StringBuilder(); String actionClassPath = actionPathText.getText(); System.out.println(actionClassPath); actionClassPath = actionClassPath.replaceAll("/src/main/java/", "") .replace('/', '.').replaceAll(".java", ""); Pattern pattern = Pattern.compile("class=\"com\\.sti\\.[\\w\\.]+Action\""); Matcher matcher = null; BufferedReader reader = new BufferedReader(new StringReader(xmlConfigMessage)); for (String line = null; (line = reader.readLine()) != null;) { matcher = pattern.matcher(line); if (matcher.find()) { StringBuffer sb2 = new StringBuffer(); matcher.appendReplacement(sb2, "class=\"" + actionClassPath + "\""); matcher.appendTail(sb2); sb.append(sb2 + "\n"); } else { sb.append(line + "\n"); } } xmlConfigArea.setText(sb.toString()); } catch (Exception e) { JCommonUtil.handleException(e); } } }); } { jLabel4 = new JLabel(); jPanel1.add(jLabel4); jLabel4.setText("service interface\u8def\u5f91"); } { serviceInterfaceText = new JTextField(); jPanel1.add(serviceInterfaceText); } { jLabel5 = new JLabel(); jPanel1.add(jLabel5); jLabel5.setText("service Impl\u8def\u5f91"); } { serviceImplText = new JTextField(); jPanel1.add(serviceImplText); serviceImplText.addFocusListener(new FocusAdapter() { public void focusLost(FocusEvent evt) { try { String xmlConfigMessage = xmlConfigArea.getText(); if (StringUtils.isBlank(xmlConfigMessage)) { return; } StringBuilder sb = new StringBuilder(); String actionClassPath = serviceImplText.getText(); actionClassPath = actionClassPath.replaceFirst("/src/main/java/", "") .replaceAll(".java", "").replace('/', '.'); Pattern pattern = Pattern.compile("class=\"com.sti[\\w\\.]+ServiceImpl\""); Matcher matcher = 
null; BufferedReader reader = new BufferedReader(new StringReader(xmlConfigMessage)); for (String line = null; (line = reader.readLine()) != null;) { matcher = pattern.matcher(line); if (matcher.find()) { StringBuffer sb2 = new StringBuffer(); matcher.appendReplacement(sb2, "class=\"" + actionClassPath + "\""); matcher.appendTail(sb2); sb.append(sb2 + "\n"); } else { sb.append(line + "\n"); } } xmlConfigArea.setText(sb.toString()); } catch (Exception e) { JCommonUtil.handleException(e); } } }); } { updateBtn = new JButton(); jPanel1.add(updateBtn); updateBtn.setText("\u66f4\u65b0"); updateBtn.addActionListener(new ActionListener() { public void actionPerformed(ActionEvent evt) { updateBtnActionPerformed(evt); } }); } { makeFileBtn = new JButton(); jPanel1.add(makeFileBtn); makeFileBtn.setText("\u7522\u751f\u6a94\u6848"); makeFileBtn.addActionListener(new ActionListener() { public void actionPerformed(ActionEvent evt) { try { makeFileBtnActionPerformed(evt); } catch (IOException e) { JCommonUtil.handleException(e); } } }); } } { jPanel2 = new JPanel(); BorderLayout jPanel2Layout = new BorderLayout(); jPanel2.setLayout(jPanel2Layout); jTabbedPane1.addTab("xml", null, jPanel2, null); { jScrollPane1 = new JScrollPane(); jPanel2.add(jScrollPane1, BorderLayout.CENTER); { xmlConfigArea = new JTextArea(); jScrollPane1.setViewportView(xmlConfigArea); } } } } pack(); this.setSize(733, 525); } catch (Exception e) { e.printStackTrace(); } }
From source file:org.etudes.mneme.impl.ImporteCollegeTextServiceImpl.java
public void importQuestions(String context, Pool pool, String text) throws AssessmentPermissionException { if ((text == null) || (text.length() == 0)) return;//from w ww.j a v a2 s .co m // replace any \r\n with just a \n text = text.replaceAll("\r\n", "\n"); String title = "eCollege paste"; Float points = new Float("1"); if (pool == null) { pool = this.poolService.newPool(context); //read title from the first line ex: Unit 2: Week 2 - Quiz String findTitle = text.substring(0, text.indexOf("\n")); if (findTitle != null) { String[] titleParts = findTitle.split("[:-]"); if (titleParts.length == 2 && titleParts[1] != null && titleParts[1].length() != 0) title = titleParts[1].trim(); else if (titleParts.length > 2) title = findTitle.substring(findTitle.indexOf(titleParts[1])); } pool.setTitle(title); pool.setPointsEdit(points); // create assessment Assessment assmt = assessmentService.newAssessment(context); assmt.setType(AssessmentType.test); assmt.setTitle(title); Part part = assmt.getParts().addPart(); Pattern p_groups = Pattern.compile("Collapse[\\s]*Question(.*?)[\\n]*[\\t]*row[\\t]*Move[\\s]*Question", Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE | Pattern.DOTALL); Matcher m = p_groups.matcher(text); StringBuffer sb = new StringBuffer(); while (m.find()) { String workOn = m.group(0); String[] lines = workOn.split("[\\n]"); processECollegeTextGroup(pool, part, lines); m.appendReplacement(sb, ""); } m.appendTail(sb); // remaining last text if (sb != null && sb.length() != 0) { if (sb.indexOf("Collapse Question") != -1) { String workOn = sb.substring(sb.indexOf("Collapse Question")); String[] lines = workOn.split("[\\n]"); processECollegeTextGroup(pool, part, lines); } } try { assmt.getGrading().setGradebookIntegration(Boolean.TRUE); if (assmt.getParts().getTotalPoints().floatValue() <= 0) { assmt.setNeedsPoints(Boolean.FALSE); } assessmentService.saveAssessment(assmt); } catch (AssessmentPolicyException ep) { } this.poolService.savePool(pool); } }
From source file:com.ibm.jaggr.core.impl.modulebuilder.css.CSSModuleBuilder.java
/** * Minifies a CSS string by removing comments and excess white-space, as well as * some unneeded tokens./* w w w. ja v a 2 s. co m*/ * * @param css The contents of a CSS file as a String * @param res The resource for the CSS file * @return the minified css */ protected String minify(String css, IResource res) { // replace all quoted strings and url(...) patterns with unique ids so that // they won't be affected by whitespace removal. LinkedList<String> quotedStringReplacements = new LinkedList<String>(); Matcher m = quotedStringPattern.matcher(css); StringBuffer sb = new StringBuffer(); int i = 0; while (m.find()) { String text = (m.group(1) != null) ? ("url(" + StringUtils.trim(m.group(1)) + ")") : //$NON-NLS-1$ //$NON-NLS-2$ m.group(0); quotedStringReplacements.add(i, text); String replacement = "%%" + QUOTED_STRING_MARKER + (i++) + "__%%"; //$NON-NLS-1$ //$NON-NLS-2$ m.appendReplacement(sb, ""); //$NON-NLS-1$ sb.append(replacement); } m.appendTail(sb); css = sb.toString(); // Get rid of extra whitespace css = whitespacePattern.matcher(css).replaceAll(" "); //$NON-NLS-1$ css = endsPattern.matcher(css).replaceAll(""); //$NON-NLS-1$ css = closeBracePattern.matcher(css).replaceAll("}"); //$NON-NLS-1$ m = delimitersPattern.matcher(css); sb = new StringBuffer(); while (m.find()) { String text = m.group(1); m.appendReplacement(sb, ""); //$NON-NLS-1$ sb.append(text.length() == 1 ? text : text.replace(" ", "")); //$NON-NLS-1$ //$NON-NLS-2$ } m.appendTail(sb); css = sb.toString(); // restore quoted strings and url(...) patterns m = QUOTED_STRING_MARKER_PAT.matcher(css); sb = new StringBuffer(); while (m.find()) { i = Integer.parseInt(m.group(1)); m.appendReplacement(sb, ""); //$NON-NLS-1$ sb.append(quotedStringReplacements.get(i)); } m.appendTail(sb); css = sb.toString(); return css.toString(); }
From source file:org.apache.roller.weblogger.business.plugins.entry.SearchPluginBase.java
/** * Apply plugin to content of specified String. * * @param str String to which plugin should be applied. * @return Results of applying plugin to string. * @see org.apache.roller.weblogger.model.PagePlugin#render(String) *//*w ww. j a va2s . co m*/ public String render(WeblogEntry entry, String str) { Pattern pattern = getPattern(); Matcher m = pattern.matcher(str); StringBuffer result = new StringBuffer(str.length() + 128); // rough guess at a reasonable length Object[] args = new Object[] { "", "", null, null }; while (m.find()) { // parse out the parts of the match String type = m.group(1); boolean feelinLucky = type.equals("!"); // are ya feelin lucky? are ya punk? String linkText = m.group(2); String searchText = m.group(3); if (searchText == null || searchText.length() == 0) { searchText = linkText; } // URL-encode the search text String encodedSearchText = encodeSearchText(searchText); // form the replacement string MessageFormat linkFormat = feelinLucky ? getLuckyLinkFormat() : getLinkFormat(); StringBuffer replacement = new StringBuffer(128); args[2] = linkText; args[3] = encodedSearchText; linkFormat.format(args, replacement, new FieldPosition(0)); // append replacement m.appendReplacement(result, replacement.toString()); } m.appendTail(result); return result.toString(); }
From source file:org.alfresco.web.site.servlet.CSRFFilter.java
/**
 * Expands {@code {name}} placeholders in {@code str} using {@code propertyMap}.
 * Values are themselves expanded recursively before being inserted; placeholders
 * whose name is not a key of the map are left in place, and a key mapped to
 * {@code null} is rendered as the text {@code null}.
 *
 * @param str         the text to expand, may be {@code null}
 * @param propertyMap placeholder names and their values
 * @return the expanded text, or {@code null} if {@code str} was {@code null}
 */
private String resolve(String str, Map<String, String> propertyMap) {
    if (str == null) {
        return null;
    }

    final Matcher placeholders = Pattern.compile("\\{(.+?)\\}").matcher(str);
    final StringBuffer expanded = new StringBuffer();
    while (placeholders.find()) {
        final String name = placeholders.group(1);
        if (!propertyMap.containsKey(name)) {
            // unknown name: skip it, the text is copied through by a later append
            continue;
        }
        final String value = resolve(propertyMap.get(name), propertyMap);
        final String substitution;
        if (value == null) {
            substitution = "null";
        } else {
            substitution = Matcher.quoteReplacement(value);
        }
        placeholders.appendReplacement(expanded, substitution);
    }
    placeholders.appendTail(expanded);
    return expanded.toString();
}
From source file:org.paxle.crawler.http.impl.HttpCrawler.java
@Modified public synchronized void modified(Map<String, Object> configuration) { /*/*from ww w . ja va2 s. co m*/ * Cleanup old config */ this.cleanup(); /* * Init with changed configuration */ this.connectionManager = new MultiThreadedHttpConnectionManager(); final HttpConnectionManagerParams connectionManagerParams = connectionManager.getParams(); // configure connections per host final Integer maxConnections = (Integer) configuration.get(PROP_MAXCONNECTIONS_PER_HOST); if (maxConnections != null) { connectionManagerParams.setDefaultMaxConnectionsPerHost(maxConnections.intValue()); } // configuring timeouts final Integer connectionTimeout = (Integer) configuration.get(PROP_CONNECTION_TIMEOUT); if (connectionTimeout != null) { connectionManagerParams.setConnectionTimeout(connectionTimeout.intValue()); } final Integer socketTimeout = (Integer) configuration.get(PROP_SOCKET_TIMEOUT); if (socketTimeout != null) { connectionManagerParams.setSoTimeout(socketTimeout.intValue()); } // set new http client this.httpClient = new HttpClient(connectionManager); // the crawler should request and accept content-encoded data final Boolean acceptEncoding = (Boolean) configuration.get(PROP_ACCEPT_ENCODING); if (acceptEncoding != null) { this.acceptEncoding = acceptEncoding.booleanValue(); } // specifies if the crawler should skipp unsupported-mime-types final Boolean skipUnsupportedMimeTypes = (Boolean) configuration.get(PROP_SKIP_UNSUPPORTED_MIMETYPES); if (skipUnsupportedMimeTypes != null) { this.skipUnsupportedMimeTypes = skipUnsupportedMimeTypes.booleanValue(); } // the cookie policy to use for crawling final String propCookiePolicy = (String) configuration.get(PROP_COOKIE_POLICY); this.cookiePolicy = (propCookiePolicy == null || propCookiePolicy.length() == 0) ? 
CookiePolicy.BROWSER_COMPATIBILITY : propCookiePolicy; // the http-user-agent string that should be used final String userAgent = (String) configuration.get(PROP_USER_AGENT); if (userAgent != null) { StringBuffer buf = new StringBuffer(); Pattern pattern = Pattern.compile("\\$\\{[^\\}]*}"); Matcher matcher = pattern.matcher(userAgent); // replacing property placeholders with system-properties while (matcher.find()) { String placeHolder = matcher.group(); String propName = placeHolder.substring(2, placeHolder.length() - 1); String propValue = System.getProperty(propName); if (propValue != null) matcher.appendReplacement(buf, propValue); } matcher.appendTail(buf); this.userAgent = buf.toString(); } else { // Fallback this.userAgent = "PaxleFramework"; } // download limit in bytes final Integer maxDownloadSize = (Integer) configuration.get(PROP_MAXDOWNLOAD_SIZE); if (maxDownloadSize != null) { this.maxDownloadSize = maxDownloadSize.intValue(); } // limit data transfer rate final Integer transferLimit = (Integer) configuration.get(PROP_TRANSFER_LIMIT); int limitKBps = 0; if (transferLimit != null) limitKBps = transferLimit.intValue(); this.logger.debug("transfer rate limit: " + limitKBps + " kb/s"); // TODO: lrc = (limitKBps > 0) ? 
new CrawlerTools.LimitedRateCopier(limitKBps) : null; // proxy configuration final Boolean useProxyVal = (Boolean) configuration.get(PROP_PROXY_USE); final String host = (String) configuration.get(PROP_PROXY_HOST); final Integer portVal = (Integer) configuration.get(PROP_PROXY_PORT); if (useProxyVal != null && useProxyVal.booleanValue() && host != null && host.length() > 0 && portVal != null) { this.logger.info(String.format("Proxy is enabled: %s:%d", host, portVal)); final int port = portVal.intValue(); final ProxyHost proxyHost = new ProxyHost(host, port); this.httpClient.getHostConfiguration().setProxyHost(proxyHost); final String user = (String) configuration.get(PROP_PROXY_HOST); final String pwd = (String) configuration.get(PROP_PROXY_PASSWORD); if (user != null && user.length() > 0 && pwd != null && pwd.length() > 0) this.httpClient.getState().setProxyCredentials(new AuthScope(host, port), new UsernamePasswordCredentials(user, pwd)); } else { this.logger.info("Proxy is disabled"); this.httpClient.getHostConfiguration().setProxyHost(null); this.httpClient.getState().clearCredentials(); } }