List of usage examples for java.net.URL#getQuery
public String getQuery()
From source file:com.gargoylesoftware.htmlunit.HttpWebConnection.java
/** * Creates an <tt>HttpMethod</tt> instance according to the specified parameters. * @param webRequest the request//from w ww . j a v a 2 s . c o m * @param httpClientBuilder the httpClientBuilder that will be configured * @return the <tt>HttpMethod</tt> instance constructed according to the specified parameters * @throws IOException * @throws URISyntaxException */ @SuppressWarnings("deprecation") private HttpUriRequest makeHttpMethod(final WebRequest webRequest, final HttpClientBuilder httpClientBuilder) throws IOException, URISyntaxException { final String charset = webRequest.getCharset(); // Make sure that the URL is fully encoded. IE actually sends some Unicode chars in request // URLs; because of this we allow some Unicode chars in URLs. However, at this point we're // handing things over the HttpClient, and HttpClient will blow up if we leave these Unicode // chars in the URL. final URL url = UrlUtils.encodeUrl(webRequest.getUrl(), false, charset); // URIUtils.createURI is deprecated but as of httpclient-4.2.1, URIBuilder doesn't work here as it encodes path // what shouldn't happen here URI uri = URIUtils.createURI(url.getProtocol(), url.getHost(), url.getPort(), url.getPath(), escapeQuery(url.getQuery()), null); if (getVirtualHost() != null) { uri = URI.create(getVirtualHost()); } final HttpRequestBase httpMethod = buildHttpMethod(webRequest.getHttpMethod(), uri); setProxy(httpMethod, webRequest); if (!(httpMethod instanceof HttpEntityEnclosingRequest)) { // this is the case for GET as well as TRACE, DELETE, OPTIONS and HEAD if (!webRequest.getRequestParameters().isEmpty()) { final List<NameValuePair> pairs = webRequest.getRequestParameters(); final org.apache.http.NameValuePair[] httpClientPairs = NameValuePair.toHttpClient(pairs); final String query = URLEncodedUtils.format(Arrays.asList(httpClientPairs), charset); uri = URIUtils.createURI(url.getProtocol(), url.getHost(), url.getPort(), url.getPath(), query, null); httpMethod.setURI(uri); } } else { 
// POST as well as PUT and PATCH final HttpEntityEnclosingRequest method = (HttpEntityEnclosingRequest) httpMethod; if (webRequest.getEncodingType() == FormEncodingType.URL_ENCODED && method instanceof HttpPost) { final HttpPost postMethod = (HttpPost) method; if (webRequest.getRequestBody() == null) { final List<NameValuePair> pairs = webRequest.getRequestParameters(); final org.apache.http.NameValuePair[] httpClientPairs = NameValuePair.toHttpClient(pairs); final String query = URLEncodedUtils.format(Arrays.asList(httpClientPairs), charset); final StringEntity urlEncodedEntity = new StringEntity(query, charset); urlEncodedEntity.setContentType(URLEncodedUtils.CONTENT_TYPE); postMethod.setEntity(urlEncodedEntity); } else { final String body = StringUtils.defaultString(webRequest.getRequestBody()); final StringEntity urlEncodedEntity = new StringEntity(body, charset); urlEncodedEntity.setContentType(URLEncodedUtils.CONTENT_TYPE); postMethod.setEntity(urlEncodedEntity); } } else if (FormEncodingType.MULTIPART == webRequest.getEncodingType()) { final Charset c = getCharset(charset, webRequest.getRequestParameters()); final MultipartEntityBuilder builder = MultipartEntityBuilder.create().setLaxMode(); builder.setCharset(c); for (final NameValuePair pair : webRequest.getRequestParameters()) { if (pair instanceof KeyDataPair) { buildFilePart((KeyDataPair) pair, builder); } else { builder.addTextBody(pair.getName(), pair.getValue(), ContentType.create("text/plain", charset)); } } method.setEntity(builder.build()); } else { // for instance a PUT or PATCH request final String body = webRequest.getRequestBody(); if (body != null) { method.setEntity(new StringEntity(body, charset)); } } } configureHttpProcessorBuilder(httpClientBuilder, webRequest); // Tell the client where to get its credentials from // (it may have changed on the webClient since last call to getHttpClientFor(...)) final CredentialsProvider credentialsProvider = webClient_.getCredentialsProvider(); // if 
the used url contains credentials, we have to add this final Credentials requestUrlCredentials = webRequest.getUrlCredentials(); if (null != requestUrlCredentials && webClient_.getBrowserVersion().hasFeature(URL_AUTH_CREDENTIALS)) { final URL requestUrl = webRequest.getUrl(); final AuthScope authScope = new AuthScope(requestUrl.getHost(), requestUrl.getPort()); // updating our client to keep the credentials for the next request credentialsProvider.setCredentials(authScope, requestUrlCredentials); httpContext_.removeAttribute(HttpClientContext.TARGET_AUTH_STATE); } // if someone has set credentials to this request, we have to add this final Credentials requestCredentials = webRequest.getCredentials(); if (null != requestCredentials) { final URL requestUrl = webRequest.getUrl(); final AuthScope authScope = new AuthScope(requestUrl.getHost(), requestUrl.getPort()); // updating our client to keep the credentials for the next request credentialsProvider.setCredentials(authScope, requestCredentials); httpContext_.removeAttribute(HttpClientContext.TARGET_AUTH_STATE); } httpClientBuilder.setDefaultCredentialsProvider(credentialsProvider); httpContext_.removeAttribute(HttpClientContext.CREDS_PROVIDER); return httpMethod; }
From source file:org.apache.hc.core5.http.benchmark.HttpBenchmark.java
private ClassicHttpRequest createRequest(final HttpHost host) { final URL url = config.getUrl(); HttpEntity entity = null;/* www . ja va2s .c o m*/ // Prepare requests for each thread if (config.getPayloadFile() != null) { final FileEntity fe = new FileEntity(config.getPayloadFile()); fe.setContentType(config.getContentType()); fe.setChunked(config.isUseChunking()); entity = fe; } else if (config.getPayloadText() != null) { final StringEntity se = new StringEntity(config.getPayloadText(), ContentType.parse(config.getContentType())); se.setChunked(config.isUseChunking()); entity = se; } final ClassicHttpRequest request; if ("POST".equals(config.getMethod())) { final ClassicHttpRequest httppost = new BasicClassicHttpRequest("POST", url.getPath()); httppost.setEntity(entity); request = httppost; } else if ("PUT".equals(config.getMethod())) { final ClassicHttpRequest httpput = new BasicClassicHttpRequest("PUT", url.getPath()); httpput.setEntity(entity); request = httpput; } else { String path = url.getPath(); if (url.getQuery() != null && url.getQuery().length() > 0) { path += "?" + url.getQuery(); } else if (path.trim().length() == 0) { path = "/"; } request = new BasicClassicHttpRequest(config.getMethod(), path); } request.setVersion(config.isUseHttp1_0() ? 
HttpVersion.HTTP_1_0 : HttpVersion.HTTP_1_1); if (!config.isKeepAlive()) { request.addHeader(new DefaultHeader(HttpHeaders.CONNECTION, HeaderElements.CLOSE)); } final String[] headers = config.getHeaders(); if (headers != null) { for (final String s : headers) { final int pos = s.indexOf(':'); if (pos != -1) { final Header header = new DefaultHeader(s.substring(0, pos).trim(), s.substring(pos + 1)); request.addHeader(header); } } } if (config.isUseAcceptGZip()) { request.addHeader(new DefaultHeader("Accept-Encoding", "gzip")); } if (config.getSoapAction() != null && config.getSoapAction().length() > 0) { request.addHeader(new DefaultHeader("SOAPAction", config.getSoapAction())); } request.setScheme(host.getSchemeName()); request.setAuthority(new URIAuthority(host)); return request; }
From source file:org.apache.axis2.transport.http.impl.httpclient3.HTTPSenderImpl.java
/** * Method used to copy all the common properties * //from w ww . j a va2 s. c om * @param msgContext * - The messageContext of the request message * @param url * - The target URL * @param httpMethod * - The http method used to send the request * @param httpClient * - The httpclient used to send the request * @param soapActionString * - The soap action atring of the request message * @return MessageFormatter - The messageFormatter for the relavent request * message * @throws AxisFault * - Thrown in case an exception occurs */ protected MessageFormatter populateCommonProperties(MessageContext msgContext, URL url, HttpMethodBase httpMethod, HttpClient httpClient, String soapActionString) throws AxisFault { if (isAuthenticationEnabled(msgContext)) { httpMethod.setDoAuthentication(true); } MessageFormatter messageFormatter = TransportUtils.getMessageFormatter(msgContext); url = messageFormatter.getTargetAddress(msgContext, format, url); httpMethod.setPath(url.getPath()); httpMethod.setQueryString(url.getQuery()); httpMethod.setRequestHeader(HTTPConstants.HEADER_CONTENT_TYPE, messageFormatter.getContentType(msgContext, format, soapActionString)); httpMethod.setRequestHeader(HTTPConstants.HEADER_HOST, url.getHost()); if (msgContext.getOptions() != null && msgContext.getOptions().isManageSession()) { // setting the cookie in the out path Object cookieString = msgContext.getProperty(HTTPConstants.COOKIE_STRING); if (cookieString != null) { StringBuffer buffer = new StringBuffer(); buffer.append(cookieString); httpMethod.setRequestHeader(HTTPConstants.HEADER_COOKIE, buffer.toString()); } } if (httpVersion.equals(HTTPConstants.HEADER_PROTOCOL_10)) { httpClient.getParams().setVersion(HttpVersion.HTTP_1_0); } return messageFormatter; }
From source file:com.globalsight.connector.mindtouch.util.MindTouchHelper.java
/** * Send the translated contents back to MindTouch server via pages "post" * API. If the path specified page has already exists, it will be * updated;Otherwise, create a new page. * /*from ww w .j a va 2 s.com*/ * @param contentsTrgFile * @param pageInfo * @param targetLocale * @throws Exception */ public void postPageContents(File contentsTrgFile, MindTouchPageInfo pageInfo, String sourceLocale, String targetLocale) throws Exception { if (!isTargetServerExist(targetLocale) && !mtc.getIsPostToSourceServer()) { return; } CloseableHttpClient httpClient = getHttpClient(); String path = null; try { // to be safe, it must use "text/plain" content type instead of // "text/xml" or "application/xml". String content = FileUtil.readFile(contentsTrgFile, "UTF-8"); content = StringUtil.replace(content, " ", " "); String title = getTitleFromTranslatedContentXml(content); content = fixTitleValueInContentXml(content); // Only when target server exists, do this... if (isTargetServerExist(targetLocale)) { content = handleFiles(pageInfo.getPageId(), content, targetLocale, sourceLocale, pageInfo); } int times = 0; while (times < 2) { times++; HttpResponse response = null; try { String tmpContent = content; tmpContent = EditUtil.decodeXmlEntities(tmpContent); // empty body if (tmpContent.indexOf("<body/>") > -1) { tmpContent = ""; } // normal case else { tmpContent = tmpContent.substring(tmpContent.indexOf("<body>") + 6); tmpContent = tmpContent.substring(0, tmpContent.indexOf("</body>")); } StringEntity reqEntity = new StringEntity(tmpContent, "UTF-8"); reqEntity.setContentType("text/plain; charset=UTF-8"); path = getNewPath(pageInfo, sourceLocale, targetLocale); String strUrl = getPutServerUrl(targetLocale) + "/@api/deki/pages/=" + path + "/contents?edittime=now&abort=never"; if (title != null) { strUrl += "&title=" + title; } URL url = new URL(strUrl); URI uri = new URI(url.getProtocol(), url.getHost(), url.getPath(), url.getQuery(), null); HttpPost httppost = getHttpPost(uri, 
targetLocale); httppost.setEntity(reqEntity); response = httpClient.execute(httppost); String entityContent = null; if (response.getEntity() != null) { entityContent = EntityUtils.toString(response.getEntity()); } if (response.getStatusLine().getStatusCode() == HttpStatus.SC_OK) { break; } else { String msg = ""; if (times == 1) { msg = "First try "; } else { msg = "Second try "; } msg += "fails to post contents back to MindTouch server for page '" + path + "' : " + entityContent; logger.error(msg); } } catch (Exception e) { logger.error("Fail to post contents back to MindTouch server for " + times + "times for page '" + path + "'.", e); } finally { consumeQuietly(response); } } } catch (Exception e) { logger.error("Fail to post contents back to MindTouch server for page '" + path + "'.", e); } }
From source file:org.apache.jmeter.protocol.http.visualizers.RequestViewHTTP.java
@Override public void setSamplerResult(Object objectResult) { this.searchTextExtension.resetTextToFind(); if (objectResult instanceof HTTPSampleResult) { HTTPSampleResult sampleResult = (HTTPSampleResult) objectResult; // Display with same order HTTP protocol requestModel.addRow(new RowResult(JMeterUtils.getResString("view_results_table_request_http_method"), //$NON-NLS-1$ sampleResult.getHTTPMethod())); // Parsed request headers LinkedHashMap<String, String> lhm = JMeterUtils.parseHeaders(sampleResult.getRequestHeaders()); for (Entry<String, String> entry : lhm.entrySet()) { headersModel.addRow(new RowResult(entry.getKey(), entry.getValue())); }/*from w w w. ja va 2 s .co m*/ URL hUrl = sampleResult.getURL(); if (hUrl != null) { // can be null - e.g. if URL was invalid requestModel .addRow(new RowResult(JMeterUtils.getResString("view_results_table_request_http_protocol"), //$NON-NLS-1$ hUrl.getProtocol())); requestModel.addRow(new RowResult(JMeterUtils.getResString("view_results_table_request_http_host"), //$NON-NLS-1$ hUrl.getHost())); int port = hUrl.getPort() == -1 ? hUrl.getDefaultPort() : hUrl.getPort(); requestModel.addRow(new RowResult(JMeterUtils.getResString("view_results_table_request_http_port"), //$NON-NLS-1$ Integer.valueOf(port))); requestModel.addRow(new RowResult(JMeterUtils.getResString("view_results_table_request_http_path"), //$NON-NLS-1$ hUrl.getPath())); String queryGet = hUrl.getQuery() == null ? 
"" : hUrl.getQuery(); //$NON-NLS-1$ boolean isMultipart = isMultipart(lhm); // Concatenate query post if exists String queryPost = sampleResult.getQueryString(); if (!isMultipart && StringUtils.isNotBlank(queryPost)) { if (queryGet.length() > 0) { queryGet += PARAM_CONCATENATE; } queryGet += queryPost; } if (StringUtils.isNotBlank(queryGet)) { Set<Entry<String, String[]>> keys = RequestViewHTTP.getQueryMap(queryGet).entrySet(); for (Entry<String, String[]> entry : keys) { for (String value : entry.getValue()) { paramsModel.addRow(new RowResult(entry.getKey(), value)); } } } if (isMultipart && StringUtils.isNotBlank(queryPost)) { String contentType = lhm.get(HTTPConstants.HEADER_CONTENT_TYPE); String boundaryString = extractBoundary(contentType); MultipartUrlConfig urlconfig = new MultipartUrlConfig(boundaryString); urlconfig.parseArguments(queryPost); for (JMeterProperty prop : urlconfig.getArguments()) { Argument arg = (Argument) prop.getObjectValue(); paramsModel.addRow(new RowResult(arg.getName(), arg.getValue())); } } } // Display cookie in headers table (same location on http protocol) String cookie = sampleResult.getCookies(); if (cookie != null && cookie.length() > 0) { headersModel .addRow(new RowResult(JMeterUtils.getParsedLabel("view_results_table_request_http_cookie"), //$NON-NLS-1$ sampleResult.getCookies())); } } else { // add a message when no http sample requestModel.addRow(new RowResult("", //$NON-NLS-1$ JMeterUtils.getResString("view_results_table_request_http_nohttp"))); //$NON-NLS-1$ } }
From source file:org.restcomm.app.utillib.Reporters.WebReporter.WebReporter.java
public Bitmap getCarrierLogo(HashMap<String, String> carrierparams) throws LibException { SharedPreferences securePreferences = PreferenceKeys.getSecurePreferences(mContext); try {/* w w w.j a v a 2 s . co m*/ //NetworkRequest networksRequest = new NetworkRequest(mHost, mApiKey, carrierparams); URL request = NetworkRequest.getURL(mHost, mApiKey, carrierparams); String networksResponseString = null; String opresponse = securePreferences.getString(PreferenceKeys.Miscellaneous.OPERATOR_RESPONSE, null); String userresponse = securePreferences.getString(PreferenceKeys.Miscellaneous.USER_RESPONSE, null); String oprequest = securePreferences.getString(PreferenceKeys.Miscellaneous.OPERATOR_REQUEST, null); if (opresponse != null && oprequest != null && userresponse != null) { if (oprequest.equals(request.getQuery())) { networksResponseString = opresponse; } else if (isCarrierKnown(oprequest) && !isCarrierKnown(request.getQuery())) { networksResponseString = opresponse; LoggerUtil.logToFile(LoggerUtil.Level.DEBUG, TAG, "getCarrierLogo", "keep last known carrier" + request.getQuery()); } else LoggerUtil.logToFile(LoggerUtil.Level.DEBUG, TAG, "getCarrierLogo", "REQUEST CARRIER " + request.getQuery()); } if (networksResponseString == null) { HttpURLConnection connection = (HttpURLConnection) request.openConnection(); connection.connect(); verifyConnectionResponse(connection); networksResponseString = readString(connection); securePreferences.edit() .putString(PreferenceKeys.Miscellaneous.OPERATOR_RESPONSE, networksResponseString).commit(); securePreferences.edit() .putString(PreferenceKeys.Miscellaneous.OPERATOR_REQUEST, request.getQuery()).commit(); int userid = Global.getUserID(mContext); URL requestuser = UserRequest.getURL(mHost, mApiKey, userid); connection = (HttpURLConnection) requestuser.openConnection(); connection.connect(); verifyConnectionResponse(connection); String userResponseString = readString(connection); 
securePreferences.edit().putString(PreferenceKeys.Miscellaneous.USER_RESPONSE, userResponseString) .commit(); } JSONObject operator = new JSONObject(networksResponseString).getJSONObject(JSON_NETWORK_KEY); carrierCurr = new Carrier(operator); String carriername = carrierCurr.Name; String logo = carrierCurr.Path; logo = logo.substring(1); // remove leading slash carrierCurr.loadLogo(mContext); String logoPath = mContext.getApplicationContext().getFilesDir() + carrierCurr.Path; try { carrierCurr.Logo = BitmapFactory.decodeFile(logoPath); } catch (OutOfMemoryError e) { LoggerUtil.logToFile(LoggerUtil.Level.ERROR, "StatCategory", "StatCategory", "OutOfMemoryError loading logo " + logoPath); } return carrierCurr.Logo; } catch (IOException e) { throw new LibException(e); } catch (JSONException e) { securePreferences.edit().putString(PreferenceKeys.Miscellaneous.OPERATOR_RESPONSE, null).commit(); throw new LibException(e); } catch (Exception e) { throw new LibException(e); } }
From source file:at.spardat.xma.boot.transport.HTTPTransport.java
private Result getResourceImpl(IRtXMASessionClient session, URL url, long modifiedSince, String etag) throws CommunicationException { /* locals ---------------------------------- */ Result result = new Result(); int code = 0; HttpURLConnection conn;/* w w w .j a v a 2 s .co m*/ /* locals ---------------------------------- */ try { conn = (HttpURLConnection) url.openConnection(); if (conn instanceof HttpsURLConnection) { ((HttpsURLConnection) conn).setHostnameVerifier(hostnameVerifier); } sendCookies(session, url, conn); if (etag != null) { conn.setRequestProperty("If-None-Match", etag); //$NON-NLS-1$ } String strUrl = url.toExternalForm(); if (url.getQuery() == null && (strUrl.endsWith(".jar") || strUrl.endsWith(".xml"))) { conn.setRequestProperty(Statics.HTTP_CACHE_CONTROL, Statics.HTTP_MAX_AGE + "=0"); //$NON-NLS-1$ } if (modifiedSince > 0) { // see sun bugid: 4397096 // if HTTP_Util library is used, the original method may also be used. // conn.setIfModifiedSince(modifiedSince); conn.setRequestProperty(Statics.strIfModifiedSince, HTTPTransport.httpDate(modifiedSince)); } conn.setRequestProperty(Statics.HTTP_ACCEPT, "*/*"); //$NON-NLS-1$ conn.setRequestProperty(Statics.HTTP_USER_AGENT, Statics.HTTP_USER_AGENT_NAME); } catch (IOException exc) { log_.log(LogLevel.WARNING, "error loading '" + url.toString() + "' form server:", exc); //$NON-NLS-1$ throw new ConnectException("error loading '" + url.toString() + "' form server:", exc); } try { code = conn.getResponseCode(); if (code == HttpURLConnection.HTTP_NOT_MODIFIED) { result.contentLength_ = 0; result.lastModified_ = conn.getLastModified(); if (result.lastModified_ <= 0) { result.lastModified_ = modifiedSince; } result.expirationDate_ = conn.getExpiration(); result.etag_ = conn.getHeaderField(Statics.strEtag); if (result.etag_ == null) { result.etag_ = etag; } log_.log(LogLevel.FINE, "resource not modified: {0}", url.toExternalForm()); //$NON-NLS-1$ } else if (code == HttpURLConnection.HTTP_OK) { 
result.contentLength_ = conn.getContentLength(); result.lastModified_ = conn.getLastModified(); result.expirationDate_ = conn.getExpiration(); result.etag_ = conn.getHeaderField(Statics.strEtag); result.transformations_ = conn.getHeaderField(Statics.TRANSFORM_HEADER); result.setBuffer(this.readOutput(conn)); if (result.contentLength_ < 0) { result.contentLength_ = result.buffer_.length; } } else if (code == HttpURLConnection.HTTP_MOVED_TEMP || code == HttpURLConnection.HTTP_MOVED_PERM) { String location = conn.getHeaderField(Statics.HTTP_LOCATION); throw new RedirectException("redirect received from " + url.toString() + " to " + location, code, location); } else { if (code < 500) throw new ConnectException("error loading '" + url.toString() + "' from the server:", code); else throw new ServerException("error loading '" + url.toString() + "' from the server:", code); } readCookies(session, url, conn); } catch (RedirectException re) { throw re; } catch (CommunicationException ce) { if (code != 0) log_.log(LogLevel.WARNING, "http returncode: {0}", Integer.toString(code)); //$NON-NLS-1$ log_.log(LogLevel.WARNING, "error loading '" + url.toString() + "' from the server:", ce); //$NON-NLS-1$ throw ce; } catch (Exception ex) { if (code != 0) log_.log(LogLevel.WARNING, "http returncode: {0}", Integer.toString(code)); //$NON-NLS-1$ log_.log(LogLevel.WARNING, "error loading '" + url.toString() + "' from the server:", ex); //$NON-NLS-1$ if (code < 500) throw new ConnectException("error loading '" + url.toString() + "' from the server:", ex); else throw new ServerException("error loading '" + url.toString() + "' from the server:", ex); } return result; }
From source file:org.archive.nutchwax.ImporterToHdfs.java
/** * Import an ARCRecord./*from www .j a v a 2 s.co m*/ * * @param record * @param segmentName * @param collectionName * @param output * @return whether record was imported or not (i.e. filtered out due to URL * filtering rules, etc.) */ private boolean importRecord(ARCRecord record, String segmentName, String collectionName, OutputCollector output, Writer writer) { ARCRecordMetaData meta = record.getMetaData(); if (LOG.isInfoEnabled()) { LOG.info("Consider URL: " + meta.getUrl() + " (" + meta.getMimetype() + ") [" + meta.getLength() + "]"); } if (!this.httpStatusCodeFilter.isAllowed(record.getStatusCode())) { if (LOG.isInfoEnabled()) { LOG.info("Skip URL: " + meta.getUrl() + " HTTP status:" + record.getStatusCode()); } return false; } try { // Skip the HTTP headers in the response body, so that the // parsers are parsing the reponse body and not the HTTP // headers. record.skipHttpHeader(); // We use record.available() rather than meta.getLength() // because the latter includes the size of the HTTP header, // which we just skipped. byte[] bytes = readBytes(record, record.available()); // If there is no digest, then we assume we're reading an // ARCRecord not a WARCRecord. In that case, we close the // record, which updates the digest string. Then we tweak the // digest string so we have the same for for both ARC and WARC // records. if (meta.getDigest() == null) { record.close(); // This is a bit hacky, but ARC and WARC records produce // two slightly different digest formats. WARC record // digests have the algorithm name as a prefix, such as // "sha1:PD3SS4WWZVFWTDC63RU2MWX7BVC2Y2VA" but the // ArcRecord.getDigestStr() does not. Since we want the // formats to match, we prepend the "sha1:" prefix to ARC // record digest. 
meta.setDigest("sha1:" + record.getDigestStr()); } // Normalize and filter String url = this.normalizeAndFilterUrl(meta.getUrl(), meta.getDigest(), meta.getDate()); if (url == null) { if (LOG.isInfoEnabled()) { LOG.info("Skip URL: " + meta.getUrl()); } return false; } // We create a key which combines the URL and digest values. // This is necessary because Nutch stores all the data in // MapFiles, which are basically just {key,value} pairs. // // If we use just the URL as the key (which is the way Nutch // usually works) then we have problems with multiple, // different copies of the same URL. If we try and store two // different copies of the same URL (each having a different // digest) and only use the URL as the key, when the MapFile // is written, only *one* copy of the page will be stored. // // Think about it, we're basically doing: // MapFile.put( url, value1 ); // MapFile.put( url, value2 ); // Only one of those url,value mappings will keep, the other // is over-written. // // So, by using the url+digest as the key, we can have all the // data stored. The only problem is all over in Nutch where // the key==url is assumed :( String key = url + " " + meta.getDigest(); Metadata contentMetadata = new Metadata(); // Set the segment name, just as is done by standard Nutch fetching. // Then, add the NutchWAX-specific metadata fields. contentMetadata.set(Nutch.SEGMENT_NAME_KEY, segmentName); // We store both the normal URL and the URL+digest key for // later retrieval by the indexing plugin(s). 
contentMetadata.set(NutchWax.URL_KEY, url); // contentMetadata.set( NutchWax.ORIG_KEY, key ); contentMetadata.set(NutchWax.FILENAME_KEY, meta.getArcFile().getName()); contentMetadata.set(NutchWax.FILEOFFSET_KEY, String.valueOf(record.getHeader().getOffset())); contentMetadata.set(NutchWax.COLLECTION_KEY, collectionName); contentMetadata.set(NutchWax.DATE_KEY, meta.getDate()); contentMetadata.set(NutchWax.DIGEST_KEY, meta.getDigest()); contentMetadata.set(NutchWax.CONTENT_TYPE_KEY, meta.getMimetype()); contentMetadata.set(NutchWax.CONTENT_LENGTH_KEY, String.valueOf(meta.getLength())); contentMetadata.set(NutchWax.HTTP_RESPONSE_KEY, String.valueOf(record.getStatusCode())); Content content = new Content(url, url, bytes, meta.getMimetype(), contentMetadata, getConf()); // ----------------- // write to seqencefile byte[] contentInOctets = content.getContent(); String htmlraw = new String(); // meta only contains char encodings // LOG.info("Metadata count: " + contentMetadata.names().length); // for (String name : contentMetadata.names()){ // LOG.info("meta " + name + " : " + contentMetadata.get(name)); // } // try getting content encoding try { htmlraw = new String(contentInOctets, contentMetadata.get("OriginalCharEncoding")); } catch (Exception e) { LOG.warn("could not get content with OriginalCharEncoding"); } // if unable, try utf-8 if (htmlraw.length() == 0) { try { htmlraw = new String(contentInOctets, "UTF-8"); } catch (UnsupportedEncodingException e) { LOG.error("unable to convert content into string"); } } URL url_h = null; try { url_h = new URL(content.getUrl()); } catch (MalformedURLException e1) { LOG.error("Malformed URL Exception: " + e1.getMessage()); } String protocol = url_h.getProtocol(); String hostname = url_h.getHost(); String urlpath = url_h.getPath(); String param = url_h.getQuery(); //LOG.info("HOST:" + hostname); //LOG.info("PATH:" + urlpath); //LOG.info("PROTOCOL:" + protocol); //LOG.info("PARAM: " + param); String date = meta.getDate(); // 
LOG.info("meta date: " + date); Text key_h = new Text(protocol + "::" + hostname + "::" + urlpath + "::" + param + "::" + date); Text value = new Text(htmlraw); try { LOG.info("len: " + writer.getLength() + ", key: " + key_h + ", value len: " + value.getLength()); writer.append(key_h, value); } catch (IOException e) { LOG.error("SequenceFile IOException: " + e.getMessage()); } // ----------------- output(output, new Text(key), content); return true; } catch (Throwable t) { LOG.error("Import fail : " + meta.getUrl(), t); } return false; }
From source file:com.connectsdk.service.DLNAService.java
/**
 * Percent-encodes a media URL unless it already is percent-encoded.
 *
 * <p>The URL counts as "already encoded" when URL-decoding it changes it; in that case it
 * is returned untouched. Otherwise its components are passed to the multi-argument
 * {@link URI} constructor, which quotes illegal characters, and the ASCII form is returned.
 * A URL containing a stray {@code '%'} that does not start a valid escape (for example
 * {@code "100% done.mp4"}) cannot be decoded and is therefore treated as not encoded.
 *
 * @param mediaURL the URL to encode; may be {@code null} or empty
 * @return the encoded URL, or an empty string for {@code null}/empty input
 * @throws MalformedURLException if {@code mediaURL} cannot be parsed as a URL
 * @throws URISyntaxException if the URL components do not form a valid URI
 * @throws UnsupportedEncodingException if UTF-8 is not supported
 */
String encodeURL(String mediaURL)
        throws MalformedURLException, URISyntaxException, UnsupportedEncodingException {
    if (mediaURL == null || mediaURL.isEmpty()) {
        return "";
    }

    boolean alreadyEncoded;
    try {
        alreadyEncoded = !URLDecoder.decode(mediaURL, "UTF-8").equals(mediaURL);
    } catch (IllegalArgumentException e) {
        // URLDecoder rejects '%' not followed by two hex digits; such a URL is plainly
        // not percent-encoded yet, so encode it below instead of failing.
        alreadyEncoded = false;
    }
    if (alreadyEncoded) {
        return mediaURL;
    }

    URL url = new URL(mediaURL);
    URI uri = new URI(url.getProtocol(), url.getUserInfo(), url.getHost(), url.getPort(),
            url.getPath(), url.getQuery(), url.getRef());
    return uri.toASCIIString();
}