List of usage examples for java.lang.Float.isNaN
public static boolean isNaN(float v)
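The examples below are taken from open-source projects. As a warm-up, here is a minimal, self-contained sketch (class and values are illustrative, not from any of the projects below) showing why an isNaN check is needed at all: NaN is the only float value that never compares equal to anything, including itself, so an == test cannot detect it.

public class FloatIsNaNBasics {
    public static void main(String[] args) {
        float nan = 0.0f / 0.0f;               // dividing zero by zero yields NaN

        System.out.println(nan == Float.NaN);  // false: NaN never compares equal, even to itself
        System.out.println(Float.isNaN(nan));  // true: the reliable way to test for NaN

        // Common defensive pattern seen throughout the examples below:
        // replace NaN with a safe default before using the value.
        float safe = Float.isNaN(nan) ? 0.0f : nan;
        System.out.println(safe);              // 0.0
    }
}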
From source file:org.apache.pdfbox.text.PDFTextStripper.java
/**
 * This will print the text of the processed page to "output". It will estimate, based on the coordinates of the
 * text, where newlines and word spacings should be placed. The text will be sorted only if that feature was
 * enabled.
 *
 * @throws IOException If there is an error writing the text.
 */
protected void writePage() throws IOException {
    float maxYForLine = MAX_Y_FOR_LINE_RESET_VALUE;
    float minYTopForLine = MIN_Y_TOP_FOR_LINE_RESET_VALUE;
    float endOfLastTextX = END_OF_LAST_TEXT_X_RESET_VALUE;
    float lastWordSpacing = LAST_WORD_SPACING_RESET_VALUE;
    float maxHeightForLine = MAX_HEIGHT_FOR_LINE_RESET_VALUE;
    PositionWrapper lastPosition = null;
    PositionWrapper lastLineStartPosition = null;

    boolean startOfPage = true; // flag to indicate start of page
    boolean startOfArticle;
    if (charactersByArticle.size() > 0) {
        writePageStart();
    }

    for (List<TextPosition> textList : charactersByArticle) {
        if (getSortByPosition()) {
            TextPositionComparator comparator = new TextPositionComparator();
            // because the TextPositionComparator is not transitive, but
            // JDK7+ enforces transitivity on comparators, we need to use
            // a custom quicksort implementation (which is slower, unfortunately).
            if (useCustomQuickSort) {
                QuickSort.sort(textList, comparator);
            } else {
                Collections.sort(textList, comparator);
            }
        }

        startArticle();
        startOfArticle = true;

        // Now cycle through to print the text.
        // We queue up a line at a time before we print so that we can convert
        // the line from presentation form to logical form (if needed).
        List<LineItem> line = new ArrayList<LineItem>();
        Iterator<TextPosition> textIter = textList.iterator();

        // PDF files don't always store spaces. We will need to guess where we should add
        // spaces based on the distances between TextPositions. Historically, this was done
        // based on the size of the space character provided by the font. In general, this
        // worked but there were cases where it did not work. Calculating the average character
        // width and using that as a metric works better in some cases but fails in some cases
        // where the spacing worked. So we use both. NOTE: Adobe reader also fails on some of
        // these examples.

        // Keeps track of the previous average character width
        float previousAveCharWidth = -1;
        while (textIter.hasNext()) {
            TextPosition position = textIter.next();
            PositionWrapper current = new PositionWrapper(position);
            String characterValue = position.getUnicode();

            // Resets the average character width when we see a change in font
            // or a change in the font size
            if (lastPosition != null
                    && (position.getFont() != lastPosition.getTextPosition().getFont()
                            || position.getFontSize() != lastPosition.getTextPosition().getFontSize())) {
                previousAveCharWidth = -1;
            }

            float positionX;
            float positionY;
            float positionWidth;
            float positionHeight;

            // If we are sorting, then we need to use the text direction
            // adjusted coordinates, because they were used in the sorting.
            if (getSortByPosition()) {
                positionX = position.getXDirAdj();
                positionY = position.getYDirAdj();
                positionWidth = position.getWidthDirAdj();
                positionHeight = position.getHeightDir();
            } else {
                positionX = position.getX();
                positionY = position.getY();
                positionWidth = position.getWidth();
                positionHeight = position.getHeight();
            }

            // The current amount of characters in a word
            int wordCharCount = position.getIndividualWidths().length;

            // Estimate the expected width of the space based on the
            // space character with some margin.
            float wordSpacing = position.getWidthOfSpace();
            float deltaSpace;
            if (wordSpacing == 0 || Float.isNaN(wordSpacing)) {
                deltaSpace = Float.MAX_VALUE;
            } else {
                if (lastWordSpacing < 0) {
                    deltaSpace = wordSpacing * getSpacingTolerance();
                } else {
                    deltaSpace = (wordSpacing + lastWordSpacing) / 2f * getSpacingTolerance();
                }
            }

            // Estimate the expected width of the space based on the average character width
            // with some margin. This calculation does not make a true average (average of
            // averages) but we found that it gave the best results after numerous experiments.
            // Based on experiments we also found that .3 worked well.
            float averageCharWidth;
            if (previousAveCharWidth < 0) {
                averageCharWidth = positionWidth / wordCharCount;
            } else {
                averageCharWidth = (previousAveCharWidth + positionWidth / wordCharCount) / 2f;
            }
            float deltaCharWidth = averageCharWidth * getAverageCharTolerance();

            // Compares the values obtained by the average method and the wordSpacing method
            // and picks the smaller number.
            float expectedStartOfNextWordX = EXPECTED_START_OF_NEXT_WORD_X_RESET_VALUE;
            if (endOfLastTextX != END_OF_LAST_TEXT_X_RESET_VALUE) {
                if (deltaCharWidth > deltaSpace) {
                    expectedStartOfNextWordX = endOfLastTextX + deltaSpace;
                } else {
                    expectedStartOfNextWordX = endOfLastTextX + deltaCharWidth;
                }
            }

            if (lastPosition != null) {
                if (startOfArticle) {
                    lastPosition.setArticleStart();
                    startOfArticle = false;
                }
                // RDD - Here we determine whether this text object is on the current
                // line. We use the lastBaselineFontSize to handle the superscript
                // case, and the size of the current font to handle the subscript case.
                // Text must overlap with the last rendered baseline text by at least
                // a small amount in order to be considered as being on the same line.
                // XXX BC: In theory, this check should really check if the next char is in
                // full range seen in this line. This is what I tried to do with minYTopForLine,
                // but this caused a lot of regression test failures. So, I'm leaving it be for
                // now
                if (!overlap(positionY, positionHeight, maxYForLine, maxHeightForLine)) {
                    writeLine(normalize(line));
                    line.clear();
                    lastLineStartPosition = handleLineSeparation(current, lastPosition,
                            lastLineStartPosition, maxHeightForLine);
                    expectedStartOfNextWordX = EXPECTED_START_OF_NEXT_WORD_X_RESET_VALUE;
                    maxYForLine = MAX_Y_FOR_LINE_RESET_VALUE;
                    maxHeightForLine = MAX_HEIGHT_FOR_LINE_RESET_VALUE;
                    minYTopForLine = MIN_Y_TOP_FOR_LINE_RESET_VALUE;
                }
                // test if our TextPosition starts after a new word would be expected to start
                if (expectedStartOfNextWordX != EXPECTED_START_OF_NEXT_WORD_X_RESET_VALUE
                        && expectedStartOfNextWordX < positionX &&
                        // only bother adding a space if the last character was not a space
                        lastPosition.getTextPosition().getUnicode() != null
                        && !lastPosition.getTextPosition().getUnicode().endsWith(" ")) {
                    line.add(LineItem.getWordSeparator());
                }
            }

            if (positionY >= maxYForLine) {
                maxYForLine = positionY;
            }

            // RDD - endX is what PDF considers to be the x coordinate of the
            // end position of the text. We use it in computing our metrics below.
            endOfLastTextX = positionX + positionWidth;

            // add it to the list
            if (characterValue != null) {
                if (startOfPage && lastPosition == null) {
                    writeParagraphStart(); // not sure this is correct for RTL?
                }
                line.add(new LineItem(position));
            }
            maxHeightForLine = Math.max(maxHeightForLine, positionHeight);
            minYTopForLine = Math.min(minYTopForLine, positionY - positionHeight);
            lastPosition = current;
            if (startOfPage) {
                lastPosition.setParagraphStart();
                lastPosition.setLineStart();
                lastLineStartPosition = lastPosition;
                startOfPage = false;
            }
            lastWordSpacing = wordSpacing;
            previousAveCharWidth = averageCharWidth;
        }

        // print the final line
        if (line.size() > 0) {
            writeLine(normalize(line));
            writeParagraphEnd();
        }

        endArticle();
    }
    writePageEnd();
}
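In this example Float.isNaN guards the word-spacing estimate: if the font reports a zero or NaN space width, the expected gap is pushed to Float.MAX_VALUE so no spurious word separator is ever inserted. A condensed sketch of just that guard, with the tolerance passed in as a parameter (helper name and signature are illustrative, not PDFBox API):

// Hypothetical distillation of the spacing guard used in writePage() above.
static float estimateDeltaSpace(float wordSpacing, float lastWordSpacing, float spacingTolerance) {
    if (wordSpacing == 0 || Float.isNaN(wordSpacing)) {
        // An unusable space width must not trigger word breaks,
        // so make the threshold impossible to exceed.
        return Float.MAX_VALUE;
    }
    if (lastWordSpacing < 0) {
        return wordSpacing * spacingTolerance;
    }
    return (wordSpacing + lastWordSpacing) / 2f * spacingTolerance;
}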
From source file:com.github.rnewson.couchdb.lucene.DatabaseIndexer.java
public void search(final HttpServletRequest req, final HttpServletResponse resp)
        throws IOException, JSONException {
    final IndexState state = getState(req, resp);
    if (state == null)
        return;
    final IndexSearcher searcher = state.borrowSearcher(isStaleOk(req));
    final String etag = state.getEtag();
    final FastVectorHighlighter fvh = new FastVectorHighlighter(true, true);
    final JSONArray result = new JSONArray();
    try {
        if (state.notModified(req)) {
            resp.setStatus(304);
            return;
        }
        for (final String queryString : getQueryStrings(req)) {
            final Analyzer analyzer = state.analyzer(req.getParameter("analyzer"));
            final Operator operator = "and".equalsIgnoreCase(req.getParameter("default_operator"))
                    ? Operator.AND : Operator.OR;
            final Query q = state.parse(queryString, operator, analyzer);

            final JSONObject queryRow = new JSONObject();
            queryRow.put("q", q.toString());
            if (getBooleanParameter(req, "debug")) {
                queryRow.put("plan", QueryPlan.toPlan(q));
                queryRow.put("analyzer", analyzer.getClass());
            }
            queryRow.put("etag", etag);
            if (getBooleanParameter(req, "rewrite")) {
                final Query rewritten_q = q.rewrite(searcher.getIndexReader());
                queryRow.put("rewritten_q", rewritten_q.toString());

                final JSONObject freqs = new JSONObject();
                final Set<Term> terms = new HashSet<Term>();
                rewritten_q.extractTerms(terms);
                for (final Object term : terms) {
                    final int freq = searcher.getIndexReader().docFreq((Term) term);
                    freqs.put(term.toString(), freq);
                }
                queryRow.put("freqs", freqs);
            } else {
                // Perform the search.
                final TopDocs td;
                final StopWatch stopWatch = new StopWatch();

                final boolean include_docs = getBooleanParameter(req, "include_docs");
                final int highlights = getIntParameter(req, "highlights", 0);
                final int highlight_length = max(getIntParameter(req, "highlight_length", 18), 18); // min for fast term vector highlighter is 18
                final boolean include_termvectors = getBooleanParameter(req, "include_termvectors");
                final int limit = getIntParameter(req, "limit", ini.getInt("lucene.limit", 25));
                final Sort sort = CustomQueryParser.toSort(req.getParameter("sort"));
                final int skip = getIntParameter(req, "skip", 0);

                final Set<String> fieldsToLoad;
                if (req.getParameter("include_fields") == null) {
                    fieldsToLoad = null;
                } else {
                    final String[] fields = Utils.splitOnCommas(req.getParameter("include_fields"));
                    final List<String> list = Arrays.asList(fields);
                    fieldsToLoad = new HashSet<String>(list);
                }

                if (sort == null) {
                    td = searcher.search(q, null, skip + limit);
                } else {
                    td = searcher.search(q, null, skip + limit, sort);
                }
                stopWatch.lap("search");

                // Fetch matches (if any).
                final int max = Math.max(0, Math.min(td.totalHits - skip, limit));
                final JSONArray rows = new JSONArray();
                final String[] fetch_ids = new String[max];
                for (int i = skip; i < skip + max; i++) {
                    final Document doc;
                    if (fieldsToLoad == null) {
                        doc = searcher.doc(td.scoreDocs[i].doc);
                    } else {
                        doc = searcher.doc(td.scoreDocs[i].doc, fieldsToLoad);
                    }
                    final JSONObject row = new JSONObject();
                    final JSONObject fields = new JSONObject();
                    final JSONObject highlight_rows = new JSONObject();

                    // Include stored fields.
                    for (final IndexableField f : doc.getFields()) {
                        if (!f.fieldType().stored()) {
                            continue;
                        }
                        final String name = f.name();
                        final Object value;
                        if (f.numericValue() != null) {
                            value = f.numericValue();
                        } else {
                            value = f.stringValue();
                        }
                        if (value != null) {
                            if ("_id".equals(name)) {
                                row.put("id", value);
                            } else {
                                if (!fields.has(name)) {
                                    fields.put(name, value);
                                } else {
                                    final Object obj = fields.get(name);
                                    if (obj instanceof String) {
                                        final JSONArray arr = new JSONArray();
                                        arr.put(obj);
                                        arr.put(value);
                                        fields.put(name, arr);
                                    } else {
                                        assert obj instanceof JSONArray;
                                        ((JSONArray) obj).put(value);
                                    }
                                }
                                if (highlights > 0) {
                                    String[] frags = fvh.getBestFragments(fvh.getFieldQuery(q),
                                            searcher.getIndexReader(), td.scoreDocs[i].doc, name,
                                            highlight_length, highlights);
                                    highlight_rows.put(name, frags);
                                }
                            }
                        }
                    }

                    if (!Float.isNaN(td.scoreDocs[i].score)) {
                        row.put("score", td.scoreDocs[i].score);
                    }
                    // Include sort order (if any).
                    if (td instanceof TopFieldDocs) {
                        final FieldDoc fd = (FieldDoc) ((TopFieldDocs) td).scoreDocs[i];
                        row.put("sort_order", fd.fields);
                    }
                    // Fetch document (if requested).
                    if (include_docs) {
                        fetch_ids[i - skip] = doc.get("_id");
                    }
                    if (fields.length() > 0) {
                        row.put("fields", fields);
                    }
                    if (highlight_rows.length() > 0) {
                        row.put("highlights", highlight_rows);
                    }
                    rows.put(row);
                }
                // Fetch documents (if requested).
                if (include_docs && fetch_ids.length > 0) {
                    final List<CouchDocument> fetched_docs = database.getDocuments(fetch_ids);
                    for (int j = 0; j < max; j++) {
                        final CouchDocument doc = fetched_docs.get(j);
                        final JSONObject row = doc == null
                                ? new JSONObject("{\"error\":\"not_found\"}")
                                : doc.asJson();
                        rows.getJSONObject(j).put("doc", row);
                    }
                }
                stopWatch.lap("fetch");

                queryRow.put("skip", skip);
                queryRow.put("limit", limit);
                queryRow.put("total_rows", td.totalHits);
                queryRow.put("search_duration", stopWatch.getElapsed("search"));
                queryRow.put("fetch_duration", stopWatch.getElapsed("fetch"));
                // Include sort info (if requested).
                if (td instanceof TopFieldDocs) {
                    queryRow.put("sort_order", CustomQueryParser.toJSON(((TopFieldDocs) td).fields));
                }
                queryRow.put("rows", rows);
            }
            result.put(queryRow);
        }
    } catch (final ParseException e) {
        ServletUtils.sendJsonError(req, resp, 400, "Bad query syntax: " + e.getMessage());
        return;
    } finally {
        state.returnSearcher(searcher);
    }

    resp.setHeader("ETag", etag);
    resp.setHeader("Cache-Control", "must-revalidate");
    ServletUtils.setResponseContentTypeAndEncoding(req, resp);

    final Object json = result.length() > 1 ? result : result.getJSONObject(0);
    final String callback = req.getParameter("callback");
    final String body;
    if (callback != null) {
        body = String.format("%s(%s)", callback, json);
    } else {
        if (json instanceof JSONObject) {
            final JSONObject obj = (JSONObject) json;
            body = getBooleanParameter(req, "debug") ? obj.toString(2) : obj.toString();
        } else {
            final JSONArray arr = (JSONArray) json;
            body = getBooleanParameter(req, "debug") ? arr.toString(2) : arr.toString();
        }
    }
    final Writer writer = resp.getWriter();
    try {
        writer.write(body);
    } finally {
        writer.close();
    }
}
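Here Float.isNaN is checked before serializing the Lucene hit score: JSON has no NaN literal, so the field is simply omitted when the score is not a real number. A reduced sketch of that idea, using the same org.json types as the example above (helper name is illustrative):

// Only emit a numeric field when it is an actual number; omit the key entirely for NaN.
static void putScoreIfPresent(JSONObject row, float score) throws JSONException {
    if (!Float.isNaN(score)) {
        row.put("score", score);
    }
}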
From source file:ml.shifu.shifu.core.dtrain.wdl.WDLWorker.java
private float getFloatValue(String input) {
    // check here to avoid bad performance in failed NumberFormatUtils.getFloat(input, 0f)
    float floatValue = input.length() == 0 ? 0f : NumberFormatUtils.getFloat(input, 0f);
    // no idea about why NaN in input data, we should process it as missing value TODO, according to norm type
    return (Float.isNaN(floatValue) || Double.isNaN(floatValue)) ? 0f : floatValue;
}
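The check above coerces NaN (and empty input) to 0f so that NaN never reaches the training pipeline; note that for a float argument the Float.isNaN test alone is sufficient, since the Double.isNaN call sees the same NaN after widening. A tiny hedged sketch of the same "NaN means missing" convention (helper name is illustrative):

// Treat NaN as a missing measurement and substitute a neutral default.
static float orDefault(float value, float defaultValue) {
    return Float.isNaN(value) ? defaultValue : value;
}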
From source file:pt.lsts.neptus.mra.importers.deltat.DeltaTParser.java
@Override
public BathymetrySwath nextSwath(double prob) {
    if (position == null)
        position = new CorrectedPosition(source);
    try {
        if (curPos >= channel.size()) {
            // cleanupResultOutputFile();
            return null;
        }

        BathymetryPoint data[];
        realNumberOfBeams = 0;

        buf = channel.map(MapMode.READ_ONLY, curPos, 256);
        header = new DeltaTHeader();
        header.parse(buf);

        hasIntensity = header.hasIntensity;
        // if (hasIntensity)
        //     NeptusLog.pub().info("LOG has intensity");
        // else
        //     NeptusLog.pub().info("Log doesn't have intensity");

        // Parse and process data ( no need to create another structure for this )
        if (header.hasIntensity)
            buf = channel.map(MapMode.READ_ONLY, curPos + 256, header.numBeams * 4);
        else
            buf = channel.map(MapMode.READ_ONLY, curPos + 256, header.numBeams * 2);

        data = new BathymetryPoint[header.numBeams];

        long timestamp = header.timestamp + MRAProperties.timestampMultibeamIncrement;

        boolean poseFromCorrected = true;
        SystemPositionAndAttitude pose = position.getPosition(timestamp / 1000.0);
        if (pose == null) {
            poseFromCorrected = false;
            pose = new SystemPositionAndAttitude();
            LocationType loc = new LocationType();
            loc.setLatitudeDegs(CoordinateUtil.latFrom83PFormatWorker(header.gnssShipPosLat));
            loc.setLongitudeDegs(CoordinateUtil.lonFrom83PFormatWorker(header.gnssShipPosLon));
            loc.setAbsoluteDepth(-1);
            pose.setPosition(loc);
            pose.setTime(timestamp);
            pose.setAltitude(header.altitude);
            pose.setRoll(Math.toRadians(header.rollAngleDegreesOrientModule));
            pose.setPitch(Math.toRadians(header.pitchAngleDegreesOrientModule));
            pose.setYaw(Math.toRadians(header.headingAngleDegreesOrientModule));
            NeptusLog.pub().warn("No position found on navigation, using partial data from Sonar");
        }

        boolean doSpeedCorrection = MRAProperties.soundSpeedCorrection;

        if (generateProcessReport) {
            recordMsgln();
            recordMsgln("% Swath type & version : " + header.fileType + ", " + header.fileVersion);
            recordMsgln("% Swath time : "
                    + DateTimeUtil.dateTimeFileNameFormatterMillis.format(new Date(timestamp)));
            recordMsgln("% Swath position : " + pose.getPosition().toString().replaceAll("\n", " ")
                    + "m depth :: " + MathMiscUtils.round(pose.getAltitude(), 2) + "m altitude"
                    + (poseFromCorrected ? " from corrected position" : " from data"));
            recordMsgln("% Swath attitude : R" + MathMiscUtils.round(Math.toDegrees(pose.getRoll()), 1)
                    + "\u00B0 P" + MathMiscUtils.round(Math.toDegrees(pose.getPitch()), 1)
                    + "\u00B0 Y" + MathMiscUtils.round(Math.toDegrees(pose.getYaw()), 1) + "\u00B0");
            recordMsgln("% Orient. module : R"
                    + MathMiscUtils.round(Math.toDegrees(header.rollAngleDegreesOrientModule), 1)
                    + "\u00B0 P" + MathMiscUtils.round(Math.toDegrees(header.pitchAngleDegreesOrientModule), 1)
                    + "\u00B0 H" + MathMiscUtils.round(Math.toDegrees(header.headingAngleDegreesOrientModule), 1)
                    + "\u00B0");
            recordMsgln("% Ship Course : " + header.gnssShipCourse + "\u00B0");
            recordMsgln("% Ship Lat/Lon : " + header.gnssShipPosLat + " " + header.gnssShipPosLon);
            recordMsgln("% Sonar XYZ offsets : " + header.sonarXOffset + "m, " + header.sonarYOffset
                    + "m, " + header.sonarZOffset + "m");
            recordMsgln("% Angle start/increment: " + header.startAngle + "\u00B0" + ", "
                    + header.angleIncrement + "\u00B0");
            recordMsgln("% Beams : " + header.numBeams);
            recordMsgln("% Samples per beam : " + header.samplesPerBeam);
            recordMsgln("% Number of pings avg : " + header.numberOfPingsAveraged);
            recordMsgln("% Sample rate high/std : " + (header.sampleRateHigh ? "high" : "std")
                    + " [std(1 in 500)/high (1 in 5000)]");
            recordMsgln("% Range : " + header.range + "m");
            recordMsgln("% Range resolution : " + header.rangeResolution + "mm");
            recordMsgln("% Sonar Freq. : " + header.sonarFreqKHz + "kHz");
            recordMsgln("% Pulse length : " + header.pulseLength + "\u03BCs");
            recordMsg("% 1/PRF : " + header.pulseRepetingRate + "ms");
            recordMsgln(" (" + MathMiscUtils.parseToEngineeringNotation(1. / (header.pulseRepetingRate / 1E3), 1)
                    + "Hz)");
            recordMsgln("% Ping number : " + header.pingNumber);
            recordMsgln("% Sector size : " + header.sectorSize + "\u00B0 :: "
                    + (header.angleIncrement * header.numBeams) + "\u00B0 calculated");
            recordMsgln("% Speed : " + MathMiscUtils.round(header.speed, 1) + "m/s");
            recordMsgln("% Sound speed : " + header.soundVelocity + "m/s"
                    + (doSpeedCorrection ? "" : " (1500m/s used for calculation)"));
            recordMsgln("% Roll correction : " + (header.dataIsCorrectedForRoll ? "yes" : "no"));
            recordMsgln("% RayBending correction: " + (header.dataIsCorrectedForRayBending ? "yes" : "no"));
            recordMsgln("% Op overlap mode : " + (header.sonarIsOperatingInOverlappedMode ? "yes" : "no"));
            recordMsgln("% Altitude : " + header.altitude + "m");
            recordMsgln("% ---------------------");
        }

        StringBuilder rangesStr = new StringBuilder();
        StringBuilder heightStr = new StringBuilder();
        StringBuilder intensityStr = new StringBuilder();
        StringBuilder oxStr = new StringBuilder();
        StringBuilder oyStr = new StringBuilder();
        StringBuilder deltasStr = new StringBuilder();
        float prevX = Float.NaN;
        float prevY = Float.NaN;

        for (int c = 0; c < header.numBeams; c++) {
            double range = buf.getShort(c * 2) * (header.rangeResolution / 1000.0); // rangeResolution in mm

            if (range == 0.0 || Math.random() > prob) {
                if (generateProcessReport) {
                    if (range != 0) {
                        recordMsgln("% Skip swath beam " + c + " range=" + range);
                    } else {
                        rangesStr.append(" " + MathMiscUtils.round(range, 3));
                        heightStr.append(" " + Double.NaN);
                        intensityStr.append(" " + Double.NaN);
                        oxStr.append(" " + Double.NaN);
                        oyStr.append(" " + Double.NaN);
                        deltasStr.append(" " + Float.NaN);
                        prevX = Float.NaN;
                        prevY = Float.NaN;
                    }
                }
                continue;
            }

            if (doSpeedCorrection && header.soundVelocity != 1500f) {
                range = range * header.soundVelocity / 1500f;
            }
            if (generateProcessReport) {
                rangesStr.append(" " + MathMiscUtils.round(range, 3));
            }

            double angle = header.startAngle + header.angleIncrement * c;
            float height = (float) (range * Math.cos(Math.toRadians(angle)) + pose.getPosition().getDepth());

            double x = range * Math.sin(Math.toRadians(angle));
            double yawAngle = -pose.getYaw();

            float ox = (float) (x * Math.sin(yawAngle));
            float oy = (float) (x * Math.cos(yawAngle));

            if (header.hasIntensity) {
                short intensity = buf.getShort(480 + (c * 2) - 1); // sometimes there's a return = 0
                int intensityInt = 0xffff & intensity;
                data[realNumberOfBeams] = new BathymetryPoint(ox, oy, height, intensityInt);
                data[realNumberOfBeams].intensityMaxValue = 65535;
                if (generateProcessReport)
                    intensityStr.append(" " + intensityInt);
            } else {
                data[realNumberOfBeams] = new BathymetryPoint(ox, oy, height);
                data[realNumberOfBeams].intensityMaxValue = 65535;
                if (generateProcessReport)
                    intensityStr.append(" " + Double.NaN);
            }
            realNumberOfBeams++;

            if (generateProcessReport) {
                heightStr.append(" " + MathMiscUtils.round(height, 3));
                oxStr.append(" " + MathMiscUtils.round(ox, 3));
                oyStr.append(" " + MathMiscUtils.round(oy, 3));
                if (!Float.isNaN(prevX) && !Float.isNaN(prevY)) {
                    float delta = (float) Math.sqrt((ox - prevX) * (ox - prevX) + (oy - prevY) * (oy - prevY));
                    deltasStr.append(" " + MathMiscUtils.round(delta, 3));
                } else {
                    deltasStr.append(" " + Float.NaN);
                }
                prevX = ox;
                prevY = oy;
            }
        }

        if (generateProcessReport) {
            recordMsgln("% Ranges:");
            recordMsgln(rangesStr.toString());
            recordMsgln("% Heights:");
            recordMsgln(heightStr.toString());
            recordMsgln("% Intensities:");
            recordMsgln(intensityStr.toString());
            recordMsgln("% Offsets X:");
            recordMsgln(oxStr.toString());
            recordMsgln("% Offsets Y:");
            recordMsgln(oyStr.toString());
            recordMsgln("% Deltas:");
            recordMsgln(deltasStr.toString());
            recordMsgln("% Number of beams vs read: " + header.numBeams + " vs " + realNumberOfBeams);
        }

        curPos += header.numBytes; // Advance current position

        BathymetrySwath swath = new BathymetrySwath(header.timestamp, pose, data);
        swath.setNumBeams(realNumberOfBeams);

        return swath;
    } catch (Exception e) {
        e.printStackTrace();
        return null;
    }
}
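In this parser Float.NaN doubles as a "no previous beam yet" sentinel: prevX and prevY start as NaN, and the beam-to-beam delta is only computed once both hold real numbers. A minimal standalone sketch of that sentinel pattern (helper name is illustrative):

// Use NaN as the "no previous sample" marker instead of a separate boolean flag.
static float deltaFromPrevious(float x, float y, float prevX, float prevY) {
    if (Float.isNaN(prevX) || Float.isNaN(prevY)) {
        return Float.NaN;   // no previous point to measure against
    }
    return (float) Math.sqrt((x - prevX) * (x - prevX) + (y - prevY) * (y - prevY));
}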
From source file:net.pms.util.Rational.java
/**
 * Returns a {@link Rational} whose value is {@code (this * value)}.
 *
 * @param value the value to be multiplied by this {@link Rational}.
 * @return The multiplication result.
 */
@Nonnull
public Rational multiply(float value) {
    if (isNaN() || Float.isNaN(value)) {
        return NaN;
    }
    if (isInfinite() || Float.isInfinite(value)) {
        if (signum() == 0 || value == 0f) {
            return NaN; // Infinity by zero
        }
        if (value > 0) {
            return signum() > 0 ? POSITIVE_INFINITY : NEGATIVE_INFINITY;
        }
        return signum() < 0 ? POSITIVE_INFINITY : NEGATIVE_INFINITY;
    }
    if (value == 0f) {
        return ZERO;
    }
    return multiply(valueOf(value));
}
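multiply checks Float.isNaN (and Float.isInfinite) up front because, once the operand is wrapped in a Rational, the IEEE 754 special cases have to be reproduced by hand. A small primitive-float sketch of the cases being mirrored (helper name is illustrative, not the net.pms API):

// Primitive-float illustration of the special cases multiply() reproduces for Rational values.
static boolean productIsNaN(float a, float b) {
    float product = a * b;   // IEEE 754: NaN * x == NaN, Infinity * 0 == NaN
    return Float.isNaN(product);
}

// productIsNaN(Float.NaN, 2f)                 -> true
// productIsNaN(Float.POSITIVE_INFINITY, 0f)   -> true  ("Infinity by zero")
// productIsNaN(Float.POSITIVE_INFINITY, -3f)  -> false (result is -Infinity)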
From source file:com.repeatability.pdf.PDFTextStripper.java
The writePage() method in this fork is, apart from re-creating the text iterator before the main loop, identical to the org.apache.pdfbox.text.PDFTextStripper example shown above, including the Float.isNaN(wordSpacing) guard on the word-spacing estimate; see that example for the full listing.
From source file:uk.ac.diamond.scisoft.analysis.dataset.FloatDataset.java
/**
 * @return true if dataset contains any NaNs
 */
@Override
public boolean containsNans() {
    IndexIterator iter = getIterator(); // REAL_ONLY
    while (iter.hasNext()) { // REAL_ONLY
        if (Float.isNaN(data[iter.index])) // CLASS_TYPE // REAL_ONLY
            return true; // REAL_ONLY
    } // REAL_ONLY
    return false;
}
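The // REAL_ONLY and // CLASS_TYPE markers above come from the dataset class's code-generation template; the underlying scan is simply a linear pass over the backing array. The same check works on a plain float[] when no IndexIterator is available; a minimal hedged sketch:

// Return true if any element of the array is NaN.
static boolean containsNaN(float[] data) {
    for (float value : data) {
        if (Float.isNaN(value)) {
            return true;
        }
    }
    return false;
}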
From source file:routines.system.BigDataParserUtils.java
public static Short parseTo_Short(float input) {
    if (Float.isNaN(input)) {
        return null;
    }
    return ((Float) input).shortValue();
}
From source file:com.androzic.location.LocationService.java
private void updateLocation(final ILocationListener callback) {
    if (!"unknown".equals(lastKnownLocation.getProvider()))
        callback.onLocationChanged(lastKnownLocation, isContinous, !Float.isNaN(nmeaGeoidHeight),
                smoothSpeed, avgSpeed);
}
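Here !Float.isNaN(nmeaGeoidHeight) is passed to the listener as a boolean "geoid height available" flag, another case of NaN standing in for "no value received yet". A small hedged sketch of that convention (the accessor method is illustrative, only the field name comes from the example above):

// A NaN-initialized field acts as "not received yet"; isNaN converts it into a boolean flag.
private float nmeaGeoidHeight = Float.NaN;

private boolean hasGeoidHeight() {
    return !Float.isNaN(nmeaGeoidHeight);
}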
From source file:org.fhcrc.cpl.viewer.ms2.commandline.PostProcessPepXMLCLM.java
/**
 * TODO: fold this in with loadLightHeavyPeptides
 *
 * This is some really weird stuff, right here. This method handles calculation of median log ratios, regardless
 * of whether you're doing it separately by file or all together, by number of cysteines or not.
 * It creates a map data structure that gets used down below, with a key for each file. If we're doing global
 * centering, the value gets repeated for each key. This is a bit silly, but it makes things simpler below.
 *
 * @return
 * @throws CommandLineModuleExecutionException
 */
protected void calcLogMedianRatiosAllFiles() throws CommandLineModuleExecutionException {
    ApplicationContext.infoMessage("Calculating median log ratio(s) for all files, "
            + "this may take a while...");
    if (medianCenterByNumCysteines) {
        fileNumCysteinesMedianLogRatioMap = new HashMap<File, Map<Integer, Float>>();
        Map<Integer, List<Float>> numCysLogRatiosMapAllFiles = new HashMap<Integer, List<Float>>();
        for (File featureFile : pepXmlFiles) {
            Map<Integer, List<Float>> numCysLogRatiosMapThisFile = new HashMap<Integer, List<Float>>();
            ApplicationContext.infoMessage("\tProcessing file " + featureFile.getAbsolutePath() + "...");
            try {
                Iterator<FeatureSet> featureSetIterator =
                        new PepXMLFeatureFileHandler.PepXMLFeatureSetIterator(featureFile);
                while (featureSetIterator.hasNext()) {
                    FeatureSet featureSet = featureSetIterator.next();
                    filterOnQualityScores(featureSet);
                    for (Feature feature : featureSet.getFeatures()) {
                        if (IsotopicLabelExtraInfoDef.hasRatio(feature)
                                && MS2ExtraInfoDef.getPeptideProphet(feature) >= minPeptideProphetForMedian) {
                            float ratio = (float) IsotopicLabelExtraInfoDef.getRatio(feature);
                            if (Float.isInfinite(ratio) || (ratio == 0) || Float.isNaN(ratio))
                                continue;
                            // don't consider to-be-stripped proteins in median ratio calculation
                            List<String> proteins = MS2ExtraInfoDef.getProteinList(feature);
                            boolean hasBadProtein = false;
                            if (proteins != null) {
                                for (String protein : proteins) {
                                    if ((proteinsToStripQuant != null && proteinsToStripQuant.contains(protein))
                                            || (proteinsToStrip != null && proteinsToStrip.contains(protein))) {
                                        hasBadProtein = true;
                                        break;
                                    }
                                }
                            }
                            if (hasBadProtein)
                                continue;
                            String peptide = MS2ExtraInfoDef.getFirstPeptide(feature);
                            if (peptide != null) {
                                if (peptidesToStrip != null && peptidesToStrip.contains(peptide))
                                    continue;
                                int numCysteines = 0;
                                for (int i = 0; i < peptide.length(); i++) {
                                    if (peptide.charAt(i) == 'C')
                                        numCysteines++;
                                }
                                if (numCysteines > 0) {
                                    List<Float> logRatiosList = numCysLogRatiosMapThisFile.get(numCysteines);
                                    if (logRatiosList == null) {
                                        logRatiosList = new ArrayList<Float>();
                                        numCysLogRatiosMapThisFile.put(numCysteines, logRatiosList);
                                    }
                                    logRatiosList.add((float) Math.log(ratio));
                                }
                            }
                        }
                    }
                }
                if (medianCenterAllRunsTogether) {
                    for (int numCys : numCysLogRatiosMapThisFile.keySet()) {
                        List<Float> logRatiosThisNumCys = numCysLogRatiosMapAllFiles.get(numCys);
                        if (logRatiosThisNumCys == null) {
                            logRatiosThisNumCys = new ArrayList<Float>();
                            numCysLogRatiosMapAllFiles.put(numCys, logRatiosThisNumCys);
                        }
                        logRatiosThisNumCys.addAll(numCysLogRatiosMapThisFile.get(numCys));
                    }
                } else {
                    HashMap<Integer, Float> numCysteinesMedianThisFile = new HashMap<Integer, Float>();
                    for (int numCys : numCysLogRatiosMapThisFile.keySet()) {
                        numCysteinesMedianThisFile.put(numCys,
                                (float) BasicStatistics.median(numCysLogRatiosMapThisFile.get(numCys)));
                    }
                    fileNumCysteinesMedianLogRatioMap.put(featureFile, numCysteinesMedianThisFile);
                }
            } catch (IOException e) {
                throw new CommandLineModuleExecutionException("Failed to load feature file " + featureFile);
            }
        }
        if (medianCenterAllRunsTogether) {
            Map<Integer, Float> numCysteinesMedianMap = new HashMap<Integer, Float>();
            for (int numCys = 0; numCys < 10; numCys++) {
                if (!numCysLogRatiosMapAllFiles.containsKey(numCys))
                    continue;
                numCysteinesMedianMap.put(numCys,
                        (float) BasicStatistics.median(numCysLogRatiosMapAllFiles.get(numCys)));
                if (showCharts) {
                    new PanelWithHistogram(numCysLogRatiosMapAllFiles.get(numCys), "LogRatiosCys" + numCys, 200)
                            .displayInTab();
                }
            }
            for (File file : pepXmlFiles) {
                fileNumCysteinesMedianLogRatioMap.put(file, numCysteinesMedianMap);
            }
            ApplicationContext.infoMessage("Median log ratios by num Cysteines:");
            for (int i = 0; i < 20; i++) {
                if (numCysteinesMedianMap.containsKey(i)) {
                    ApplicationContext.infoMessage(i + ": " + numCysteinesMedianMap.get(i) + " ("
                            + numCysLogRatiosMapAllFiles.get(i).size() + " events)");
                }
            }
        } else
            ApplicationContext.infoMessage("Separate median log ratio (by #Cysteines) per file, not displaying.");
    } // end if (medianCenterByNumCysteines)
    else {
        List<Float> logRatiosForMedianCalc = new ArrayList<Float>();
        fileMedianLogRatioMap = new HashMap<File, Float>();
        int fileIndex = 1;
        for (File featureFile : pepXmlFiles) {
            List<Float> logRatiosForMedianCalcThisFile = new ArrayList<Float>();
            ApplicationContext.infoMessage("\tProcessing file " + featureFile.getAbsolutePath() + "...");
            try {
                Iterator<FeatureSet> featureSetIterator =
                        new PepXMLFeatureFileHandler.PepXMLFeatureSetIterator(featureFile);
                while (featureSetIterator.hasNext()) {
                    FeatureSet featureSet = featureSetIterator.next();
                    filterOnQualityScores(featureSet);
                    for (Feature feature : featureSet.getFeatures()) {
                        if (IsotopicLabelExtraInfoDef.hasRatio(feature)
                                && MS2ExtraInfoDef.getPeptideProphet(feature) >= minPeptideProphetForMedian) {
                            float ratio = (float) IsotopicLabelExtraInfoDef.getRatio(feature);
                            if (!Float.isInfinite(ratio) && !Float.isNaN(ratio) && ratio != 0) {
                                logRatiosForMedianCalcThisFile.add((float) Math.log(ratio));
                            }
                        }
                    }
                }
            } catch (IOException e) {
                throw new CommandLineModuleExecutionException("Failed to load feature file " + featureFile);
            }
            if (!medianCenterAllRunsTogether) {
                if (logRatiosForMedianCalcThisFile.size() < minRatiosForMedianCenter)
                    throw new CommandLineModuleExecutionException(
                            "Not enough ratios to calculate median for file " + featureFile.getAbsolutePath()
                                    + " (only " + logRatiosForMedianCalcThisFile.size() + " ratios, needed "
                                    + minRatiosForMedianCenter + ")");
                else {
                    float medianLogRatioThisFile = (float) BasicStatistics.median(logRatiosForMedianCalcThisFile);
                    ApplicationContext.infoMessage("Median log ratio for file " + featureFile.getName() + ": "
                            + medianLogRatioThisFile);
                    fileMedianLogRatioMap.put(featureFile, medianLogRatioThisFile);
                    if (showCharts) {
                        PanelWithHistogram pwh = new PanelWithHistogram(logRatiosForMedianCalcThisFile,
                                "RAW Log Ratios " + fileIndex++, 200);
                        pwh.displayInTab();
                    }
                }
            } else
                logRatiosForMedianCalc.addAll(logRatiosForMedianCalcThisFile);
        }
        if (medianCenterAllRunsTogether) {
            if (logRatiosForMedianCalc.size() < minRatiosForMedianCenter)
                throw new CommandLineModuleExecutionException("Not enough ratios to calculate median (only "
                        + logRatiosForMedianCalc.size() + " ratios, needed " + minRatiosForMedianCenter + ")");
            // assign the same median to each file. This keeps code same down below
            float medianLogRatioAcrossAll = (float) BasicStatistics.median(logRatiosForMedianCalc);
            ApplicationContext.infoMessage("Median log ratio across all runs: " + medianLogRatioAcrossAll);
            for (File featureFile : pepXmlFiles)
                fileMedianLogRatioMap.put(featureFile, medianLogRatioAcrossAll);
            if (showCharts) {
                PanelWithHistogram pwh = new PanelWithHistogram(logRatiosForMedianCalc, "RAW Log Ratios", 200);
                pwh.displayInTab();
            }
        }
    } // end not-by-cysteines behavior
    ApplicationContext.infoMessage("Done calculating median log ratio across all files.");
}
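Throughout this method, ratios that are NaN, infinite, or zero are skipped before the median is taken, since log(0) is -Infinity and a single NaN or infinite value would make the median meaningless. A condensed hedged sketch of that filter as a standalone helper (the project uses its own BasicStatistics class for the median; here only the filtering step is shown, assuming java.util.List and java.util.ArrayList imports):

// Keep only ratios that can safely go through Math.log() and a median calculation.
static List<Float> usableLogRatios(List<Float> ratios) {
    List<Float> logRatios = new ArrayList<Float>();
    for (float ratio : ratios) {
        if (Float.isNaN(ratio) || Float.isInfinite(ratio) || ratio == 0f) {
            continue;   // log(0) is -Infinity; NaN or Infinity would poison the median
        }
        logRatios.add((float) Math.log(ratio));
    }
    return logRatios;
}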