Java tutorial
/***************************************************************************

Copyright (c) 2016, EPAM SYSTEMS INC

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

****************************************************************************/

package com.epam.dlab.module;

import java.util.ArrayList;
import java.util.List;

import javax.validation.constraints.NotNull;

import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.epam.dlab.core.parser.ParserByLine;
import com.epam.dlab.exceptions.AdapterException;
import com.epam.dlab.exceptions.InitializationException;
import com.epam.dlab.exceptions.ParseException;
import com.fasterxml.jackson.annotation.JsonClassDescription;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.JsonTypeName;
import com.google.common.base.MoreObjects.ToStringHelper;

/**
 * Line-oriented CSV parser that splits each source line into a list of field values.
 *
 * <p>The field separator, the field terminator (quote character) and the escape character are
 * configurable. A field may optionally be wrapped in terminators, in which case it may contain
 * the separator. The character following an escape character is always taken literally.
 * Doubling the terminator inside a quoted field is <em>not</em> supported; use the escape
 * character instead.
 */
@JsonTypeName(ModuleName.PARSER_CSV)
@JsonClassDescription("CSV parser.\n"
        + "Parse source CSV format to common billing report.\n"
        + " - type: " + ModuleName.PARSER_CSV + "\n"
        + " [dataFile: <filename>] - the file name to store working data of parser.]\n"
        + " [columnStartDate: <column_name>] - the name of source column with date of data.]\n"
        + " [columnMapping: >-\n"
        + " <targetColumn1=sourceColumnX;targetColumn2=sourceColumnY; ...;\n"
        + " tags=sourceColumnK,...,sourceColumnN>]\n"
        + " - columns mapping to target from source columns.\n"
        + " Know target columns: dlab_id, user,\n"
        + " usage_date, product, usage_type, usage, cost,\n"
        + " currency_code, resource_id, tags.\n"
        + " [whereCondition: >-\n"
        + " <(source_columnX > 0.0 || source_columnY == 'string') &&\n"
        + " source_columnZ != 2016>]\n"
        + " - where condition for filtering the source data,\n"
        + " see http://commons.apache.org/proper/commons-jexl/reference/syntax.html#Operators\n"
        + " for detais.\n"
        + " [aggregate: <none | month | day>] - how to aggregate the data.\n"
        + " [headerLineNo: <number>] - the number of header line in source data.\n"
        + " [skipLines: <numbber>] - the number of line which will be skipped\n"
        + " (include header).\n"
        + " [fieldSeparator: <char>] - char for separate field names and values.\n"
        + " [fieldTerminator: <char>] - char for terminate field names and values.\n"
        + " [escapeChar: <char>] - escape char.\n"
        + " [decimalSeparator: <char>] - char for decimal sign.\n"
        + " [groupingSeparator: <char>] - char for thousands separator.\n")
public class ParserCsv extends ParserByLine {
    private static final Logger LOGGER = LoggerFactory.getLogger(ParserCsv.class);

    /** Default character that separates fields. */
    public static final char FIELD_SEPARATOR_DEFAULT = ',';

    /** Default character that encloses (quotes) a field. */
    public static final char FIELD_DELIMITER_DEFAULT = '"';

    /** Default escape character. */
    public static final char ESCAPE_CHAR_DEFAULT = '\\';

    /** Character that separates fields. */
    @NotNull
    @JsonProperty
    private char fieldSeparator = FIELD_SEPARATOR_DEFAULT;

    /** Character that encloses (quotes) a field. */
    @NotNull
    @JsonProperty
    private char fieldTerminator = FIELD_DELIMITER_DEFAULT;

    /** Escape character; the character that follows it is taken literally. */
    @NotNull
    @JsonProperty
    private char escapeChar = ESCAPE_CHAR_DEFAULT;

    /** Number of the line that contains the header; values not greater than zero mean no header is parsed. */
    @JsonProperty
    private int headerLineNo = 0;

    /** Number of leading lines to skip (the header line is included in this count). */
    @JsonProperty
    private int skipLines = 0;

    /** Return the character that separates fields. */
    public char getFieldSeparator() {
        return fieldSeparator;
    }

    /** Set the character that separates fields. */
    public void setFieldSeparator(char fieldSeparator) {
        this.fieldSeparator = fieldSeparator;
    }

    /** Return the character that encloses (quotes) a field. */
    public char getFieldTerminator() {
        return fieldTerminator;
    }

    /** Set the character that encloses (quotes) a field. */
    public void setFieldTerminator(char fieldTerminator) {
        this.fieldTerminator = fieldTerminator;
    }

    /** Return the escape character. */
    public char getEscapeChar() {
        return escapeChar;
    }

    /** Set the escape character. */
    public void setEscapeChar(char escapeChar) {
        this.escapeChar = escapeChar;
    }

    /** Return the number of the line that contains the header. */
    public int getHeaderLineNo() {
        return headerLineNo;
    }

    /** Set the number of the line that contains the header. */
    public void setHeaderLineNo(int headerLineNo) {
        this.headerLineNo = headerLineNo;
    }

    /** Return the number of leading lines to skip (header included). */
    public int getSkipLines() {
        return skipLines;
    }

    /** Set the number of leading lines to skip (header included). */
    public void setSkipLines(int skipLines) {
        this.skipLines = skipLines;
    }

    /** No initialization is required for this parser. */
    @Override
    public void initialize() throws InitializationException {
    }

    /**
     * Read the header line (if one is configured) and skip the configured number of leading
     * lines.
     *
     * <p>Every row consumed here is counted as skipped in the current statistics.
     *
     * @return the parsed header fields, or {@code null} when {@code headerLineNo} is not
     *         positive or the input ends before the header line is reached.
     * @throws AdapterException declared by the overridden method; presumably raised by
     *         {@code getNextRow()} on a read failure (its implementation is not visible here).
     * @throws ParseException if the header line is not valid CSV.
     */
    @Override
    public List<String> parseHeader() throws AdapterException, ParseException {
        String line = null;
        List<String> header = null;

        if (headerLineNo > 0) {
            // The loop relies on the read counter advancing as rows are fetched
            // (presumably done by getNextRow(); verify against the superclass).
            while (getCurrentStatistics().getRowReaded() < headerLineNo) {
                if ((line = getNextRow()) == null) {
                    return null;
                }
                getCurrentStatistics().incrRowSkipped();
            }
            // NOTE(review): if the read counter already meets headerLineNo on entry, line is
            // still null here and parseRow will fail; confirm that cannot happen.
            header = parseRow(line);
        }

        // Skip the remaining leading lines; running out of input here is not an error.
        while (getCurrentStatistics().getRowReaded() < skipLines) {
            if (getNextRow() == null) {
                break;
            }
            getCurrentStatistics().incrRowSkipped();
        }
        return header;
    }

    /**
     * Log a parse error, with a caret under the offending position, and build the exception.
     *
     * @param message the error message.
     * @param pos the 1-based position in the parsed line.
     * @param sourceLine the parsed line.
     * @return the exception to be thrown by the caller.
     */
    private ParseException getParseException(String message, int pos, String sourceLine) {
        String s = String.format("%s at pos %d in line: ", message, pos);
        LOGGER.error("{}{}", s, sourceLine);
        // Second log line: a caret placed under character number pos of the source line.
        LOGGER.error("{}^", StringUtils.repeat(' ', s.length() + pos - 1));
        return new ParseException(s + sourceLine);
    }

    /**
     * Split a CSV line into its field values.
     *
     * <p>The line is copied into a working buffer; completed fields are cut from the front of
     * the buffer, so {@code pos} is always an index relative to the start of the current field.
     *
     * @param line the source line.
     * @return the list of field values; a line without separators yields a single element.
     * @throws ParseException if the line ends right after an escape character, a terminator
     *         appears in the middle of an unquoted field, a closing terminator is not followed
     *         by a separator or the end of the line, or a quoted field is never closed.
     */
    @Override
    public List<String> parseRow(String line) throws ParseException {
        // Count of consumed characters of the original line; used for error reporting only.
        int realPos = 0;
        // Index into the working buffer, relative to the start of the current field.
        int pos = 0;
        // True while inside a field that was opened with the terminator character.
        boolean quoted = false;
        StringBuilder sb = new StringBuilder(line);
        List<String> row = new ArrayList<String>();

        while (pos < sb.length()) {
            char c = sb.charAt(pos);
            if (c == escapeChar) {
                realPos++;
                pos++;
                if (pos == sb.length()) {
                    throw getParseException("Invalid escape char", realPos, line);
                }
                // Drop the escape char; the escaped char moves to pos - 1 and is kept as is.
                sb.delete(pos - 1, pos);
                realPos++;
            } else if (c == fieldTerminator) {
                realPos++;
                if (quoted) {
                    // Closing terminator: must be followed by end of line or a separator.
                    realPos++;
                    pos++;
                    if (pos == sb.length()) {
                        sb.delete(pos - 1, pos);
                        quoted = false;
                        break;
                    }
                    if (sb.charAt(pos) == fieldSeparator) {
                        row.add(sb.substring(0, pos - 1));
                        sb.delete(0, pos + 1);
                        pos = 0;
                        quoted = false;
                        continue;
                    }
                    throw getParseException("Invalid field delimiter", realPos, line);
                }
                // Opening terminator is only allowed as the first character of a field.
                if (pos != 0) {
                    throw getParseException("Unterminated field", realPos, line);
                }
                sb.delete(0, 1);
                quoted = true;
            } else if (c == fieldSeparator) {
                realPos++;
                if (quoted) {
                    // A separator inside a quoted field is part of the value.
                    pos++;
                    continue;
                }
                row.add(sb.substring(0, pos));
                sb.delete(0, pos + 1);
                pos = 0;
            } else {
                realPos++;
                pos++;
            }
        }

        // The line ended while still inside a quoted field: report it instead of silently
        // returning the partial value.
        if (quoted) {
            throw getParseException("Unterminated quoted field", realPos, line);
        }

        row.add(sb.toString());
        return row;
    }

    /** Append this parser's CSV settings to the helper built by the superclass. */
    @Override
    public ToStringHelper toStringHelper(Object self) {
        return super.toStringHelper(self)
                .add("fieldSeparator", fieldSeparator)
                .add("fieldTerminator", fieldTerminator)
                .add("escapeChar", escapeChar)
                .add("headerLineNo", headerLineNo)
                .add("skipLines", skipLines);
    }
}