Java tutorial: a Norconex Committer for Apache Kafka
/* Copyright 2017 Daniel Davis
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package net.danizen.norconex.committer.kafka;

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.Properties;

import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamWriter;

import org.apache.commons.configuration.XMLConfiguration;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.builder.EqualsBuilder;
import org.apache.commons.lang3.builder.HashCodeBuilder;
import org.apache.commons.lang3.builder.ToStringBuilder;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;

import com.norconex.committer.core.AbstractMappedCommitter;
import com.norconex.committer.core.CommitterException;
import com.norconex.committer.core.IAddOperation;
import com.norconex.committer.core.ICommitOperation;
import com.norconex.committer.core.IDeleteOperation;

/**
 * <p>
 * Commits documents to Apache Kafka using the Kafka Producer API.
 * </p>
 * <p>
 * Despite being a subclass of {@link AbstractMappedCommitter}, setting an
 * <code>idTargetField</code> is not supported.
 * </p>
 * <h3>XML configuration usage:</h3>
 *
 * <pre>
 *  <committer class="net.danizen.norconex.committer.kafka.KafkaCommitter">
 *      <topicName>(Name of the Kafka topic to send documents to)</topicName>
 *      <brokerList>
 *          (Comma-delimited list of Kafka brokers the producer bootstraps
 *          from, e.g. "host1:9092,host2:9092")
 *      </brokerList>
 *      <commitBatchSize>
 *          (Max number of documents to send to Kafka at once)
 *      </commitBatchSize>
 *      <queueDir>(Optional path where to queue files)</queueDir>
 *      <queueSize>(Max queue size before committing)</queueSize>
 *      <maxRetries>(Max retries upon commit failures)</maxRetries>
 *      <maxRetryWait>(Max delay in milliseconds between retries)</maxRetryWait>
 *  </committer>
 * </pre>
 *
 * @author Daniel Davis
 */
public class KafkaCommitter extends AbstractMappedCommitter {

    private static final Logger logger =
            LogManager.getLogger(KafkaCommitter.class);

    private static final String DFLT_KAFKA_SERIALIZER =
            "org.apache.kafka.common.serialization.StringSerializer";

    private String topicName;
    private String brokerList;
    private KafkaProducer<String, String> producer;

    /**
     * Constructor.
     */
    public KafkaCommitter() {
    }

    public String getTopicName() {
        return topicName;
    }

    public void setTopicName(String topicName) {
        this.topicName = topicName;
    }

    public String getBrokerList() {
        return brokerList;
    }

    public void setBrokerList(String brokerList) {
        this.brokerList = brokerList;
    }

    /**
     * Lazily creates the Kafka producer from the configured broker list
     * and the committer's batch size.
     */
    public KafkaProducer<String, String> createProducer() {
        if (producer == null) {
            Properties props = new Properties();
            props.put("bootstrap.servers", getBrokerList());
            props.put("acks", "all");
            props.put("retries", 0);
            props.put("batch.size", getCommitBatchSize());
            props.put("linger.ms", 250);
            props.put("buffer.memory", 1 * 1024 * 1024);
            props.put("key.serializer", DFLT_KAFKA_SERIALIZER);
            props.put("value.serializer", DFLT_KAFKA_SERIALIZER);
            producer = new KafkaProducer<String, String>(props);
        }
        return producer;
    }
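    /**
     * Sends each queued operation to the configured Kafka topic, keyed by
     * the document reference. Serialization is deliberately minimal for
     * now; a pluggable marshalling strategy (e.g., JSON-encoding document
     * metadata into the record value) would be the natural extension point.
     */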
    @Override
    protected void commitBatch(List<ICommitOperation> batch) {
        // Make sure the producer exists before the first send.
        createProducer();
        for (ICommitOperation baseop : batch) {
            // NOTE: Probably need a marshalling class here that is pluggable
            if (baseop instanceof IAddOperation) {
                IAddOperation op = (IAddOperation) baseop;
                try {
                    // Use the document reference as the record key and the
                    // document content as the record value.
                    String content = IOUtils.toString(
                            op.getContentStream(), StandardCharsets.UTF_8);
                    producer.send(new ProducerRecord<String, String>(
                            getTopicName(), op.getReference(), content));
                } catch (IOException e) {
                    throw new CommitterException(
                            "Cannot read content of: " + op.getReference(), e);
                }
            } else if (baseop instanceof IDeleteOperation) {
                IDeleteOperation op = (IDeleteOperation) baseop;
                // A null value is a Kafka "tombstone": on a compacted topic
                // it marks the key (the document reference) as deleted.
                producer.send(new ProducerRecord<String, String>(
                        getTopicName(), op.getReference(), null));
            } else {
                throw new CommitterException(
                        "Unsupported operation: " + baseop);
            }
        }
        // Block until buffered records are actually delivered so the batch
        // is not reported committed prematurely.
        producer.flush();
    }

    @Override
    protected void saveToXML(XMLStreamWriter writer) throws XMLStreamException {
        if (StringUtils.isNotBlank(topicName)) {
            writer.writeStartElement("topicName");
            writer.writeCharacters(topicName);
            writer.writeEndElement();
        }
        if (StringUtils.isNotBlank(brokerList)) {
            writer.writeStartElement("brokerList");
            writer.writeCharacters(brokerList);
            writer.writeEndElement();
        }
    }

    @Override
    protected void loadFromXml(XMLConfiguration xml) {
        setTopicName(xml.getString("topicName", null));
        setBrokerList(xml.getString("brokerList", null));
    }

    @Override
    public int hashCode() {
        return new HashCodeBuilder()
                .appendSuper(super.hashCode())
                .append(topicName)
                .append(brokerList)
                .toHashCode();
    }

    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof KafkaCommitter)) {
            return false;
        }
        KafkaCommitter other = (KafkaCommitter) obj;
        return new EqualsBuilder()
                .appendSuper(super.equals(obj))
                .append(topicName, other.topicName)
                .append(brokerList, other.brokerList)
                .isEquals();
    }

    @Override
    public String toString() {
        return new ToStringBuilder(this)
                .appendSuper(super.toString())
                .append("topicName", topicName)
                .append("brokerList", brokerList)
                .toString();
    }
}
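In a crawl, this committer is driven by a Norconex Collector through the XML configuration shown in the Javadoc above. As an illustration only, here is a minimal, hypothetical sketch of driving it directly. It assumes the committer-core 2.x ICommitter API (add(String, InputStream, Properties) and commit()); the topic name, broker address, document reference, and metadata values are all placeholders.

import java.io.ByteArrayInputStream;
import java.nio.charset.StandardCharsets;

import com.norconex.commons.lang.map.Properties;

import net.danizen.norconex.committer.kafka.KafkaCommitter;

public class KafkaCommitterExample {

    public static void main(String[] args) {
        KafkaCommitter committer = new KafkaCommitter();
        // Placeholder values; point these at a real topic and broker.
        committer.setTopicName("crawled-documents");
        committer.setBrokerList("localhost:9092");

        // Metadata uses Norconex's multi-valued Properties map.
        Properties metadata = new Properties();
        metadata.addString("title", "Example page");

        // Queue one document for addition...
        committer.add(
                "https://example.com/page.html",
                new ByteArrayInputStream(
                        "Hello, Kafka".getBytes(StandardCharsets.UTF_8)),
                metadata);

        // ...and flush the queue, which invokes commitBatch() above.
        committer.commit();
    }
}

Note that commit() is what triggers commitBatch(), so the KafkaProducer is only created after configuration is complete, and producer.flush() ensures records are delivered before the commit returns.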