FeatureExtraction.FeatureExtractorNgramsByte.java Source code

Java tutorial

Introduction

Here is the source code for FeatureExtraction.FeatureExtractorNgramsByte.java

Source

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package FeatureExtraction;

import IO.Console;
import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import org.apache.commons.io.FileUtils;

/**
 *
 * @author Aviad
 */
public class FeatureExtractorNgramsByte<T> extends AFeatureExtractor<T> {

    private final long serialVersionUID = 1L;
    private final int m_grams; //the gram size to be used
    private final int m_skip; //the skip to be used

    public FeatureExtractorNgramsByte(int gram_size, int skip) {
        m_grams = gram_size;
        m_skip = skip;
    }

    /**
     * Return list of n-grams (and occurrences) extracted from the given source
     *
     * @param element the type of object represent the element that the features
     * should be extracted from
     * @return list of n-grams (and occurrences) extracted from the given source
     */
    @Override
    public Map<String, Integer> ExtractFeaturesFrequencyFromSingleElement(T element) {
        Map<String, Integer> ngrams = new HashMap<>();

        String filePath = (String) element;
        File file = new File(filePath);

        if (file.exists()) {
            try {
                byte[] fileBytes = FileUtils.readFileToByteArray(file);
                String ngram = "";
                for (int i = 0; i <= fileBytes.length - m_grams; i = i + m_skip) {
                    for (int j = 0; j < m_grams; j++) {
                        ngram += (char) fileBytes[i + j];
                    }
                    if (!ngrams.containsKey(ngram)) {
                        ngrams.put(ngram, 1);
                    } else {
                        ngrams.put(ngram, ngrams.get(ngram) + 1);
                    }
                    ngram = "";
                }
            } catch (IOException e) {
                Console.PrintException(
                        String.format("Error extracting Byte n-grams features from file: %s", filePath), e);
            }
        }
        return ngrams;
    }

    @Override
    public String GetName() {
        return String.format("Byte n-gram (grams=%s skip=%s)", m_grams, m_skip);
    }

}