Algorithm.SegmentationAndNounFilter.java Source code

Java tutorial

Introduction

Here is the source code for Algorithm.SegmentationAndNounFilter.java

Source

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */

package Algorithm;

import ICTCLAS.I3S.AC.ICTCLAS50;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.Iterator;
import java.util.Scanner;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;

/**
 * Runs the text of an Excel workbook through the ICTCLAS50 segmenter and
 * writes the tokens tagged {@code /n...} to a new workbook.
 *
 * <p>Pipeline, as implemented below:
 * <ol>
 *   <li>concatenate every string-typed cell of the first sheet of the input
 *       {@code .xlsx} file (no separator is inserted between cells);</li>
 *   <li>pass that text to {@code ICTCLAS50.ICTCLAS_ParagraphProcess};</li>
 *   <li>split the result on whitespace and keep tokens of the form
 *       {@code word/n...} whose word is longer than one character and is not
 *       made only of ASCII digits;</li>
 *   <li>write each kept word to column 0 of successive rows of a sheet named
 *       {@code "test"} in a new workbook.</li>
 * </ol>
 *
 * @author miss lulu
 */
public class SegmentationAndNounFilter {

    /** Charset name used for every byte[] exchanged with the ICTCLAS library. */
    private static final String ICTCLAS_ENCODING = "GB2312";

    /**
     * Matches a token containing {@code /n}; group 1 is the text before it.
     * Presumably {@code /n...} is the segmenter's noun tag family - confirm
     * against the ICTCLAS tag set.
     */
    private static final Pattern NOUN_TOKEN = Pattern.compile("(.+?)/n.*");

    /** Matches a string consisting solely of one or more ASCII digits. */
    private static final Pattern DIGITS_ONLY = Pattern.compile("[0-9]{1,}");

    /**
     * Segments the text of the workbook at {@code filepath} and writes the
     * filtered words to a sibling workbook.
     *
     * <p>The output path is {@code filepath} with every occurrence of
     * {@code filename} replaced by {@code "???.xlsx"}. If {@code filename}
     * does not occur in {@code filepath} the output path equals the input path.
     *
     * <p>This method never throws: any failure is reported on
     * {@code System.err} and the method returns normally. If the segmenter
     * cannot be initialised, nothing is read or written.
     *
     * @param filepath path of the input {@code .xlsx} workbook
     * @param filename the portion of {@code filepath} to substitute when
     *                 deriving the output path
     * @return always {@code null}; the {@code String[]} return type is kept
     *         only so existing callers continue to compile
     */
    public String[] SegmentationNounFilter(String filepath, String filename) {
        try {
            // NOTE(review): "???" looks like a non-ASCII file name lost to an
            // encoding problem ('?' is also not a legal file-name character on
            // Windows) - confirm the intended name before changing it.
            String mingciPath = filepath.replace(filename, "???.xlsx");

            ICTCLAS50 testICTCLAS50 = new ICTCLAS50();
            // Presumably "." is the directory holding the ICTCLAS data files - TODO confirm.
            String argu = ".";
            if (!testICTCLAS50.ICTCLAS_Init(argu.getBytes(ICTCLAS_ENCODING))) {
                System.out.println("Init Fail");
                // Do not call into a segmenter that failed to initialise.
                return null;
            }
            System.out.println("Init Succeed!");

            try {
                String text = readStringCells(filepath);

                // NOTE(review): the trailing arguments (0, 1) are passed through
                // unchanged; presumably "code type" and "POS-tagged output" - confirm
                // against the ICTCLAS50 API documentation.
                byte[] nativeBytes = testICTCLAS50.ICTCLAS_ParagraphProcess(
                        text.getBytes(ICTCLAS_ENCODING), 0, 1);
                String nativeStr = new String(nativeBytes, 0, nativeBytes.length, ICTCLAS_ENCODING);

                writeNouns(nativeStr, mingciPath);
            } finally {
                // Shut the segmenter down even when reading/writing fails.
                testICTCLAS50.ICTCLAS_Exit();
            }
        } catch (Exception ex) {
            // Callers have never had to handle exceptions from this method, so keep
            // the best-effort contract but make the failure visible.
            System.err.println("SegmentationNounFilter failed for " + filepath + ": " + ex);
            ex.printStackTrace();
        }

        return null;
    }

    /**
     * Concatenates the values of all string-typed cells in the first sheet of
     * the workbook at {@code filepath}, in row then cell iteration order.
     * Cells of any other type are skipped.
     *
     * @param filepath path of the {@code .xlsx} workbook to read
     * @return the concatenated cell text (empty if there are no string cells)
     * @throws IOException if the file cannot be opened or read
     */
    private String readStringCells(String filepath) throws IOException {
        StringBuilder text = new StringBuilder();
        FileInputStream file = new FileInputStream(new File(filepath));
        try {
            XSSFWorkbook workbook = new XSSFWorkbook(file);
            XSSFSheet sheet1 = workbook.getSheetAt(0);
            Iterator<Row> rowIterator = sheet1.iterator();
            while (rowIterator.hasNext()) {
                Iterator<Cell> cellIterator = rowIterator.next().cellIterator();
                while (cellIterator.hasNext()) {
                    Cell cell = cellIterator.next();
                    if (cell.getCellType() == Cell.CELL_TYPE_STRING) {
                        text.append(cell.getStringCellValue());
                    }
                }
            }
        } finally {
            file.close();
        }
        return text.toString();
    }

    /**
     * Filters the whitespace-separated tokens of {@code taggedText} and writes
     * the kept words, one per row in column 0, to a new workbook at
     * {@code outputPath}. The workbook is written even when no token is kept.
     *
     * @param taggedText segmenter output made of whitespace-separated tokens
     * @param outputPath path of the {@code .xlsx} file to create or overwrite
     * @throws IOException if the output file cannot be opened or written
     */
    private void writeNouns(String taggedText, String outputPath) throws IOException {
        XSSFWorkbook workbookNew = new XSSFWorkbook();
        XSSFSheet sheetNew = workbookNew.createSheet("test");
        int rowNum = 0;

        Scanner in = new Scanner(taggedText);
        try {
            while (in.hasNext()) {
                Matcher matcher = NOUN_TOKEN.matcher(in.next());
                if (!matcher.find()) {
                    continue;
                }
                String word = matcher.group(1);
                // Drop single-character words and pure numbers.
                if (word.length() > 1 && !isDigit(word)) {
                    Row rowNew = sheetNew.createRow(rowNum++);
                    Cell cellNew = rowNew.createCell(0);
                    cellNew.setCellValue(word);
                }
            }
        } finally {
            in.close();
        }

        FileOutputStream fileOut = new FileOutputStream(new File(outputPath));
        try {
            workbookNew.write(fileOut);
        } finally {
            fileOut.close();
        }
    }

    /**
     * Tells whether {@code strNum} consists only of ASCII digits.
     *
     * @param strNum the string to test; must not be {@code null}
     * @return {@code true} if the string is one or more characters in {@code 0-9}
     */
    private boolean isDigit(String strNum) {
        return DIGITS_ONLY.matcher(strNum).matches();
    }

}