List of usage examples for java.util.Vector elementAt
public synchronized E elementAt(int index)
From source file:io.snappydata.hydra.cluster.SnappyTest.java
/**
 * Executes user SQL scripts.
 *
 * <p>Reads the script names and the per-script option lists from {@code SnappyPrms}, pads every
 * option list that is shorter than the script list with its default value, and then runs each
 * script through the snappy shell ({@code SnappyShellPath run -file=...}), writing the output to
 * {@code sqlScriptsResult.log} in the current working directory.
 *
 * @throws TestException if no script names are configured, or if the canonical path of the
 *         result log file cannot be resolved
 */
public static synchronized void HydraTask_executeSQLScripts() {
    Vector scriptNames = SnappyPrms.getSQLScriptNames();
    if (scriptNames == null) {
        String s = "No Script names provided for executing in the Hydra TASK";
        throw new TestException(s);
    }

    File log = null;
    try {
        Vector dataLocationList = SnappyPrms.getDataLocationList();
        Vector persistenceModeList = SnappyPrms.getPersistenceModeList();
        Vector colocateWithOptionList = SnappyPrms.getColocateWithOptionList();
        Vector partitionByOptionList = SnappyPrms.getPartitionByOptionList();
        Vector numPartitionsList = SnappyPrms.getNumPartitionsList();
        Vector redundancyOptionList = SnappyPrms.getRedundancyOptionList();
        Vector recoverDelayOptionList = SnappyPrms.getRecoverDelayOptionList();
        Vector maxPartitionSizeList = SnappyPrms.getMaxPartitionSizeList();
        Vector evictionByOptionList = SnappyPrms.getEvictionByOptionList();

        // Every option list must have one entry per script; missing entries get the default.
        int scriptCount = scriptNames.size();
        padWithDefault(dataLocationList, scriptCount, " ",
                "Adding \" \" parameter in the dataLocationList for the scripts for which no dataLocation is specified.");
        padWithDefault(persistenceModeList, scriptCount, "async",
                "Adding \"async\" parameter in the persistenceModeList for the scripts for which no persistence mode is specified.");
        padWithDefault(colocateWithOptionList, scriptCount, "none",
                "Adding \"none\" parameter in the colocateWithOptionList for the scripts for which no COLOCATE_WITH Option is specified.");
        padWithDefault(partitionByOptionList, scriptCount, " ",
                "Adding \" \" parameter in the partitionByOptionList for the scripts for which no PARTITION_BY option is specified.");
        padWithDefault(numPartitionsList, scriptCount, "113",
                "Adding \"113\" parameter in the numPartitionsList for the scripts for which no BUCKETS option is specified.");
        padWithDefault(redundancyOptionList, scriptCount, " ",
                "Adding \" \" parameter in the redundancyOptionList for the scripts for which no REDUNDANCY option is specified.");
        padWithDefault(recoverDelayOptionList, scriptCount, " ",
                "Adding \" \" parameter in the recoverDelayOptionList for the scripts for which no RECOVER_DELAY option is specified.");
        padWithDefault(maxPartitionSizeList, scriptCount, " ",
                "Adding \" \" parameter in the maxPartitionSizeList for the scripts for which no MAX_PART_SIZE option is specified.");
        padWithDefault(evictionByOptionList, scriptCount, "LRUHEAPPERCENT",
                "Adding \"LRUHEAPPERCENT\" parameter in the evictionByOptionList for the scripts for which no EVICTION_BY option is specified.");

        for (int i = 0; i < scriptCount; i++) {
            String userScript = (String) scriptNames.elementAt(i);
            String location = (String) dataLocationList.elementAt(i);
            String persistenceMode = (String) persistenceModeList.elementAt(i);
            String colocateWith = (String) colocateWithOptionList.elementAt(i);
            String partitionBy = (String) partitionByOptionList.elementAt(i);
            String numPartitions = (String) numPartitionsList.elementAt(i);
            String redundancy = (String) redundancyOptionList.elementAt(i);
            String recoverDelay = (String) recoverDelayOptionList.elementAt(i);
            String maxPartitionSize = (String) maxPartitionSizeList.elementAt(i);
            String evictionByOption = (String) evictionByOptionList.elementAt(i);

            String dataLocation = snappyTest.getDataLocation(location);
            String filePath = snappyTest.getScriptLocation(userScript);

            // All scripts share one result log in the current working directory.
            log = new File(".");
            String dest = log.getCanonicalPath() + File.separator + "sqlScriptsResult.log";
            File logFile = new File(dest);

            String primaryLocatorHost = (String) SnappyBB.getBB().getSharedMap().get("primaryLocatorHost");
            String primaryLocatorPort = (String) SnappyBB.getBB().getSharedMap().get("primaryLocatorPort");

            ProcessBuilder pb = new ProcessBuilder(SnappyShellPath, "run",
                    "-file=" + filePath,
                    "-param:dataLocation=" + dataLocation,
                    "-param:persistenceMode=" + persistenceMode,
                    "-param:colocateWith=" + colocateWith,
                    "-param:partitionBy=" + partitionBy,
                    "-param:numPartitions=" + numPartitions,
                    "-param:redundancy=" + redundancy,
                    "-param:recoverDelay=" + recoverDelay,
                    "-param:maxPartitionSize=" + maxPartitionSize,
                    "-param:evictionByOption=" + evictionByOption,
                    "-client-port=" + primaryLocatorPort,
                    "-client-bind-address=" + primaryLocatorHost);
            snappyTest.executeProcess(pb, logFile);
        }
    } catch (IOException e) {
        throw new TestException("IOException occurred while retriving destination logFile path " + log
                + "\nError Message:" + e.getMessage());
    }
}

/**
 * Appends {@code defaultValue} to {@code options} until it holds {@code targetSize} entries.
 *
 * <p>Logs {@code logMessage} once when padding is needed. A list that is already at least
 * {@code targetSize} long is left untouched; the comparison is {@code <} rather than
 * {@code !=} so that an over-long list cannot cause an endless loop.
 */
@SuppressWarnings("unchecked")
private static void padWithDefault(Vector options, int targetSize, String defaultValue, String logMessage) {
    if (options.size() >= targetSize) {
        return;
    }
    Log.getLogWriter().info(logMessage);
    while (options.size() < targetSize) {
        options.add(defaultValue);
    }
}
From source file:dao.CollabrumDaoDb.java
private void deleteRBCollabrum(String directoryId, String collabrumId, String userId, String userLogin) throws BaseDaoException { if (RegexStrUtil.isNull(userId) || RegexStrUtil.isNull(collabrumId) || RegexStrUtil.isNull(directoryId) || RegexStrUtil.isNull(userLogin)) { throw new BaseDaoException("params are null"); }//from w w w . jav a 2 s .c o m List tidList = getTidList(collabrumId); List blobEntryList = getBlobEntryList(collabrumId); Vector ridVector = new Vector(); for (int i = 0; i < tidList.size(); i++) { /* get list of rids from collmessages */ List ridList = getRidList((String) ((ColTopic) tidList.get(i)).getValue(DbConstants.TID)); ridVector.add(ridList); } /** * get the members list from collmembers, then access each record in this table * collblock (deleteAllColBlockQuery) partitioned on loginid * deleteColBlockQuery.run(conn, collabrumId); */ deleteBlockedMembers(collabrumId); /** * Get scalability datasource with no partitions for colladmin, collmembers, dircoll, collabrum */ String sourceName = scalabilityManager.getWriteZeroScalability(); ds = scalabilityManager.getSource(sourceName); if (ds == null) { StringBuffer sb = new StringBuffer("ds is null, deleteCollabrum() "); sb.append(sourceName); sb.append(" collabrumId = "); sb.append(collabrumId); throw new BaseDaoException(sb.toString()); } HashSet result = null; Connection conn = null; /** * non partitioned tables */ try { conn = ds.getConnection(); conn.setAutoCommit(false); result = listModeratorQuery.run(conn, collabrumId); /** * Not partitioned * collabrum, (deleteQuery) * colladmin (deleteAdminQuery) * dircoll (deleteDirColQuery) * collmembers (deleteColMembersQuery), * * collblobtags (deleteColBlobTagsQuery) * collblogtags (deleteColBlogTagsQuery) * collabrum_ind, (deleteCollabrumIndexQuery) * collblob_ind, (deleteColBlobIndexQuery) * collmessages_ind, (deleteColMessagesIndexQuery) * colltopics_ind, (deleteColTopicsIndexQuery) */ deleteQuery.run(conn, collabrumId); deleteAdminQuery.run(conn, 
collabrumId); deleteDircollQuery.run(conn, collabrumId); deleteAllMembersQuery.run(conn, collabrumId); /* new ones */ deleteColBlobTagsQuery.run(conn, collabrumId); deleteColBlogTagsQuery.run(conn, collabrumId); deleteCollabrumIndexQuery.run(conn, collabrumId); for (int i = 0; i < blobEntryList.size(); i++) { deleteColBlobIndexQuery.run(conn, (String) ((Photo) blobEntryList.get(i)).getValue(DbConstants.ENTRYID)); } for (int i = 0; i < tidList.size(); i++) { deleteColTopicsIndexQuery.run(conn, (String) ((ColTopic) tidList.get(i)).getValue(DbConstants.TID)); } for (int i = 0; i < ridVector.size(); i++) { List ridList = (List) ridVector.elementAt(i); for (int j = 0; i < ridList.size(); j++) { deleteColMessagesIndexQuery.run(conn, (String) ((ColMessage) ridList.get(j)).getValue(DbConstants.RID)); } } } catch (Exception e) { try { conn.rollback(); } catch (Exception e1) { try { if (conn != null) { conn.setAutoCommit(true); conn.close(); } } catch (Exception e2) { StringBuffer sb = new StringBuffer( "conn.close exception for rollback(), for deleteCollabrum() "); sb.append("collabrumId = "); sb.append(collabrumId); sb.append(" userId = "); sb.append(userId); throw new BaseDaoException(sb.toString(), e2); } StringBuffer sb = new StringBuffer(" rollback() exception, for deleteCollabrum() "); sb.append("collabrumId = "); sb.append(collabrumId); sb.append(" userId = "); sb.append(userId); throw new BaseDaoException(sb.toString(), e1); } } // connection commit try { conn.commit(); } catch (Exception e3) { StringBuffer sb = new StringBuffer(" commit() exception, for deleteCollabrum() collabrumId = "); sb.append(collabrumId); sb.append(" userId = "); sb.append(userId); throw new BaseDaoException(sb.toString(), e3); } try { if (conn != null) { conn.setAutoCommit(true); conn.close(); } } catch (Exception e4) { StringBuffer sb = new StringBuffer( " conn.close() exception, for commit(), deleteCollabrum() collabrumId = "); sb.append(collabrumId); sb.append(" userId = "); 
sb.append(userId); throw new BaseDaoException(sb.toString(), e4); } deleteCollMessages(collabrumId, tidList); deleteCollTopics(collabrumId, tidList); /** * Jboss methods * fqn - full qualified name * check if the collabrum already exists in the cache * If it exists, remove the collabrum from the cache */ Fqn fqn = cacheUtil.fqn(DbConstants.COLLABRUM); if (treeCache.exists(fqn, collabrumId)) { treeCache.remove(fqn, collabrumId); } fqn = cacheUtil.fqn(DbConstants.ORGANIZERS); if (treeCache.exists(fqn, collabrumId)) { treeCache.remove(fqn, collabrumId); } fqn = cacheUtil.fqn(DbConstants.COLLABRUM_EDIT); if (treeCache.exists(fqn, collabrumId)) { treeCache.remove(fqn, collabrumId); } fqn = cacheUtil.fqn(DbConstants.DIRECTORY); if (treeCache.exists(fqn, directoryId)) { treeCache.remove(fqn, directoryId); } fqn = cacheUtil.fqn(DbConstants.COLTOPICS); if (treeCache.exists(fqn, collabrumId)) { treeCache.remove(fqn, collabrumId); } fqn = cacheUtil.fqn(DbConstants.COLTRAFFIC); if (treeCache.exists(fqn, collabrumId)) { treeCache.remove(fqn, collabrumId); } /** * delete collabrum messages */ for (int i = 0; i < tidList.size(); i++) { StringBuffer sb = new StringBuffer(collabrumId); sb.append("-"); sb.append(tidList.get(i)); String key = sb.toString(); fqn = cacheUtil.fqn(DbConstants.COLMSGS); if (treeCache.exists(fqn, key)) { treeCache.remove(fqn, key); } fqn = cacheUtil.fqn(DbConstants.COLTOPIC); if (treeCache.exists(fqn, key)) { treeCache.remove(fqn, key); } } fqn = cacheUtil.fqn(DbConstants.COLLABRUM_STREAM_BLOBS); if (treeCache.exists(fqn, collabrumId)) { treeCache.remove(fqn, collabrumId); } // deleting user pages for each admin as we want them to be updated if ((result != null) && (result.size() > 0)) { Iterator it = result.iterator(); StringBuffer sb = new StringBuffer(); while (it.hasNext()) { Collabrum collabrum = (Collabrum) it.next(); String adminUser = collabrum.getValue(DbConstants.LOGIN); if (!RegexStrUtil.isNull(adminUser)) { fqn = 
cacheUtil.fqn(DbConstants.USER_PAGE); if (treeCache.exists(fqn, adminUser)) { treeCache.remove(fqn, adminUser); } fqn = cacheUtil.fqn(DbConstants.MEM_AS_ORGANIZER_LIST); if (treeCache.exists(fqn, adminUser)) { treeCache.remove(fqn, adminUser); } fqn = cacheUtil.fqn(DbConstants.MEM_AS_MODERATOR_LIST); if (treeCache.exists(fqn, adminUser)) { treeCache.remove(fqn, adminUser); } String adminId = collabrum.getValue(DbConstants.LOGIN_ID); fqn = cacheUtil.fqn(DbConstants.BLOCKED_COLLABRUM_LIST); if (treeCache.exists(fqn, adminId)) { treeCache.remove(fqn, adminId); } // delete organizer key = collabrumid-memberid sb.delete(0, sb.length()); sb.append(collabrumId); sb.append("-"); sb.append(adminId); fqn = cacheUtil.fqn(DbConstants.ORGANIZER); if (treeCache.exists(fqn, sb.toString())) { treeCache.remove(fqn, sb.toString()); } } } } fqn = cacheUtil.fqn(DbConstants.COLLABRUM_LIST); if (treeCache.exists(fqn, directoryId)) { treeCache.remove(fqn, directoryId); } /** * Jboss methods - * fqn - full qualified name * check if the streamblob already set in the cache * If it exists, remove the bean from the cache. 
*/ for (int i = 0; i < blobEntryList.size(); i++) { String entryId = (String) ((Photo) blobEntryList.get(i)).getValue(DbConstants.ENTRYID); fqn = cacheUtil.fqn(DbConstants.PHOTO); if (treeCache.exists(fqn, entryId)) { treeCache.remove(fqn, entryId); } StringBuffer buf = new StringBuffer(collabrumId); buf.append("-"); buf.append(entryId); String key = buf.toString(); fqn = cacheUtil.fqn(DbConstants.COL_STREAM_BLOB); if (treeCache.exists(fqn, key)) { treeCache.remove(fqn, key); } fqn = cacheUtil.fqn(DbConstants.DEFAULT_PHOTO); if (treeCache.exists(fqn, key)) { treeCache.remove(fqn, key); } } fqn = cacheUtil.fqn(DbConstants.COLL_CAT); StringBuffer sb = new StringBuffer(collabrumId); sb.append("-"); sb.append(DbConstants.PHOTO_CATEGORY); if (treeCache.exists(fqn, sb.toString())) { treeCache.remove(fqn, sb.toString()); } sb.delete(0, sb.length()); sb.append(collabrumId); sb.append("-"); sb.append(DbConstants.FILE_CATEGORY); if (treeCache.exists(fqn, sb.toString())) { treeCache.remove(fqn, sb.toString()); } }
From source file:org.executequery.gui.importexport.ImportDelimitedWorker.java
private Object doWork() { // the process result String processResult = null;/*from ww w . j av a 2 s. c o m*/ // are we halting on any error int onError = getParent().getOnError(); haltOnError = (onError == ImportExportProcess.STOP_TRANSFER); boolean isBatch = getParent().runAsBatchProcess(); appendProgressText("Beginning import from delimited file process..."); appendProgressText("Using connection: " + getParent().getDatabaseConnection().getName()); // --------------------------------------- // table specific counters // the table statement result int tableInsertCount = 0; // the records processed for this table int tableRowCount = 0; // the table commit count int tableCommitCount = 0; // --------------------------------------- // total import process counters // the current commit block size int commitCount = 0; // the total records inserted int totalInsertCount = 0; // the total records processed int totalRecordCount = 0; // the error count int errorCount = 0; // the current line number int lineNumber = 0; int rollbackSize = getParent().getRollbackSize(); int rollbackCount = 0; FileReader fileReader = null; BufferedReader reader = null; DateFormat dateFormat = null; try { // retrieve the import files Vector files = getParent().getDataFileVector(); int fileCount = files.size(); // whether to trim whitespace boolean trimWhitespace = getParent().trimWhitespace(); // whether this table has a date/time field boolean hasDate = false; // whether we are parsing date formats boolean parsingDates = parseDateValues(); // column names are first row boolean hasColumnNames = getParent().includeColumnNames(); // currently bound variables in the prepared statement Map<ColumnData, String> boundVariables = null; // ignored indexes of columns from the file List<Integer> ignoredIndexes = null; if (hasColumnNames) { boundVariables = new HashMap<ColumnData, String>(); ignoredIndexes = new ArrayList<Integer>(); appendProgressText("Using column names from input file's first row."); } 
// columns to be imported that are in the file Map<ColumnData, String> fileImportedColumns = new HashMap<ColumnData, String>(); // whether the data format failed (switch structure) boolean failed = false; // define the delimiter String delim = getParent().getDelimiter(); // --------------------------- // --- initialise counters --- // --------------------------- // the table's column count int columnCount = -1; // the length of each line in the file int rowLength = -1; // progress bar values int progressStatus = -1; // ongoing progress value int progressCheck = -1; // the import file size long fileSize = -1; // set the date format if (parseDateValues()) { try { dateFormat = createDateFormatter(); } catch (IllegalArgumentException e) { errorCount++; outputExceptionError("Error applying date mask", e); return FAILED; } } // record the start time start(); // setup the regex matcher for delims // ---------------------------------------------------------------- // below was the original pattern from oreilly book. // discovered issues when parsing values with quotes // in them - not only around them. /* String regex = "(?:^|\\" + delim + ") (?: \" ( (?> [^\"]*+ ) (?> \"\" [^\"]*+ )*+ ) \" | ( [^\"\\" + delim + "]*+ ) )"; Matcher matcher = Pattern.compile(regex, Pattern.COMMENTS).matcher(""); Matcher qMatcher = Pattern.compile("\"\"", Pattern.COMMENTS).matcher(""); */ // ---------------------------------------------------------------- // modified above to regex below // added the look-ahead after the close quote // and removed the quote from the last regex pattern String escapedDelim = escapeDelim(delim); String regex = "(?:^|" + escapedDelim + ") (?: \" ( (?> [^\"]*+ ) (?> \"\" [^\"]*+ )*+ ) \"(?=" + escapedDelim + "?) 
| ( [^" + escapedDelim + "]*+ ) )"; // ---------------------------------------------------------------- // changed above to the following - seems to work for now // regex pattern in full - where <delim> is the delimiter to use // \"([^\"]+?)\"<delim>?|([^<delim>]+)<delim>?|<delim> // // fixed oreilly one - not running this one // ---------------------------------------------------------------- Matcher matcher = Pattern.compile(regex, Pattern.COMMENTS).matcher(""); Matcher qMatcher = Pattern.compile("\"\"", Pattern.COMMENTS).matcher(""); // ---------------------------------------- // --- begin looping through the tables --- // ---------------------------------------- // ensure the connection has auto-commit to false conn = getConnection(); conn.setAutoCommit(false); int currentRowLength = 0; boolean insertLine = false; // the number of columns actually available in the file int filesColumnCount = 0; for (int i = 0; i < fileCount; i++) { lineNumber = 0; tableInsertCount = 0; tableCommitCount = 0; rollbackCount = 0; tableRowCount = 0; rowLength = 0; if (Thread.interrupted()) { setProgressStatus(100); throw new InterruptedException(); } tableCount++; DataTransferObject dto = (DataTransferObject) files.elementAt(i); // initialise the file object File inputFile = new File(dto.getFileName()); outputBuffer.append("---------------------------\nTable: "); outputBuffer.append(dto.getTableName()); outputBuffer.append("\nImport File: "); outputBuffer.append(inputFile.getName()); appendProgressText(outputBuffer); // setup the reader objects fileReader = new FileReader(inputFile); reader = new BufferedReader(fileReader); // retrieve the columns to be imported (or all) Vector<ColumnData> columns = getColumns(dto.getTableName()); columnCount = columns.size(); filesColumnCount = columnCount; // the wntire row read String row = null; // the current delimited value String value = null; // the ignored column count int ignoredCount = 0; // clear the file columns cache 
fileImportedColumns.clear(); // if the first row in the file has the column // names compare these with the columns selected if (hasColumnNames) { // init the bound vars cache with the selected columns boundVariables.clear(); for (int k = 0; k < columnCount; k++) { boundVariables.put(columns.get(k), VARIABLE_NOT_BOUND); } row = reader.readLine(); lineNumber++; String[] _columns = MiscUtils.splitSeparatedValues(row, delim); if (_columns != null && _columns.length > 0) { filesColumnCount = _columns.length; // -------------------------------------- // first determine if we have any columns in the // input file that were not selected for import // reset the ignored columns ignoredIndexes.clear(); // set up another list to re-add the columns in // the order in which they appear in the file. // all other columns will be added to the end Vector<ColumnData> temp = new Vector<ColumnData>(columnCount); ColumnData cd = null; int ignoredIndex = -1; for (int j = 0; j < _columns.length; j++) { ignoredIndex = j; String column = _columns[j]; for (int k = 0; k < columnCount; k++) { cd = columns.get(k); String _column = cd.getColumnName(); if (_column.equalsIgnoreCase(column)) { temp.add(cd); fileImportedColumns.put(cd, INCLUDED_COLUMN); ignoredIndex = -1; break; } } if (ignoredIndex != -1) { ignoredIndexes.add(Integer.valueOf(ignoredIndex)); } } ignoredCount = ignoredIndexes.size(); // if we didn't find any columns at all, show warning if (temp.isEmpty()) { String message = "No matching column names were " + "found within the specified file's first line.\n" + "The current file will be ignored."; outputBuffer.append(message); appendProgressWarningText(outputBuffer); int yesNo = GUIUtilities.displayYesNoDialog(message + "\nDo you wish to continue?", "Warning"); if (yesNo == JOptionPane.YES_OPTION) { continue; } else { throw new InterruptedException(); } } else { // add any other selected columns to the // end of the temp list with the columns // available in the file boolean 
addColumn = false; for (int k = 0; k < columnCount; k++) { addColumn = false; cd = columns.get(k); for (int j = 0, n = temp.size(); j < n; j++) { addColumn = true; if (temp.get(j) == cd) { addColumn = false; break; } } if (addColumn) { temp.add(cd); } } columns = temp; // note: size should not have changed } } } // otherwise just populate the columns in the file // with all the selected columns else { for (int j = 0; j < columnCount; j++) { fileImportedColumns.put(columns.get(j), INCLUDED_COLUMN); } } /* Log.debug("ignored count: " + ignoredCount); for (int j = 0; j < columnCount; j++) { Log.debug("column: " + columns.get(j)); } */ fileSize = inputFile.length(); progressStatus = 10; progressCheck = (int) (fileSize / progressStatus); // prepare the statement prepareStatement(dto.getTableName(), columns); if (parsingDates && dateFormat == null) { // check for a date data type for (int j = 0; j < columnCount; j++) { if (dateFormat == null && !hasDate) { ColumnData cd = columns.get(j); if (fileImportedColumns.containsKey(cd)) { if (cd.isDateDataType()) { hasDate = true; break; } } } } if (hasDate && dateFormat == null) { String pattern = verifyDate(); if (StringUtils.isNotBlank(pattern)) { fileReader.close(); setProgressStatus(100); throw new InterruptedException(); } dateFormat = createDateFormatter(pattern); } } rowLength = 0; while ((row = reader.readLine()) != null) { insertLine = true; lineNumber++; tableRowCount++; totalRecordCount++; if (Thread.interrupted()) { fileReader.close(); printTableResult(tableRowCount, tableCommitCount, dto.getTableName()); setProgressStatus(100); throw new InterruptedException(); } currentRowLength = row.length(); if (currentRowLength == 0) { outputBuffer.append("Line "); outputBuffer.append(lineNumber); outputBuffer.append(" contains no delimited values"); appendProgressWarningText(outputBuffer); int yesNo = GUIUtilities.displayYesNoDialog("No values provided from line " + lineNumber + " - the row is blank.\n" + "Do you wish to 
continue?", "Warning"); if (yesNo == JOptionPane.YES_OPTION) { continue; } else { throw new InterruptedException(); } } rowLength += currentRowLength; if (progressCheck < rowLength) { setProgressStatus(progressStatus); progressStatus += 10; rowLength = 0; } // reset matcher with current row matcher.reset(row); int index = 0; int lastIndex = -1; int loopIgnoredCount = 0; //Log.debug(row); for (int j = 0; j < filesColumnCount; j++) { if (matcher.find(index)) { String first = matcher.group(2); if (first != null) { value = first; } else { qMatcher.reset(matcher.group(1)); value = qMatcher.replaceAll("\""); } index = matcher.end(); // check if its an ignored column if (ignoredCount > 0) { if (isIndexIgnored(ignoredIndexes, j)) { loopIgnoredCount++; continue; } } } else { // not enough delims check if (j < (filesColumnCount - 1) && index > (currentRowLength - 1)) { outputBuffer.append("Insufficient number of column "); outputBuffer.append("values provided at line "); outputBuffer.append(lineNumber); appendProgressErrorText(outputBuffer); int yesNo = GUIUtilities .displayYesNoDialog("Insufficient number of values provided from line " + lineNumber + ".\n" + "Do you wish to continue?", "Warning"); if (yesNo == JOptionPane.YES_OPTION) { insertLine = false; break; } else { throw new InterruptedException(); } } else { // check if we're on a delim the matcher didn't pick up int delimLength = delim.length(); if (row.substring(index, index + delimLength).equals(delim)) { // increment index index++; // assign as null value value = null; } } } // check if we landed on the same index - likely null value if (index == lastIndex) { index++; } lastIndex = index; if (value != null && value.trim().length() == 0) { value = null; } try { ColumnData cd = columns.get(j - loopIgnoredCount); setValue(value, getIndexOfColumn(columns, cd) + 1, cd.getSQLType(), trimWhitespace, dateFormat); if (hasColumnNames) { boundVariables.put(cd, VARIABLE_BOUND); } } catch (ParseException e) { errorCount++; 
failed = true; outputBuffer.append("Error parsing date value - "); outputBuffer.append(value); outputBuffer.append(" - on line "); outputBuffer.append(lineNumber); outputBuffer.append(" at position "); outputBuffer.append(j); outputExceptionError(null, e); break; } catch (NumberFormatException e) { errorCount++; failed = true; outputBuffer.append("Error parsing value - "); outputBuffer.append(value); outputBuffer.append(" - on line "); outputBuffer.append(lineNumber); outputBuffer.append(" at position "); outputBuffer.append(j); outputExceptionError(null, e); break; } } if (!insertLine) { prepStmnt.clearParameters(); continue; } if (failed && haltOnError) { processResult = FAILED; break; } // execute the statement try { // check all variables are bound if we used // the column names from the first row if (hasColumnNames) { index = 0; // check all variables are bound - insert NULL otherwise for (Map.Entry<ColumnData, String> entry : boundVariables.entrySet()) { ColumnData cd = entry.getKey(); if (VARIABLE_NOT_BOUND.equals(entry.getValue())) { index = getIndexOfColumn(columns, cd); prepStmnt.setNull(index + 1, cd.getSQLType()); } } } if (isBatch) { prepStmnt.addBatch(); } else { int result = prepStmnt.executeUpdate(); tableInsertCount += result; commitCount += result; } rollbackCount++; // check the rollback segment if (rollbackCount == rollbackSize) { if (isBatch) { int result = getBatchResult(prepStmnt.executeBatch())[0]; tableInsertCount += result; commitCount += result; prepStmnt.clearBatch(); } conn.commit(); totalInsertCount += commitCount; tableCommitCount = tableInsertCount; rollbackCount = 0; commitCount = 0; } // reset bound variables if (hasColumnNames) { for (int k = 0; k < columnCount; k++) { boundVariables.put(columns.get(k), VARIABLE_NOT_BOUND); } } } catch (SQLException e) { logException(e); errorCount++; if (!isBatch) { outputBuffer.append("Error inserting data from line "); outputBuffer.append(lineNumber); outputExceptionError(null, e); } else { 
outputBuffer.append("Error on last batch execution"); outputExceptionError(null, e); } if (haltOnError) { processResult = FAILED; conn.rollback(); getParent().cancelTransfer(); throw new InterruptedException(); } } } // ---------------------------- // file/table has ended here if (isBatch) { int[] batchResult = null; try { batchResult = getBatchResult(prepStmnt.executeBatch()); int result = batchResult[0]; tableInsertCount += result; commitCount += result; tableCommitCount = tableInsertCount; } catch (BatchUpdateException e) { logException(e); int[] updateCounts = e.getUpdateCounts(); batchResult = getBatchResult(updateCounts); errorCount += batchResult[1]; if (errorCount == 0) { errorCount = 1; } outputBuffer.append("An error occured during the batch process: "); outputBuffer.append(e.getMessage()); SQLException _e = e.getNextException(); while (_e != null) { outputBuffer.append("\nNext Exception: "); outputBuffer.append(_e.getMessage()); _e = _e.getNextException(); } outputBuffer.append("\n\nRecords processed to the point "); outputBuffer.append("where this error occurred: "); outputBuffer.append(updateCounts.length); appendProgressErrorText(outputBuffer); processResult = FAILED; } // Log.debug("commitCount: " + commitCount + // " batch: " + batchResult[0]); if (tableRowCount != tableInsertCount) { conn.rollback(); if (onError == ImportExportProcess.STOP_TRANSFER) { getParent().cancelTransfer(); processResult = FAILED; throw new InterruptedException(); } } } boolean doCommit = true; if (failed && !isBatch && rollbackSize != ImportExportProcess.COMMIT_END_OF_ALL_FILES) { int yesNo = GUIUtilities.displayYesNoDialog( "The process completed with errors.\n" + "Do you wish to commit the last block?", "Confirm commit"); doCommit = (yesNo == JOptionPane.YES_OPTION); } // do the commit if ok from above // and if rollback size selected is end of file if (rollbackSize == ImportExportProcess.COMMIT_END_OF_FILE) { if (doCommit) { conn.commit(); totalInsertCount += 
commitCount; tableCommitCount = tableInsertCount; commitCount = 0; } else { conn.rollback(); } } // update the progress display printTableResult(tableRowCount, tableInsertCount, dto.getTableName()); setProgressStatus(100); // reset the checks hasDate = false; failed = false; } // commit the last remaining block or where // set to commit at the end of all files if (rollbackSize != ImportExportProcess.COMMIT_END_OF_FILE) { setProgressStatus(100); boolean doCommit = true; if (errorCount > 0 && errorCount != totalRecordCount) { int yesNo = GUIUtilities.displayYesNoDialog( "The process completed with errors.\n" + "Do you wish to commit the changes?", "Confirm commit"); doCommit = (yesNo == JOptionPane.YES_OPTION); } if (doCommit) { conn.commit(); totalInsertCount += commitCount; } else { conn.rollback(); } } processResult = SUCCESS; } catch (InterruptedException e) { if (processResult != FAILED) { processResult = CANCELLED; } try { if (prepStmnt != null) { prepStmnt.cancel(); } if (conn != null) { conn.rollback(); } } catch (SQLException e2) { outputExceptionError("Error rolling back transaction", e); } } catch (Exception e) { logException(e); outputBuffer.append("Error processing data from line "); outputBuffer.append(lineNumber); outputExceptionError("\nUnrecoverable error importing table data from file", e); int yesNo = GUIUtilities.displayYesNoDialog( "The process encountered errors.\n" + "Do you wish to commit the last transaction block?", "Confirm commit"); boolean doCommit = (yesNo == JOptionPane.YES_OPTION); try { if (doCommit) { conn.commit(); totalInsertCount += commitCount; } else { conn.rollback(); } } catch (SQLException e2) { logException(e2); outputExceptionError("Error processing last transaction block", e2); } processResult = FAILED; } finally { finish(); releaseResources(getParent().getDatabaseConnection()); if (totalRecordCount == 0 || errorCount > 0) { processResult = FAILED; } setTableCount(tableCount); setRecordCount(totalRecordCount); 
setRecordCountProcessed(totalInsertCount); setErrorCount(errorCount); setProgressStatus(100); GUIUtilities.scheduleGC(); if (reader != null) { try { reader.close(); } catch (IOException e) { } } if (fileReader != null) { try { fileReader.close(); } catch (IOException e) { } } if (prepStmnt != null) { try { prepStmnt.close(); } catch (SQLException e) { } } } return processResult; }
From source file:org.codecover.eclipse.views.CoverageGraphView.java
private edu.uci.ics.jung.graph.Graph<CoverageGraphNode, CoverageGraphLink> createGraph(String Criterion, String SUTLevel, String TestLevel, Boolean ShowOnlyCovered, Boolean CompleteName) { edu.uci.ics.jung.graph.Graph<CoverageGraphNode, CoverageGraphLink> graph = new SparseMultigraph<CoverageGraphNode, CoverageGraphLink>(); try {/*from ww w .j ava2 s . c o m*/ Set<CoverableItem> coverableItemSet = CreateCoverableItemSet(Criterion); Vector<CoverageGraphNode> SUTItems = new Vector<CoverageGraphNode>(); Vector<String> SUTItemsId = new Vector<String>(); Vector<CoverageGraphNode> TestItems = new Vector<CoverageGraphNode>(); Vector<String> TestItemsId = new Vector<String>(); int NumOfSUTNodes = 0; int NumOfTestNodes = 0; if (selectedTestCases.size() != 0) { CoverageGraphNode SUTNode; CoverageGraphNode TestNode; List<CoverableItem> SUTItemList = new ArrayList<CoverableItem>(coverableItemSet); List<CoverableItem> CoveredItemList; //Adding all of the SUT Nodes to the graph: //----------------------------------------- for (int j = 0; j < SUTItemList.size(); j++) { boolean ex = false; HierarchyLevel methodLevel = null; try { methodLevel = getSUTItemMethod(SUTItemList.get(j)); } catch (IllegalArgumentException e1) { // the item is a condition, it is not possible to get the parent statement in the tree (it's parent is null) ex = true; } catch (Exception e2) { ex = true; } if (methodLevel != null && !ex) { String PackageName = getSUTItemPackage(getSUTItemClass(methodLevel)); String ClassName = getSUTItemClass(methodLevel).getName(); String MethodName = methodLevel.getName(); String ItemName = getSUTItemId(SUTItemList.get(j).getId()); String nodeName = getNodeLable(SUTLevel, PackageName, ClassName, MethodName, ItemName); if (!SUTItemsId.contains(nodeName)) { SUTNode = new CoverageGraphNode("SUT", SUTLevel, PackageName, ClassName, getSUTItemClass(methodLevel).getLocation().getLocations().get(0), MethodName, methodLevel.getLocation().getLocations().get(0), ItemName, loc, StContent, 
methodLevel, CompleteName); SUTItemsId.add(SUTNode.getLable()); SUTItems.add(SUTNode); NumOfSUTNodes++; if (!ShowOnlyCovered) graph.addVertex(SUTNode); } } } Set<CoverableItem> coveredItemSet; int testsize = 0; if (!selectedTestCases.equals(null)) testsize = selectedTestCases.size(); for (int i = 0; i < testsize; i++) { //Adding Test Nodes to the graph: //------------------------------- TestCase tc = (TestCase) selectedTestCases.get(i); TestNode = new CoverageGraphNode("Test", TestLevel, getTestNodeName(tc.getName(), "Package"), getTestNodeName(tc.getName(), "Class"), getTestNodeName(tc.getName(), "Method"), CompleteName); String testNodeName = TestNode.getLable(); if (!TestItemsId.contains(testNodeName)) { TestItemsId.add(TestNode.getLable()); TestItems.add(TestNode); graph.addVertex(TestNode); TestNode.Testcases.add(tc); NumOfTestNodes++; } else { for (int k = 0; k < NumOfTestNodes; k++) if (TestItems.get(k).getLable().compareTo(testNodeName) == 0) TestNode = TestItems.get(k); TestNode.Testcases.add(tc); } Map<CoverableItem, Long> CoveredItemMap = tc.getCoverageData(); coveredItemSet = new HashSet<CoverableItem>(); for (int j = 0; j < SUTItemList.size(); j++) { coveredItemSet.add(SUTItemList.get(j)); } coveredItemSet.retainAll(CoveredItemMap.keySet()); CoveredItemList = new ArrayList<CoverableItem>(coveredItemSet); String nodeName = ""; for (int j = 0; j < CoveredItemList.size(); j++) { boolean ex = false; HierarchyLevel methodLevel = null; HierarchyLevel currentlevel = null; try { methodLevel = getSUTItemMethod(CoveredItemList.get(j)); } catch (Exception ec) { ex = true; } if (methodLevel != null && !ex) { String PackageName = getSUTItemPackage(getSUTItemClass(methodLevel)); String ClassName = getSUTItemClass(methodLevel).getName(); String MethodName = methodLevel.getName(); String ItemName = getSUTItemId(CoveredItemList.get(j).getId()); nodeName = getNodeLable(SUTLevel, PackageName, ClassName, MethodName, ItemName); //Adding Edges to the graph: 
//-------------------------- Integer id = graph.getEdgeCount() + 1; CoverageGraphLink CoverageLink = graph.findEdge(TestItems.lastElement(), SUTItems.elementAt(SUTItemsId.indexOf(nodeName))); if (CoverageLink == null) { CoverageLink = new CoverageGraphLink(id); CoverageLink.SUTLevel = SUTLevel; } else graph.removeEdge(CoverageLink); CoverageLink.times++; CoverageGraphNode CurrentNode = new CoverageGraphNode(); for (int k = 0; k < NumOfSUTNodes; k++) if (SUTItems.get(k).getLable().compareTo(nodeName) == 0) CurrentNode = SUTItems.get(k); if (CurrentNode != null) { if (ShowOnlyCovered) graph.addVertex(CurrentNode); //For calculating Ratio: if (SUTLevel.compareTo("Method") == 0) currentlevel = methodLevel; else if (SUTLevel.compareTo("Class") == 0) currentlevel = topLevel.getParent(methodLevel); else if (SUTLevel.compareTo("Package") == 0) { currentlevel = topLevel.getParent(methodLevel); currentlevel = topLevel.getParent(currentlevel); } if (currentlevel != null) { CoverageResult coverageResult = null; if (Criterion.compareTo("Statement") == 0) { StatementCoverage coverageMetric = StatementCoverage.getInstance(); coverageResult = coverageMetric.getCoverage(TestNode.Testcases, currentlevel); } else if (Criterion.compareTo("Branch") == 0) { BranchCoverage coverageMetric = BranchCoverage.getInstance(); coverageResult = coverageMetric.getCoverage(TestNode.Testcases, currentlevel); } else if (Criterion.compareTo("Loop") == 0) { LoopCoverage coverageMetric = LoopCoverage.getInstance(); coverageResult = coverageMetric.getCoverage(TestNode.Testcases, currentlevel); } else if (Criterion.compareTo("Term") == 0) { TermCoverage coverageMetric = TermCoverage.getInstance(); coverageResult = coverageMetric.getCoverage(TestNode.Testcases, currentlevel); } if (coverageResult != null) { float coverage = 0f; if (coverageResult.getTotalItems() > 0) { coverage = ((float) coverageResult.getCoveredItems() / coverageResult.getTotalItems()); } CoverageLink.ratio = coverage; } } else 
CoverageLink.ratio = 2; graph.addEdge(CoverageLink, TestItems.lastElement(), CurrentNode, EdgeType.DIRECTED); } } } coveredItemSet.clear(); } } } catch (Exception ex) { return null; } return graph; }
From source file:maui.main.MauiTopicExtractor.java
/** * Builds the model from the files/*from w ww .j ava2 s . co m*/ */ public void extractKeyphrases(HashSet<String> fileNames) throws Exception { // Check whether there is actually any data if (fileNames.size() == 0) { throw new Exception("Couldn't find any data in " + inputDirectoryName); } mauiFilter.setVocabularyName(vocabularyName); mauiFilter.setVocabularyFormat(vocabularyFormat); mauiFilter.setDocumentLanguage(documentLanguage); mauiFilter.setStemmer(stemmer); mauiFilter.setStopwords(stopwords); if (wikipedia != null) { mauiFilter.setWikipedia(wikipedia); } else if (wikipediaServer.equals("localhost") && wikipediaDatabase.equals("database")) { mauiFilter.setWikipedia(wikipedia); } else { mauiFilter.setWikipedia(wikipediaServer, wikipediaDatabase, cacheWikipediaData, wikipediaDataDirectory); } if (!vocabularyName.equals("none") && !vocabularyName.equals("wikipedia")) { loadThesaurus(stemmer, stopwords, vocabularyDirectory); mauiFilter.setVocabulary(vocabulary); } FastVector atts = new FastVector(3); atts.addElement(new Attribute("filename", (FastVector) null)); atts.addElement(new Attribute("doc", (FastVector) null)); atts.addElement(new Attribute("keyphrases", (FastVector) null)); Instances data = new Instances("keyphrase_training_data", atts, 0); System.err.println("-- Extracting keyphrases... 
"); Vector<Double> correctStatistics = new Vector<Double>(); Vector<Double> precisionStatistics = new Vector<Double>(); Vector<Double> recallStatistics = new Vector<Double>(); for (String fileName : fileNames) { double[] newInst = new double[3]; newInst[0] = (double) data.attribute(0).addStringValue(fileName); ; File documentTextFile = new File(inputDirectoryName + "/" + fileName + ".txt"); File documentTopicsFile = new File(inputDirectoryName + "/" + fileName + ".key"); try { String documentText; if (!documentEncoding.equals("default")) { documentText = FileUtils.readFileToString(documentTextFile, documentEncoding); } else { documentText = FileUtils.readFileToString(documentTextFile); } // Adding the text of the document to the instance newInst[1] = (double) data.attribute(1).addStringValue(documentText); } catch (Exception e) { System.err.println("Problem with reading " + documentTextFile); e.printStackTrace(); newInst[1] = Instance.missingValue(); } try { String documentTopics; if (!documentEncoding.equals("default")) { documentTopics = FileUtils.readFileToString(documentTopicsFile, documentEncoding); } else { documentTopics = FileUtils.readFileToString(documentTopicsFile); } // Adding the topics to the file newInst[2] = (double) data.attribute(2).addStringValue(documentTopics); } catch (Exception e) { if (debugMode) { System.err.println("No existing topics for " + documentTextFile); } newInst[2] = Instance.missingValue(); } data.add(new Instance(1.0, newInst)); mauiFilter.input(data.instance(0)); data = data.stringFreeStructure(); if (debugMode) { System.err.println("-- Processing document: " + fileName); } Instance[] topRankedInstances = new Instance[topicsPerDocument]; Instance inst; // Iterating over all extracted keyphrases (inst) while ((inst = mauiFilter.output()) != null) { int index = (int) inst.value(mauiFilter.getRankIndex()) - 1; if (index < topicsPerDocument) { topRankedInstances[index] = inst; } } if (debugMode) { System.err.println("-- Keyphrases 
and feature values:"); } FileOutputStream out = null; PrintWriter printer = null; if (!documentTopicsFile.exists()) { out = new FileOutputStream(documentTopicsFile); if (!documentEncoding.equals("default")) { printer = new PrintWriter(new OutputStreamWriter(out, documentEncoding)); } else { printer = new PrintWriter(out); } } double numExtracted = 0, numCorrect = 0; wikipedia = mauiFilter.getWikipedia(); HashMap<Article, Integer> topics = null; if (printGraph) { topics = new HashMap<Article, Integer>(); } int p = 0; String root = ""; for (int i = 0; i < topicsPerDocument; i++) { if (topRankedInstances[i] != null) { if (!topRankedInstances[i].isMissing(topRankedInstances[i].numAttributes() - 1)) { numExtracted += 1.0; } if ((int) topRankedInstances[i].value(topRankedInstances[i].numAttributes() - 1) == 1) { numCorrect += 1.0; } if (printer != null) { String topic = topRankedInstances[i].stringValue(mauiFilter.getOutputFormIndex()); printer.print(topic); if (printGraph) { Article article = wikipedia.getArticleByTitle(topic); if (article == null) { article = wikipedia.getMostLikelyArticle(topic, new CaseFolder()); } if (article != null) { if (root == "") { root = article.getTitle(); } topics.put(article, new Integer(p)); } else { if (debugMode) { System.err.println( "Couldn't find article for " + topic + " in " + documentTopicsFile); } } p++; } if (additionalInfo) { printer.print("\t"); printer.print(topRankedInstances[i].stringValue(mauiFilter.getNormalizedFormIndex())); printer.print("\t"); printer.print(Utils.doubleToString( topRankedInstances[i].value(mauiFilter.getProbabilityIndex()), 4)); } printer.println(); } if (debugMode) { System.err.println(topRankedInstances[i]); } } } if (printGraph) { String graphFile = documentTopicsFile.getAbsolutePath().replace(".key", ".gv"); computeGraph(topics, root, graphFile); } if (numExtracted > 0) { if (debugMode) { System.err.println("-- " + numCorrect + " correct"); } double totalCorrect = mauiFilter.getTotalCorrect(); 
correctStatistics.addElement(new Double(numCorrect)); precisionStatistics.addElement(new Double(numCorrect / numExtracted)); recallStatistics.addElement(new Double(numCorrect / totalCorrect)); } if (printer != null) { printer.flush(); printer.close(); out.close(); } } if (correctStatistics.size() != 0) { double[] st = new double[correctStatistics.size()]; for (int i = 0; i < correctStatistics.size(); i++) { st[i] = correctStatistics.elementAt(i).doubleValue(); } double avg = Utils.mean(st); double stdDev = Math.sqrt(Utils.variance(st)); if (correctStatistics.size() == 1) { System.err.println("\n-- Evaluation results based on 1 document:"); } else { System.err.println("\n-- Evaluation results based on " + correctStatistics.size() + " documents:"); } System.err.println("Avg. number of correct keyphrases per document: " + Utils.doubleToString(avg, 2) + " +/- " + Utils.doubleToString(stdDev, 2)); st = new double[precisionStatistics.size()]; for (int i = 0; i < precisionStatistics.size(); i++) { st[i] = precisionStatistics.elementAt(i).doubleValue(); } double avgPrecision = Utils.mean(st); double stdDevPrecision = Math.sqrt(Utils.variance(st)); System.err.println("Precision: " + Utils.doubleToString(avgPrecision * 100, 2) + " +/- " + Utils.doubleToString(stdDevPrecision * 100, 2)); st = new double[recallStatistics.size()]; for (int i = 0; i < recallStatistics.size(); i++) { st[i] = recallStatistics.elementAt(i).doubleValue(); } double avgRecall = Utils.mean(st); double stdDevRecall = Math.sqrt(Utils.variance(st)); System.err.println("Recall: " + Utils.doubleToString(avgRecall * 100, 2) + " +/- " + Utils.doubleToString(stdDevRecall * 100, 2)); double fMeasure = 2 * avgRecall * avgPrecision / (avgRecall + avgPrecision); System.err.println("F-Measure: " + Utils.doubleToString(fMeasure * 100, 2)); System.err.println(""); } mauiFilter.batchFinished(); }
From source file:org.ecoinformatics.datamanager.database.DelimitedReader.java
private String[] processQuoteCharacterOneRowData(String oneRowData) throws Exception { String[] elements = null;//from w w w. j a va 2 s . c o m Vector elementsVector = new Vector(); if (oneRowData == null) { return elements; } quoteCharacter = transferQuoteCharacter(quoteCharacter); char quote = '#'; boolean quoted = false; if (quoteCharacter != null) { quoted = true; quote = quoteCharacter.charAt(0); } char literal = '/'; boolean literaled = false; if (literalCharacter != null) { literaled = true; literal = literalCharacter.charAt(0); } if (literaled && literalCharacter.length() != 1) { throw new Exception("Literal Character length should be 1 character in EML"); } char currentChar = '2'; StringBuffer fieldData = new StringBuffer(); int length = oneRowData.length(); int priviousDelimiterIndex = -2; int currentDelimiterIndex = -2; int delimiterLength = delimiter.length(); boolean startQuote = false; boolean delimiterAtEnd = false; //this string buffer is only for deleting if hit a delimiter StringBuffer delimiterStorage = new StringBuffer(delimiter.length()); for (int i = 0; i < length; i++) { currentChar = oneRowData.charAt(i); //System.out.println("current char is "+currentChar); fieldData.append(currentChar); if (i < delimiterLength) { delimiterStorage.append(currentChar); } else { //delimiterStorage.deleteCharAt(position); delimiterStorage = shiftBuffer(delimiterStorage, currentChar); } //System.out.println("current delimiter storage content is "+delimiterStorage.toString()); //System.out.println("currnet value in the string buffer is "+fieldData.toString()); // we should check if there is quoteCharacter in the string. 
if (quoted && currentChar == quote) { char previousChar = '1'; boolean escapingQuote = false; // check if this quote is escaped if (literaled) { if ((i - 1) >= 0) { previousChar = oneRowData.charAt(i - 1); if (previousChar == literal) { escapingQuote = true; // delette the literal character if (!includeLiteralCharacter) { //if we don't want literal character in the data, //we should delete literal character. int fieldLength = fieldData.length(); if ((fieldLength - 1 - 1) >= 0) { fieldData.deleteCharAt(fieldLength - 1 - 1); } } } } } if (!escapingQuote) { if (!startQuote) { //System.out.println("start quote"); startQuote = true; } else { //System.out.println("end quote"); // at end of quote //put string buffers value into vector and reset string buffer startQuote = false; } } } //found a delimiter if (delimiterStorage.indexOf(delimiter) != -1 && !startQuote) { //check if there is literal character before the delimiter, //if does, this we should skip this delmiter int indexOfCharBeforeDelimiter = i - delimiterLength; boolean escapeDelimiter = false; if (literaled && indexOfCharBeforeDelimiter >= 0) { char charBeforeDelimiter = oneRowData.charAt(indexOfCharBeforeDelimiter); ////there is a literal character before delimiter we should skip this demlimiter if (charBeforeDelimiter == literal) { if (!includeLiteralCharacter) { //if we don't want literal character in the data, //we should delete literal character. int fieldLength = fieldData.length(); if ((fieldLength - delimiterLength - 1) >= 0) { fieldData.deleteCharAt(fieldLength - delimiterLength - 1); } } escapeDelimiter = true; continue; } } // check if the delimiter is in the end of the string if (i == (length - 1) && !startQuote && !escapeDelimiter) { delimiterAtEnd = true; } ////here we should treat sequential delimiter as single delimiter if (collapseDelimiters) { priviousDelimiterIndex = currentDelimiterIndex; currentDelimiterIndex = i; //there is nothing between two delimiter, should skip it. 
if ((currentDelimiterIndex - priviousDelimiterIndex) == delimiterLength) { //delete sequnced delimiter fieldData = new StringBuffer(); continue; } } String value = ""; int delimiterIndex = fieldData.lastIndexOf(delimiter); if (delimiterIndex == 0) { //this path means field data on has delimiter, no real data value = ""; } else { value = fieldData.substring(0, delimiterIndex); } elementsVector.add(value); //reset string buffer fieldData fieldData = new StringBuffer(); } } // if startQuote is true at the end, which means there is no close quote character in this row, // code should throw an exception if (startQuote) { throw new Exception("There is a un-closed quote in data file"); } // add last field. If this string end of delimiter, we need add a "" // else, we need to add the value in string buffer. String lastFieldValue = null; if (delimiterAtEnd == true) { //this path means field data on has delimiter, no real data lastFieldValue = ""; } else { lastFieldValue = fieldData.toString(); } elementsVector.add(lastFieldValue); //transform vector to string array int size = elementsVector.size(); elements = new String[size]; for (int i = 0; i < size; i++) { elements[i] = (String) elementsVector.elementAt(i); } return elements; }
From source file:focusedCrawler.util.parser.PaginaURL.java
protected void separadorTextoCodigo(String arquivo) { // arquivo equivale ao codigo HTML da pagina if (codes.size() == 0) { loadHashCodes();/*from w w w . j a v a2 s . c om*/ } // System.out.println(arquivo); boolean obj_isRDF = false; boolean ignorar_espacos = true; boolean tag_tipo_fim = false; boolean tag_tipo_vazia = true; boolean em_script = false; boolean ehInicioALT = true; boolean em_titulo = false; boolean em_option = false; boolean em_comentario = false; int num_comentario = 0; int PONTUACAO_PALAVRAS_TEXTO = 2; int PONTUACAO_PALAVRAS_OPTION = 1; int PONTUACAO_PALAVRAS_URL = 3; int PONTUACAO_PALAVRAS_META = 1; int PONTUACAO_PALAVRAS_TITULO = 7; int PONTUACAO_PALAVRAS_DESCRIPTION = 5; int PONTUACAO_PALAVRAS_ALT = 1; int posicao_da_palavra = 1; int numOfHtmlTags = 0; // UTILIZANDO AS PALAVRAS DA URL COMO INFORMACAO TEXTUAL if (pagina != null && !filterURL) { StringTokenizer url_pontos = new StringTokenizer(pagina.getHost(), "./:"); while (url_pontos.hasMoreTokens()) { String parte_host = url_pontos.nextToken(); if (!parte_host.equals("www") && !parte_host.equals("org") && !parte_host.equals("gov") && !parte_host.equals("com") && !parte_host.equals("br")) { boolean adicionou = adicionaAoVetorDeTexto(parte_host); if (adicionou) { adicionaTermoPosicao(parte_host, posicao_da_palavra); // atualiza o centroide adicionaPontuacaoTermo(parte_host, PONTUACAO_PALAVRAS_URL); String parte_host_sem_acento = Acentos.retirarNotacaoHTMLAcentosANSI(parte_host); if (!parte_host_sem_acento.equals(parte_host)) { adicionou = adicionaAoVetorDeTexto(parte_host_sem_acento); if (adicionou) { adicionaTermoPosicao(parte_host_sem_acento, posicao_da_palavra); // atualiza o centroide adicionaPontuacaoTermo(parte_host_sem_acento, PONTUACAO_PALAVRAS_URL); } } posicao_da_palavra++; } } } } boolean em_body = false; boolean em_meta_robots = false; boolean tagScript = false; boolean tagTitulo = false; boolean tagBody = false; boolean tagOption = false; int pos_caracter_especial = -1; char 
quote_char = '\0'; URL base = pagina; // pagina = URL da pagina atual... Vector frames = new Vector(); char c = '\0'; char ant1 = '\0'; char ant2 = '\0'; int n = 0; int n_anterior = 0; String str = ""; String anchor = ""; int numOfwordsAnchor = 0; LinkNeighborhood ln = null; String tagName = ""; String lastTag = ""; String atributo = ""; boolean insideATag = false; boolean em_meta_description = false; // thiago String str_da_metatag_description = null; // thiago final int INICIO = 1; final int TAG_NAME = 2; final int TOKEN_PALAVRA = 3; final int PALAVRA = 4; final int ATRIBUTO = 5; final int FECHANDO = 6; final int IGUAL = 7; final int VALOR = 8; final int META_TAG = 10; final int ALT_TAG = 11; int estado = INICIO; try { // FileOutputStream fout = null; // DataOutputStream dout = null; //System.out.println("FORM!!! : " + form.getURL()); // try { // fout = new FileOutputStream("/home/lbarbosa/test"); // dout = new DataOutputStream( fout ); // dout.writeBytes("begin"); // // } // catch (FileNotFoundException ex) { // ex.printStackTrace(); // } // catch (IOException ex) { // ex.printStackTrace(); // } while (n < arquivo.length()) { if (n_anterior < n) { /* we advanced a character */ ant1 = ant2; ant2 = c; } n_anterior = n; c = arquivo.charAt(n); // System.out.print(c+""); // int ascii = (int) c; // System.out.print(ascii); // System.out.println(""); // dout.writeBytes(c+""); // dout.flush(); // if(c=='\u0000'){ // organizaDados(); // return; // } if (em_comentario && num_comentario > 0) { if ((ant1 == '-') && (ant2 == '-') || (c == '>')) { num_comentario--; if (num_comentario == 0) em_comentario = false; } n++; } else if (ignorar_espacos) { if (Character.isWhitespace(c)) { n++; } else { ignorar_espacos = false; } } else { boolean fimDeString = false; switch (estado) { case INICIO: /* INICIO - Esperando texto ou caracter de abertura de tag '<' */ // System.out.println("Entrei no inicio e caractere=" + c); if (c == '<') { estado = TAG_NAME; tagName = ""; tag_tipo_fim = 
false; em_meta_robots = false; n++; } else { estado = TOKEN_PALAVRA; pos_caracter_especial = -1; } quote_char = '\0'; break; case TOKEN_PALAVRA: // if(str.contains("1044")){ // System.out.println("TEST"); // } /* faz o token da string */ if ((caracterFazParteDePalavra(c)) || (c == ';') || (c == '&')) { str += converteChar(c); n++; int begin = str.indexOf("&#"); int end = str.indexOf(";"); if (begin != -1 && end != -1) { String specialchar = str.substring(begin + 2, end); try { int hex = Integer.parseInt(specialchar); char uni = (char) hex; String unicode = uni + ""; str = str.substring(0, begin) + unicode; // System.out.println(unicode); pos_caracter_especial = -1; continue; } catch (Exception e) { // TODO: handle exception } } if (str.toLowerCase().contains("ñ")) { str = str.toLowerCase().replace("ñ", "n"); pos_caracter_especial = -1; continue; } if (str.contains("")) { str = str.replace("", "n"); pos_caracter_especial = -1; continue; } if (c == '&') { pos_caracter_especial = n; } else // System.out.println(str + ":" + pos_caracter_especial); if (pos_caracter_especial != -1) { int posicao = str.length() - (n - pos_caracter_especial) - 1; char ch = caracterEspecial(str, posicao); if (ch != '\0') { if (caracterFazParteDePalavra(ch)) { str = str.substring(0, posicao) + converteChar(ch); } else { str = str.substring(0, posicao); estado = PALAVRA; if (em_titulo) { titulo += str + ch; } } } if ((c == ';') || (n - pos_caracter_especial) > 9) { pos_caracter_especial = -1; } } } else { estado = PALAVRA; if (em_titulo) { titulo += str; } if (!(c == '<')) { if (em_titulo) { // if(!Character.isLetterOrDigit(c)){ // c = ' '; // } titulo += c; } n++; } } break; case PALAVRA: // System.out.println("PALAVRA:"+lastTag); if (insideATag) { anchor = anchor + " " + str.toLowerCase(); numOfwordsAnchor++; // insideATag = false; // System.out.println("ANCHOR:"+anchor); } // if(anchor.indexOf("school") != -1){ // System.out.println("TEST"); // } /* PALAVRA - palavra pronta */ if 
(!em_script && (str.length() > 0)) { if (em_body && paragrafo.length() + str.length() < MAX_PARAGRAPH_SIZE) { if (Character.isWhitespace(c)) { paragrafo += str + c; // atualiza variavel paragrafo } else { paragrafo += str + " "; } } if (!em_titulo) { boolean adicionou = adicionaAoVetorDeTexto(str); if (adicionou) { around.add(str); adicionaTermoPosicao(str, posicao_da_palavra); // atualiza o centroide if (em_option) { adicionaPontuacaoTermo(str, PONTUACAO_PALAVRAS_OPTION); } else { adicionaPontuacaoTermo(str, PONTUACAO_PALAVRAS_TEXTO); } String str_sem_acento = Acentos.retirarNotacaoHTMLAcentosANSI(str); if (!str_sem_acento.equals(str)) { adicionou = adicionaAoVetorDeTexto(str_sem_acento); if (adicionou) { adicionaTermoPosicao(str_sem_acento, posicao_da_palavra); // atualiza o centroide if (em_option) { adicionaPontuacaoTermo(str_sem_acento, PONTUACAO_PALAVRAS_OPTION); } else { adicionaPontuacaoTermo(str_sem_acento, PONTUACAO_PALAVRAS_TEXTO); } } } posicao_da_palavra++; } } else { boolean adicionou = adicionaAoVetorDeTexto(str); if (adicionou) { adicionaTermoPosicao(str, posicao_da_palavra); // atualiza o centroide adicionaPontuacaoTermo(str, PONTUACAO_PALAVRAS_TITULO); String str_sem_acento = Acentos.retirarNotacaoHTMLAcentosANSI(str); if (!str_sem_acento.equals(str)) { adicionou = adicionaAoVetorDeTexto(str_sem_acento); if (adicionou) { adicionaTermoPosicao(str_sem_acento, posicao_da_palavra); // atualiza o centroide adicionaPontuacaoTermo(str_sem_acento, PONTUACAO_PALAVRAS_TITULO); } } posicao_da_palavra++; } } } estado = INICIO; ignorar_espacos = true; str = ""; break; case TAG_NAME: /* TAG_NAME - terminated by space, \r, \n, >, / */ if (em_script) { if (c != '>') { if ("/script".startsWith(str + c) || "/SCRIPT".startsWith(str + c) || "/style".startsWith(str + c) || "/STYLE".startsWith(str + c)) { str += c; } else { str = ""; estado = INICIO; } n++; } else if (c == '>') { if (str.equalsIgnoreCase("/script") || str.equalsIgnoreCase("/style")) { fimDeString = 
true; tag_tipo_fim = true; tagScript = true; estado = FECHANDO; } else { n++; } } } else { if (str.equals("BASE")) { // System.out.println("EM TAG_NAME, str="+str + ", c="+c+", tagTitulo="+tagTitulo); if (c == '>') { estado = FECHANDO; } else { n++; } } else { // if ((c == '"') || (c == '\'')) { // if ((c == '\'')) { // organizaDados(); //new // return; /* error - these are not allowed in tagname */ // } else if (c == ' ') { /* * Note: Both mozilla and XML don't allow any spaces between < and tagname. * Need to check for zero-length tagname. */ // if (str.length() == 0) { // organizaDados(); //new // return; /* str is the buffer we're working on */ // } fimDeString = true; estado = ATRIBUTO; ignorar_espacos = true; n++; } else if (c == '/') { if (tagName.length() == 0) { tag_tipo_fim = true; /* indicates end tag if no tag name read yet */ } else if (obj_isRDF) { /* otherwise its an empty tag (RDF only) */ fimDeString = true; tag_tipo_vazia = true; estado = FECHANDO; } // else { // organizaDados(); //new // return; // } n++; } else if (c == '>') { fimDeString = true; // tag_tipo_fim = true; estado = FECHANDO; } else if ((c != '\r') && (c != '\n')) { // System.out.println("Estou NO CAMINHO CERTO!!!!"); str += c; n++; } else { fimDeString = true; estado = ATRIBUTO; /* note - mozilla allows newline after tag name */ ignorar_espacos = true; n++; } if (fimDeString) { //if (str.equals("!--")) { /* html comment */ if (str.startsWith("!--")) { /* html comment */ em_comentario = true; num_comentario++; estado = INICIO; } else { str = str.toLowerCase(); tagName = str; tagBody = str.equals("body"); tagTitulo = str.equals("title"); tagOption = str.equals("option"); if (tagName.equals("html")) { if (!tag_tipo_fim) { numOfHtmlTags++; } else { numOfHtmlTags--; } // System.out.println(">>>>>>>>>>>>>" + numOfHtmlTags); } //if (tagTitulo) { // System.out.println("achot tag titulo " + str); //} tagScript = str.equals("script") || str.equals("style"); if (str.equals("form")) { 
this.forms++; } } str = ""; fimDeString = false; } // System.out.println("A STRING DO ATRIBUTO EH: " + str + " estado novo "+ estado); } } break; case FECHANDO: /* FECHANDO - expecting a close bracket, anything else is an error */ // System.out.println("END OF TAG:"+tagName); // if(ln!=null){ // ln.setAnchor(anchor); // System.out.println("URL---"+ln.getLink()); // System.out.println("ANC---"+ln.getAnchor()); // } if ((tag_tipo_fim && tagName.equals("a")) || tagName.equals("area")) { insideATag = false; if (ln != null) { Vector anchorTemp = new Vector(); // System.out.println("URL---"+ln.getLink()); // System.out.println("ANC---"+anchor); StringTokenizer tokenizer = new StringTokenizer(anchor, " "); while (tokenizer.hasMoreTokens()) { anchorTemp.add(tokenizer.nextToken()); } String[] anchorArray = new String[anchorTemp.size()]; anchorTemp.toArray(anchorArray); ln.setAnchor(anchorArray); ln.setAroundPosition(around.size()); ln.setNumberOfWordsAnchor(numOfwordsAnchor); linkNeigh.add(ln.clone()); // anchor = ""; ln = null; } anchor = ""; } // System.out.println("Entrei em fechando"); if (c == '>') { if (tagScript) { /* we're inside a script tag (not RDF) */ em_script = !tag_tipo_fim; } if (tagTitulo) { em_titulo = !tag_tipo_fim; //System.out.println("EM tag titulo " + str + ", em_titulo"+ em_titulo); //System.out.println("EM tag titulo " + str + ", tag_tipo_fim"+ tag_tipo_fim); //System.out.println("EM tag titulo " + str + ", tagTitulo"+ tagTitulo); } if (tagBody) { em_body = !tag_tipo_fim; // System.out.println("Entrei no estado inicial"); } if (tagOption) { em_option = !tag_tipo_fim; // System.out.println("Entrei no estado inicial"); } // if(tag_tipo_fim && tagName.equals("html") && numOfHtmlTags == 0){ // organizaDados(); // return; // } tagTitulo = false; tagBody = false; tagScript = false; tagOption = false; estado = INICIO; str = ""; tagName = ""; numOfwordsAnchor = 0; ignorar_espacos = true; n++; } else { organizaDados(); //new return; /* error */ } break; case 
ATRIBUTO: /* ATRIBUTO - expecting an attribute name, or / (RDF only) or > indicating no more attributes */ /* * accept attributes without values, such as <tag attr1 attr2=val2> * or <tag attr2=val2 attr1> */ if (quote_char == c) { quote_char = '\0'; /* close quote */ } else if (((c == '"') || (c == '\'')) && (quote_char == '\0')) { /* start a quote if none is already in effect */ quote_char = c; } if (quote_char == '\0') { if ((((c == '/') && obj_isRDF) || (c == '>')) && (str.length() == 0)) { estado = FECHANDO; } else if ((c == ' ') || (c == '=') || (c == '\n') || (c == '\r') || ((c == '/') && obj_isRDF) || (c == '>')) { atributo = str; str = ""; estado = IGUAL; //System.out.println("[ATRIBUTO c='"+c+"', estado=IGUAL], atributo="+atributo); /* if non-null attribute name */ } else { str += c; n++; } } else { str += c; n++; } break; case IGUAL: atributo = atributo.toLowerCase(); tagName = tagName.toLowerCase(); // System.out.println("------------------------------------"); // System.out.println(" A TAG NAME EH: " + tagName); // if(atributo.equals("src") && tagName.equals("img") && (c == '=')) // { // ignorar_espacos = true; // estado = IMAGEM; // n++; // } // else // { /**** if (atributo.equals("content") && tagName.equals("meta") && (c == '=')) { ignorar_espacos = true; estado = META_TAG; n++; } else if (atributo.equals("alt") && tagName.equals("img") && (c == '=')) { ignorar_espacos = true; estado = ALT_TAG; n++; } else { ***/ if ((c == ' ') || (c == '\n') || (c == '\r')) { ignorar_espacos = true; n++; } else if (c == '=') { ignorar_espacos = true; estado = VALOR; n++; } else { /* no value for the attribute - error in RDF? 
*/ str = ""; atributo = ""; // estado = ATRIBUTO; if (c == '>') { // System.out.println("Entrei aqui no MENOR QUE"); tagScript = false; tagBody = false; tagTitulo = false; estado = FECHANDO; } else { ignorar_espacos = true; // System.out.println("Entrei PARA ANDAR NA LINHA"); n++; } } // } // } break; case ALT_TAG: // nao usa mais, foi mudado, ver no estado VALOR if (((c == ' ') || (c == '"')) && ehInicioALT) { ignorar_espacos = false; boolean adicionou = adicionaAoVetorDeTexto(str); if (adicionou) { adicionaTermoPosicao(str, posicao_da_palavra); // atualiza o centroide adicionaPontuacaoTermo(str, PONTUACAO_PALAVRAS_ALT); String str_sem_acento = Acentos.retirarNotacaoHTMLAcentosANSI(str); if (!str_sem_acento.equals(str)) { adicionou = adicionaAoVetorDeTexto(str_sem_acento); if (adicionou) { adicionaTermoPosicao(str_sem_acento, posicao_da_palavra); // atualiza o centroide adicionaPontuacaoTermo(str_sem_acento, PONTUACAO_PALAVRAS_ALT); } } posicao_da_palavra++; } str = ""; ehInicioALT = false; } else { if (c == '>') { // estado = INICIO; //nao sei se esta' ok estado = VALOR; ehInicioALT = true; } else { if (c == '.' || c == ',') { } else { if ((c != '\0') && (c != '\r') && (c != '\n') && (c != '"')) { str += c; } else { if (c == '"') { estado = ATRIBUTO; ehInicioALT = true; } } } } } n++; break; case META_TAG: // nao usa mais, foi mudado, ver no estado VALOR [ogm] if ((c == ' ') || (c == '"') || (c == '\n') || (c == ',')) { ignorar_espacos = false; textoMeta.addElement(str); // adiciona a palavra na variavel texto for (int contadorI = 0; contadorI < PONTUACAO_PALAVRAS_META; contadorI++) { adicionaTermoMetaPosicao(str, textoMeta.size()); } str = ""; } else { if (c == '>') { estado = INICIO; // estado = VALOR; } else { if (c == '.' || c == ',') { } else { if ((c != '\0') && (c != '\r') && (c != '\n') && (c != '"')) { str += c; } } } } n++; break; case VALOR: /* expecting a value, or space, / (RDF only), or > indicating end of value. 
*/ /* whether the current character should be included in value */ boolean include = true; // System.out.println("LENGTH:"+str.length()); // if(str.length() > 300){ // System.out.println("TEST"); // } if (quote_char == c || str.length() > 10000) { quote_char = '\0'; /* close quote */ include = false; } else if (((c == '"') || (c == '\'')) && (quote_char == '\0')) { /* start a quote if none is already in effect */ quote_char = c; include = false; } if (quote_char == '\0') { if ((c == '/') && obj_isRDF) { fimDeString = true; estado = FECHANDO; n++; // } else if (c == '>' || str.length() > 10000) { } else if (c == '>' || str.length() > 100000) { fimDeString = true; estado = FECHANDO; } else if ((c == ' ') || (c == '\r') || (c == '\n')) { fimDeString = true; ignorar_espacos = true; estado = ATRIBUTO; /* if non-null value name */ n++; } else if (include) { str += c; n++; } else { n++; } } else if (include) { str += c; n++; } else { n++; } if (fimDeString) { tagName = tagName.toLowerCase(); // System.out.println("TAG:"+tagName); atributo = atributo.toLowerCase(); // System.out.println("[VALOR, estado='"+estado+"', c="+c+"] "+tagName+"."+atributo+"="+str); if (tagName.equals("a") && atributo.equals("href")) { insideATag = true; String urlTemp = adicionaLink(str, base); // System.out.println("----URL:"+urlTemp); if (urlTemp != null && urlTemp.startsWith("http")) { if (ln != null) { Vector anchorTemp = new Vector(); StringTokenizer tokenizer = new StringTokenizer(anchor, " "); while (tokenizer.hasMoreTokens()) { anchorTemp.add(tokenizer.nextToken()); } String[] anchorArray = new String[anchorTemp.size()]; anchorTemp.toArray(anchorArray); ln.setAnchor(anchorArray); ln.setAroundPosition(around.size()); ln.setNumberOfWordsAnchor(numOfwordsAnchor); linkNeigh.add(ln.clone()); anchor = ""; ln = null; } ln = new LinkNeighborhood(new URL(urlTemp)); } // System.out.println("CREATE LINK:" + urlTemp); } else if (tagName.equals("link") && atributo.equals("href")) { String urlTemp = 
adicionaLink(str, base); if (urlTemp != null && urlTemp.startsWith("http")) { ln = new LinkNeighborhood(new URL(urlTemp)); } // System.out.println("CREATE LINK:" + urlTemp); } else if (tagName.equals("area") && atributo.equals("href")) { adicionaLink(str, base); String urlTemp = adicionaLink(str, base); if (urlTemp != null && urlTemp.startsWith("http")) { ln = new LinkNeighborhood(new URL(urlTemp)); } } else if (tagName.equals("img") && atributo.equals("src")) { if (ln != null) { ln.setImgSource(str); } try { imagens.addElement(parseLink(base, str).toString()); } catch (Exception e) { // TODO: handle exception } } // else if((tagName.equals("area") || tagName.equals("a"))&& atributo.equals("alt")){ // anchor = anchor + " " + str.toLowerCase(); // } else if (tagName.equals("frame") && atributo.equals("src")) { frames.addElement(str); adicionaLink(str, base); } else if (tagName.equals("img") && (atributo.equals("alt") || atributo.equals("title") || atributo.equals("id"))) { // System.out.println("img.alt.str="+str); Vector<String> altWords = new Vector<String>(); StringTokenizer st = new StringTokenizer(str); while (st.hasMoreTokens()) { String token = st.nextToken(); if (token.contains("")) { token = token.replace("", "n"); } token = token.toLowerCase(); if (token.contains("ñ")) { token = token.replace("ñ", "n"); } if (token.contains("ñ")) { token = token.replace("ñ", "n"); } if (token.contains("")) { token = token.replace("", "n"); } altWords.add(token); if (!caracterFazParteDePalavra(token.charAt(0))) { token = token.substring(1); } if (token.equals("")) { break; } if (!caracterFazParteDePalavra(token.charAt(token.length() - 1))) { token = token.substring(0, token.length() - 1); } if (token.equals("")) { break; } boolean adicionou = adicionaAoVetorDeTexto(token); if (adicionou) { adicionaTermoPosicao(token, posicao_da_palavra); // atualbejiza o centroide adicionaPontuacaoTermo(token, PONTUACAO_PALAVRAS_ALT); String token_sem_acento = 
Acentos.retirarNotacaoHTMLAcentosANSI(token); if (!token_sem_acento.equals(token)) { adicionou = adicionaAoVetorDeTexto(token_sem_acento); if (adicionou) { adicionaTermoPosicao(token_sem_acento, posicao_da_palavra); // atualiza o centroide adicionaPontuacaoTermo(token_sem_acento, PONTUACAO_PALAVRAS_ALT); } } posicao_da_palavra++; } } if (ln != null) { String[] current = ln.getImgAlt(); if (current == null) { String[] terms = new String[altWords.size()]; altWords.toArray(terms); ln.setImgAlt(terms); } else { String[] terms = new String[altWords.size() + current.length]; int indexTerms = 0; for (int i = 0; i < current.length; i++, indexTerms++) { terms[indexTerms] = current[i]; } for (int i = 0; i < altWords.size(); i++, indexTerms++) { terms[indexTerms] = altWords.elementAt(i); } ln.setImgAlt(terms); } } } else if (tagName.equals("meta") && atributo.equals("content")) { if (em_meta_description) { str_da_metatag_description = str; em_meta_description = false; if (USAR_DESCRIPTION) { StringTokenizer st = new StringTokenizer(str); while (st.hasMoreTokens()) { String token = st.nextToken(); int posicao = texto.size(); boolean adicionou = adicionaAoVetorDeTexto(token); if (adicionou) { adicionaTermoPosicao(token, posicao_da_palavra); // atualiza o centroide adicionaPontuacaoTermo(token, PONTUACAO_PALAVRAS_DESCRIPTION); String token_sem_acento = Acentos .retirarNotacaoHTMLAcentosANSI(token); if (!token_sem_acento.equals(token)) { adicionou = adicionaAoVetorDeTexto(token_sem_acento); if (adicionou) { adicionaTermoPosicao(token_sem_acento, posicao_da_palavra); // atualiza o centroide adicionaPontuacaoTermo(token_sem_acento, PONTUACAO_PALAVRAS_DESCRIPTION); } } posicao_da_palavra++; } } } } // System.out.println("meta.content.str="+str); StringTokenizer st = new StringTokenizer(str); while (st.hasMoreTokens()) { String token = st.nextToken(); textoMeta.addElement(token); // adiciona a palavra na variavel texto for (int contadorI = 0; contadorI < PONTUACAO_PALAVRAS_META; 
contadorI++) { adicionaTermoMetaPosicao(token, textoMeta.size()); } } } else if (tagName.equals("meta") && atributo.equals("name")) { if (str.toLowerCase().equals("robot")) { em_meta_robots = true; } if (str.toLowerCase().equals("description") || str.toLowerCase().equals("descricao")) { //System.out.println("meta.description.str="+str); em_meta_description = true; } } else if (em_meta_robots && atributo.equals("content")) { if (str.toLowerCase().indexOf("noindex") != -1) { noindex = true; } if (str.toLowerCase().indexOf("nofollow") != -1) { nofollow = true; } } else if (tagName.equals("base") && atributo.equals("href")) { try { base = parseLink(pagina, str); } catch (Exception e) { } // ignora } str = ""; atributo = ""; fimDeString = false; } break; default: break; } } } if (USAR_DESCRIPTION) { if (str_da_metatag_description != null) { paragrafo = str_da_metatag_description; } } if (estado == PALAVRA && str != null && !"".equals(str)) { boolean adicionou = adicionaAoVetorDeTexto(str); if (adicionou) { adicionaTermoPosicao(str, posicao_da_palavra); // atualiza o centroide adicionaPontuacaoTermo(str, PONTUACAO_PALAVRAS_TEXTO); String str_sem_acento = Acentos.retirarNotacaoHTMLAcentosANSI(str); if (!str_sem_acento.equals(str)) { adicionou = adicionaAoVetorDeTexto(str_sem_acento); if (adicionou) { adicionaTermoPosicao(str_sem_acento, posicao_da_palavra); // atualiza o centroide adicionaPontuacaoTermo(str_sem_acento, PONTUACAO_PALAVRAS_TEXTO); } } posicao_da_palavra++; } } } catch (Exception e) { e.printStackTrace(); } this.frames = frames.size(); this.images = imagens.size(); organizaDados(); }
From source file:edu.ku.brc.specify.tasks.subpane.wb.DataImportDialog.java
/** * Parses the given import xls file according to the users selection and creates/updates the * Preview table, showing the user how the import options effect the way the data will be * imported into the spreadsheet.//from ww w. j a v a 2s. c om * * @param table - the table to display the data * @return JTable - the table to display the data */ private JTable setXLSTableData(final JTable table) { int numRows = 0; int numCols = 0; String[] headers = {}; Vector<Vector<String>> tableDataVector = new Vector<Vector<String>>(); Vector<String> rowData = new Vector<String>(); Vector<String> headerVector = new Vector<String>(); DateWrapper scrDateFormat = AppPrefsCache.getDateWrapper("ui", "formatting", "scrdateformat"); try { log.debug("setXLSTableData - file - " + configXLS.getFile().toString()); InputStream input = new FileInputStream(configXLS.getFile()); POIFSFileSystem fs = new POIFSFileSystem(input); HSSFWorkbook workBook = new HSSFWorkbook(fs); HSSFSheet sheet = workBook.getSheetAt(0); Vector<Integer> badHeads = new Vector<Integer>(); Vector<Integer> emptyCols = new Vector<Integer>(); ((ConfigureXLS) config).checkHeadsAndCols(sheet, badHeads, emptyCols); if (badHeads.size() > 0 && doesFirstRowHaveHeaders) { if (table != null) { ((ConfigureXLS) config).showBadHeadingsMsg(badHeads, emptyCols, getTitle()); } this.doesFirstRowHaveHeaders = false; try { ignoreActions = true; this.containsHeaders.setSelected(false); } finally { ignoreActions = false; } if (table != null) { return table; } } boolean firstRow = true; //quick fix to prevent ".0" at end of catalog numbers etc NumberFormat nf = NumberFormat.getInstance(); nf.setMinimumFractionDigits(0); nf.setMaximumFractionDigits(20); nf.setGroupingUsed(false); //gets rid of commas int maxCols = 0; // Iterate over each row in the sheet Iterator<?> rows = sheet.rowIterator(); while (rows.hasNext()) { numCols = 0; rowData = new Vector<String>(); HSSFRow row = (HSSFRow) rows.next(); //log.debug(row.getLastCellNum()+" 
"+row.getPhysicalNumberOfCells()); int maxSize = Math.max(row.getPhysicalNumberOfCells(), row.getLastCellNum()); if (maxSize > maxCols) { maxCols = maxSize; } while (numCols < maxSize) { if (emptyCols.indexOf(new Integer(numCols)) == -1) { HSSFCell cell = row.getCell(numCols); String value = null; // if cell is blank, set value to "" if (cell == null) { value = ""; } else { int type = cell.getCellType(); switch (type) { case HSSFCell.CELL_TYPE_NUMERIC: // The best I can do at this point in the app is to guess if a // cell is a date. // Handle dates carefully while using HSSF. Excel stores all // dates as numbers, internally. // The only way to distinguish a date is by the formatting of // the cell. (If you // have ever formatted a cell containing a date in Excel, you // will know what I mean.) // Therefore, for a cell containing a date, cell.getCellType() // will return // HSSFCell.CELL_TYPE_NUMERIC. However, you can use a utility // function, // HSSFDateUtil.isCellDateFormatted(cell), to check if the cell // can be a date. // This function checks the format against a few internal // formats to decide the issue, // but by its very nature it is prone to false negatives. 
if (HSSFDateUtil.isCellDateFormatted(cell)) { value = scrDateFormat.getSimpleDateFormat().format(cell.getDateCellValue()); //value = scrDateFormat.getSimpleDateFormat().format(cell.getDateCellValue()); } else { double numeric = cell.getNumericCellValue(); value = nf.format(numeric); } break; case HSSFCell.CELL_TYPE_STRING: value = cell.getRichStringCellValue().getString(); break; case HSSFCell.CELL_TYPE_BLANK: value = ""; break; case HSSFCell.CELL_TYPE_BOOLEAN: value = Boolean.toString(cell.getBooleanCellValue()); break; case HSSFCell.CELL_TYPE_FORMULA: value = UIRegistry.getResourceString("WB_FORMULA_IMPORT_NO_PREVIEW"); break; default: value = ""; log.error("unsuported cell type"); break; } } if (firstRow && doesFirstRowHaveHeaders) { checkUserColInfo(value, numCols); } if (isUserCol(numCols)) { rowData.add(value.toString()); } } numCols++; } if (doesFirstRowHaveHeaders && firstRow) { headerVector = rowData; headers = new String[rowData.size()]; } else if (!doesFirstRowHaveHeaders && firstRow) { //headers = createDummyHeaders(rowData.size()); tableDataVector.add(rowData); } else { tableDataVector.add(rowData); } firstRow = false; numRows++; } maxCols -= emptyCols.size(); if (!doesFirstRowHaveHeaders) { headerVector = createDummyHeadersAsVector(maxCols); headers = new String[maxCols]; } for (int i = 0; i < headerVector.size(); i++) { headers[i] = headerVector.elementAt(i); } printArray(headers); String[][] tableData = new String[tableDataVector.size()][maxCols]; for (int i = 0; i < tableDataVector.size(); i++) { Vector<String> v = tableDataVector.get(i); for (int j = 0; j < v.size(); j++) { tableData[i][j] = v.get(j).toString(); } } if (checkForErrors(headers, tableData)) { errorPanel.showDataImportStatusPanel(true); } else { errorPanel.showDataImportStatusPanel(false); } if ((doesFirstRowHaveHeaders ? 
numRows - 1 : numRows) > WorkbenchTask.MAX_ROWS) { hasTooManyRows = true; showTooManyRowsErrorDialog(); } else { hasTooManyRows = false; } log.debug(headers); log.debug(tableData); model = new PreviewTableModel(headers, tableData); JTable result = null; if (table == null) { result = new JTable(); result.setColumnSelectionAllowed(false); result.setRowSelectionAllowed(false); result.setCellSelectionEnabled(false); result.getTableHeader().setReorderingAllowed(false); result.setPreferredScrollableViewportSize(new Dimension(500, 100)); result.setAutoResizeMode(JTable.AUTO_RESIZE_OFF); } else { result = table; } result.setModel(model); result.setDefaultRenderer(String.class, new BiColorTableCellRenderer(false)); model.fireTableDataChanged(); model.fireTableStructureChanged(); return result; } catch (Exception ex) { UIRegistry.displayErrorDlgLocalized(UIRegistry.getResourceString("WB_ERROR_READING_IMPORT_FILE")); if (table != null) { String[] columnNames = {}; String[][] blankData = { {} }; model = new PreviewTableModel(columnNames, blankData); table.setModel(model); table.setColumnSelectionAllowed(false); table.setRowSelectionAllowed(false); table.setCellSelectionEnabled(false); table.getTableHeader().setReorderingAllowed(false); table.setPreferredScrollableViewportSize(new Dimension(500, 100)); table.setAutoResizeMode(JTable.AUTO_RESIZE_OFF); table.setDefaultRenderer(String.class, new BiColorTableCellRenderer(false)); model.fireTableDataChanged(); model.fireTableStructureChanged(); return table; } //log.error("Error attempting to parse input xls file:" + ex); //ex.printStackTrace(); } return null; }
From source file:com.sonicle.webtop.mail.Service.java
private Address[] eliminateDuplicates(Vector v, Address[] addrs) { if (addrs == null) { return null; }/* w w w. java 2 s .c om*/ int gone = 0; for (int i = 0; i < addrs.length; i++) { boolean found = false; // search the vector for this address for (int j = 0; j < v.size(); j++) { if (((InternetAddress) v.elementAt(j)).equals(addrs[i])) { // found it; count it and remove it from the input array found = true; gone++; addrs[i] = null; break; } } if (!found) { v.addElement(addrs[i]); // add new address to vector } } // if we found any duplicates, squish the array if (gone != 0) { Address[] a; // new array should be same type as original array // XXX - there must be a better way, perhaps reflection? if (addrs instanceof InternetAddress[]) { a = new InternetAddress[addrs.length - gone]; } else { a = new Address[addrs.length - gone]; } for (int i = 0, j = 0; i < addrs.length; i++) { if (addrs[i] != null) { a[j++] = addrs[i]; } } addrs = a; } return addrs; }
From source file:nl.nn.adapterframework.extensions.svn.ScanTibcoSolutionPipe.java
private void addFileContent(XMLStreamWriter xmlStreamWriter, String urlString, String type) throws XMLStreamException { xmlStreamWriter.writeStartElement("content"); xmlStreamWriter.writeAttribute("type", type); String content;//from www .ja va 2 s . c o m try { content = getHtml(urlString); } catch (Exception e) { error(xmlStreamWriter, "error occured during getting file content", e, true); content = null; } if (content != null) { Vector<String> warnMessage = new Vector<String>(); try { if (type.equals("jmsDest") || type.equals("jmsDestConf")) { // AMX - receive (for jmsInboundDest) Collection<String> c1 = XmlUtils.evaluateXPathNodeSet(content, "namedResource/@name"); if (c1 != null && c1.size() > 0) { if (c1.size() > 1) { warnMessage.add("more then one resourceName found"); } String resourceName = (String) c1.iterator().next(); xmlStreamWriter.writeStartElement("resourceName"); xmlStreamWriter.writeCharacters(resourceName); xmlStreamWriter.writeEndElement(); } else { warnMessage.add("no resourceName found"); } Collection<String> c2 = XmlUtils.evaluateXPathNodeSet(content, "namedResource/configuration/@jndiName"); if (c2 != null && c2.size() > 0) { if (c2.size() > 1) { warnMessage.add("more then one resourceJndiName found"); } String resourceJndiName = (String) c2.iterator().next(); xmlStreamWriter.writeStartElement("resourceJndiName"); xmlStreamWriter.writeCharacters(resourceJndiName); xmlStreamWriter.writeEndElement(); } else { warnMessage.add("no resourceJndiName found"); } } else if (type.equals("composite")) { // AMX - receive Collection<String> c1 = XmlUtils.evaluateXPathNodeSet(content, "composite/service/bindingAdjunct/property[@name='JmsInboundDestinationConfig']/@simpleValue"); if (c1 != null && c1.size() > 0) { for (Iterator<String> c1it = c1.iterator(); c1it.hasNext();) { xmlStreamWriter.writeStartElement("jmsInboundDest"); xmlStreamWriter.writeCharacters(c1it.next()); xmlStreamWriter.writeEndElement(); } } else { warnMessage.add("no jmsInboundDest 
found"); } // AMX - send Collection<String> c2 = XmlUtils.evaluateXPathNodeSet(content, "composite/reference/interface.wsdl/@wsdlLocation"); if (c2 != null && c2.size() > 0) { for (Iterator<String> c2it = c2.iterator(); c2it.hasNext();) { String itn = c2it.next(); String wsdl = null; try { URL url = new URL(urlString); URL wsdlUrl = new URL(url, itn); wsdl = getHtml(wsdlUrl.toString()); } catch (Exception e) { error(xmlStreamWriter, "error occured during getting wsdl file content", e, true); wsdl = null; } if (wsdl != null) { Collection<String> c3 = XmlUtils.evaluateXPathNodeSet(wsdl, // "definitions/service/port/targetAddress", // "concat(.,';',../../@name)"); "definitions/service/port/targetAddress"); if (c3 != null && c3.size() > 0) { for (Iterator<String> c3it = c3.iterator(); c3it.hasNext();) { xmlStreamWriter.writeStartElement("targetAddr"); xmlStreamWriter.writeCharacters(c3it.next()); xmlStreamWriter.writeEndElement(); } } else { warnMessage.add("no targetAddr found"); } } else { warnMessage.add("wsdl [" + itn + "] not found"); } } } else { warnMessage.add("no wsdlLocation found"); } } else if (type.equals("process")) { // BW - receive Double d1 = XmlUtils.evaluateXPathNumber(content, "count(ProcessDefinition/starter[type='com.tibco.plugin.soap.SOAPEventSource']/config)"); if (d1 > 0) { Collection<String> c1 = XmlUtils.evaluateXPathNodeSet(content, "ProcessDefinition/starter[type='com.tibco.plugin.soap.SOAPEventSource']/config/sharedChannels/jmsChannel/JMSTo"); if (c1 != null && c1.size() > 0) { for (Iterator<String> c1it = c1.iterator(); c1it.hasNext();) { xmlStreamWriter.writeStartElement("jmsTo"); xmlStreamWriter.writeAttribute("type", "soapEventSource"); xmlStreamWriter.writeCharacters(c1it.next()); xmlStreamWriter.writeEndElement(); } } else { warnMessage.add("no jmsTo found for soapEventSource"); } } else { warnMessage.add("no soapEventSource found"); } // BW - send Double d2 = XmlUtils.evaluateXPathNumber(content, 
"count(ProcessDefinition/activity[type='com.tibco.plugin.soap.SOAPSendReceiveActivity']/config)"); if (d2 > 0) { Collection<String> c2 = XmlUtils.evaluateXPathNodeSet(content, "ProcessDefinition/activity[type='com.tibco.plugin.soap.SOAPSendReceiveActivity']/config/sharedChannels/jmsChannel/JMSTo"); if (c2 != null && c2.size() > 0) { for (Iterator<String> c2it = c2.iterator(); c2it.hasNext();) { xmlStreamWriter.writeStartElement("jmsTo"); xmlStreamWriter.writeAttribute("type", "soapSendReceiveActivity"); xmlStreamWriter.writeCharacters(c2it.next()); xmlStreamWriter.writeEndElement(); } } else { warnMessage.add("no jmsTo found for soapSendReceiveActivity"); } } else { warnMessage.add("no soapSendReceiveActivity found"); } } else if (type.equals("substVar")) { String path = StringUtils .substringBeforeLast(StringUtils.substringAfterLast(urlString, "/defaultVars/"), "/"); Map<String, String> m1 = XmlUtils.evaluateXPathNodeSet(content, "repository/globalVariables/globalVariable", "name", "value"); if (m1 != null && m1.size() > 0) { for (Iterator<String> m1it = m1.keySet().iterator(); m1it.hasNext();) { Object key = m1it.next(); Object value = m1.get(key); xmlStreamWriter.writeStartElement("globalVariable"); xmlStreamWriter.writeAttribute("name", (String) key); xmlStreamWriter.writeAttribute("ref", "%%" + path + "/" + key + "%%"); xmlStreamWriter.writeCharacters((String) value); xmlStreamWriter.writeEndElement(); } } else { warnMessage.add("no globalVariable found"); } /* * } else { content = XmlUtils.removeNamespaces(content); * xmlStreamWriter.writeCharacters(content); */ } } catch (Exception e) { error(xmlStreamWriter, "error occured during processing " + type + " file", e, true); } if (warnMessage.size() > 0) { xmlStreamWriter.writeStartElement("warnMessages"); for (int i = 0; i < warnMessage.size(); i++) { xmlStreamWriter.writeStartElement("warnMessage"); xmlStreamWriter.writeCharacters(warnMessage.elementAt(i)); xmlStreamWriter.writeEndElement(); } 
xmlStreamWriter.writeEndElement(); } } xmlStreamWriter.writeEndElement(); }