// Java tutorial
/*
 * This file is part of NixNote
 * Copyright 2009 Randy Baumgarte
 *
 * This file may be licensed under the terms of of the
 * GNU General Public License Version 2 (the ``GPL'').
 *
 * Software distributed under the License is distributed
 * on an ``AS IS'' basis, WITHOUT WARRANTY OF ANY KIND, either
 * express or implied. See the GPL for the specific language
 * governing rights and limitations.
 *
 * You should have received a copy of the GPL along with this
 * program. If not, go to http://www.gnu.org/licenses/gpl.html
 * or write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 *
 */
package cx.fbn.nevernote.threads;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.List;
import java.util.TreeSet;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.locks.LockSupport;

import org.apache.commons.lang3.StringEscapeUtils;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.microsoft.OfficeParser;
import org.apache.tika.parser.microsoft.ooxml.OOXMLParser;
import org.apache.tika.parser.odf.OpenDocumentParser;
import org.apache.tika.parser.pdf.PDFParser;
import org.apache.tika.parser.rtf.RTFParser;
import org.apache.tika.sax.BodyContentHandler;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;

import com.evernote.edam.type.Data;
import com.evernote.edam.type.Note;
import com.evernote.edam.type.Resource;
import com.trolltech.qt.core.QByteArray;
import com.trolltech.qt.core.QIODevice.OpenModeFlag;
import com.trolltech.qt.core.QObject;
import com.trolltech.qt.core.QTemporaryFile;
import com.trolltech.qt.xml.QDomDocument;
import com.trolltech.qt.xml.QDomElement;
import com.trolltech.qt.xml.QDomNodeList;

import cx.fbn.nevernote.Global;
import cx.fbn.nevernote.signals.IndexSignal;
import cx.fbn.nevernote.signals.NoteResourceSignal;
import cx.fbn.nevernote.signals.NoteSignal;
import cx.fbn.nevernote.sql.DatabaseConnection;
import cx.fbn.nevernote.utilities.ApplicationLogger;

/**
 * Background worker that keeps the word index up to date.
 *
 * <p>The runner blocks on an internal work queue (see {@link #addWork(String)})
 * and reacts to string commands whose prefix is one of {@code SCAN},
 * {@code REINDEXALL}, {@code REINDEXNOTE <guid>} or {@code STOP}. Note bodies,
 * titles and tag names are indexed under the {@code "CONTENT"} type;
 * recognition data and locally parsed attachments are indexed under
 * {@code "RESOURCE"}.
 *
 * <p>NOTE(review): {@link #keepRunning}, {@link #interrupt} and {@link #idle}
 * are plain (non-volatile) public fields. If they are written from another
 * thread, visibility to the indexing loop is not guaranteed - confirm against
 * the callers.
 */
public class IndexRunner extends QObject implements Runnable {

    /** Selectors for the Tika parser created by {@link #createParser(int)}. */
    private static final int PARSER_PDF = 1;
    private static final int PARSER_OOXML = 2;
    private static final int PARSER_OFFICE = 3;
    private static final int PARSER_RTF = 4;
    private static final int PARSER_ODF = 5;

    private final ApplicationLogger logger;
    /** GUID of the note or resource currently being processed (null when none). */
    private String guid;
    private QByteArray resourceBinary;
    public volatile NoteSignal noteSignal;
    public volatile NoteResourceSignal resourceSignal;
    private int indexType;
    public final int SCAN = 1;
    public final int REINDEXALL = 2;
    public final int REINDEXNOTE = 3;
    /** Cleared by a {@code STOP} request; every loop in this class polls it. */
    public boolean keepRunning;
    private final QDomDocument doc;
    private static String regex = Global.getWordRegex();
    /** Non-alphanumeric characters that are kept at the end of a word. */
    public String specialIndexCharacters = "";
    public boolean indexNoteBody = true;
    public boolean indexNoteTitle = true;
    public boolean indexImageRecognition = true;
    private final DatabaseConnection conn;
    private volatile LinkedBlockingQueue<String> workQueue;
    private static int MAX_QUEUED_WAITING = 1000;
    /** When set, the indexing loops commit, pause and then resume. */
    public boolean interrupt;
    /** True while the thread is about to block, or is blocked, on the work queue. */
    public boolean idle;
    public boolean indexAttachmentsLocally = true;
    public volatile IndexSignal signal;
    /** Words already added for the item being indexed (raw and cleaned forms). */
    private final TreeSet<String> foundWords;
    /** Number of index inserts since the last commit. */
    int uncommittedCount = 0;

    /**
     * Creates the runner together with its own logger and database connection.
     *
     * @param logname name handed to {@link ApplicationLogger}
     * @param u       passed through to {@link DatabaseConnection}
     * @param i       passed through to {@link DatabaseConnection}
     * @param r       passed through to {@link DatabaseConnection}
     * @param uid     passed through to {@link DatabaseConnection}
     * @param pswd    passed through to {@link DatabaseConnection}
     * @param cpswd   passed through to {@link DatabaseConnection}
     */
    public IndexRunner(String logname, String u, String i, String r, String uid, String pswd, String cpswd) {
        foundWords = new TreeSet<String>();
        logger = new ApplicationLogger(logname);
        conn = new DatabaseConnection(logger, u, i, r, uid, pswd, cpswd, 500);
        indexType = SCAN;
        guid = null;
        keepRunning = true;
        doc = new QDomDocument();
        workQueue = new LinkedBlockingQueue<String>(MAX_QUEUED_WAITING);
    }

    /**
     * Sets the pending operation.
     *
     * @param t one of {@link #SCAN}, {@link #REINDEXALL}, {@link #REINDEXNOTE},
     *          or 0 for "nothing pending"
     */
    public void setIndexType(int t) {
        indexType = t;
    }

    /**
     * Thread body: takes requests from the work queue until a {@code STOP}
     * request clears {@link #keepRunning}, then shuts the database down.
     */
    @Override
    public void run() {
        thread().setPriority(Thread.MIN_PRIORITY);
        noteSignal = new NoteSignal();
        resourceSignal = new NoteResourceSignal();
        signal = new IndexSignal();
        logger.log(logger.EXTREME, "Starting index thread ");
        while (keepRunning) {
            idle = true;
            try {
                // Flush whatever the previous request left open before blocking.
                conn.commitTransaction();
                uncommittedCount = 0;
                String work = workQueue.take();
                idle = false;
                if (work.startsWith("SCAN")) {
                    guid = null;
                    interrupt = false;
                    indexType = SCAN;
                }
                if (work.startsWith("REINDEXALL")) {
                    guid = null;
                    indexType = REINDEXALL;
                }
                if (work.startsWith("REINDEXNOTE")) {
                    // The remainder of the request is the note GUID.
                    work = work.replace("REINDEXNOTE ", "");
                    guid = work;
                    indexType = REINDEXNOTE;
                }
                if (work.startsWith("STOP")) {
                    keepRunning = false;
                    guid = null;
                }
                logger.log(logger.EXTREME, "Type:" + indexType);
                if (indexType == SCAN && keepRunning) {
                    logger.log(logger.MEDIUM, "Scanning for unindexed notes & resources");
                    scanUnindexed();
                    setIndexType(0);
                }
                if (indexType == REINDEXALL && keepRunning) {
                    logger.log(logger.MEDIUM, "Marking all for reindex");
                    reindexAll();
                    setIndexType(0);
                }
                if (indexType == REINDEXNOTE && keepRunning) {
                    reindexNote();
                }
            } catch (InterruptedException e) {
                logger.log(logger.LOW, "Thread interrupted exception: " + e.getMessage());
            }
        }
        logger.log(logger.EXTREME, "Shutting down database");
        conn.dbShutdown();
        logger.log(logger.EXTREME, "Database shut down.  Exiting thread");
    }

    /**
     * Rebuilds the {@code "CONTENT"} index entries of the note identified by
     * the current {@code guid}: body (with encrypted sections and markup
     * removed), optionally the title, and the tag names.
     */
    public void indexNoteContent() {
        foundWords.clear();

        logger.log(logger.EXTREME, "Entering indexRunner.indexNoteContent()");
        logger.log(logger.EXTREME, "Getting note content");
        Note n = conn.getNoteTable().getNote(guid, true, false, true, true, true);
        if (n == null) {
            // Nothing to index; avoid dereferencing a missing note.
            logger.log(logger.LOW, "Note not found for indexing: " + guid);
            return;
        }
        String data;
        if (indexNoteBody) {
            data = conn.getNoteTable().getNoteContentNoUTFConversion(n.getGuid());
            if (data == null)
                data = "";

            logger.log(logger.EXTREME, "Removing any encrypted data");
            data = removeEnCrypt(data);
            logger.log(logger.EXTREME, "Removing xml markups");
        } else
            data = "";
        String text;
        if (indexNoteTitle)
            text = removeTags(StringEscapeUtils.unescapeHtml4(data) + " " + n.getTitle());
        else
            text = removeTags(StringEscapeUtils.unescapeHtml4(data));

        logger.log(logger.EXTREME, "Splitting words");
        String[] result = text.split(regex);
        conn.commitTransaction();
        conn.beginTransaction();
        logger.log(logger.EXTREME, "Deleting existing words for note from index");
        conn.getWordsTable().expungeFromWordIndex(guid, "CONTENT");

        logger.log(logger.EXTREME, "Number of words found: " + result.length);
        for (int j = 0; j < result.length && keepRunning; j++) {
            if (interrupt) {
                processInterrupt();
            }
            if (!result[j].trim().equals("")) {
                logger.log(logger.EXTREME, "Result word: " + result[j].trim());
                addToIndex(guid, result[j], "CONTENT");
            }
        }

        // Add tags
        for (int j = 0; j < n.getTagNamesSize(); j++) {
            if (n.getTagNames() != null && n.getTagNames().get(j) != null
                    && !n.getTagNames().get(j).trim().equals(""))
                addToIndex(guid, n.getTagNames().get(j), "CONTENT");
        }

        // If we were interrupted, we will reindex this note next time.
        // NOTE(review): this tests Global.keepRunning, not this runner's own
        // keepRunning flag - confirm that is intended.
        if (Global.keepRunning) {
            logger.log(logger.EXTREME, "Resetting note guid needed");
            conn.getNoteTable().setIndexNeeded(guid, false);
        }
        conn.commitTransaction();
        uncommittedCount = 0;
        logger.log(logger.EXTREME, "Leaving indexRunner.indexNoteContent()");
    }

    /**
     * Removes markup from {@code text}. The scan runs from the end of the
     * string: a {@code '>'} opens a tag, the preceding {@code '<'} closes it,
     * and every character in between (delimiters included) is dropped.
     *
     * @param text text that may contain markup
     * @return the text with tags removed
     */
    private String removeTags(String text) {
        StringBuffer buffer = new StringBuffer(text);
        boolean inTag = false;
        for (int i = buffer.length() - 1; i >= 0; i--) {
            if (buffer.charAt(i) == '>')
                inTag = true;
            if (buffer.charAt(i) == '<')
                inTag = false;
            if (inTag || buffer.charAt(i) == '<')
                buffer.deleteCharAt(i);
        }

        return buffer.toString();
    }

    /**
     * Queues a request, but only when no other request is waiting.
     *
     * @param request command string (see the class description)
     * @return true when the request was queued, false when the queue was not empty
     */
    public synchronized boolean addWork(String request) {
        if (workQueue.size() == 0) {
            workQueue.offer(request);
            return true;
        }
        return false;
    }

    /**
     * @return number of requests currently waiting in the work queue
     */
    public synchronized int getWorkQueueSize() {
        return workQueue.size();
    }

    /**
     * Rebuilds the {@code "RESOURCE"} index entries for the resource
     * identified by the current {@code guid}: first from its recognition
     * data, then (when {@link #indexAttachmentsLocally} is set) from the
     * attachment content itself.
     */
    public void indexResource() {

        if (guid == null)
            return;
        foundWords.clear();
        Resource r = conn.getNoteTable().noteResourceTable.getNoteResourceRecognition(guid);
        if (r == null) {
            // Without the resource record there is no owning note GUID to index under.
            logger.log(logger.LOW, "Resource not found for indexing: " + guid);
            return;
        }
        if (!indexImageRecognition || r.getRecognition() == null || r.getRecognition().getBody() == null
                || r.getRecognition().getBody().length == 0)
            resourceBinary = new QByteArray(" ");
        else
            resourceBinary = new QByteArray(r.getRecognition().getBody());

        conn.commitTransaction();
        conn.beginTransaction();
        conn.getWordsTable().expungeFromWordIndex(r.getNoteGuid(), "RESOURCE");
        // This is due to an old bug & can be removed at some point in the future 11/23/2010
        conn.getWordsTable().expungeFromWordIndex(guid, "RESOURCE");
        conn.commitTransaction();
        uncommittedCount = 0;
        conn.beginTransaction();

        doc.setContent(resourceBinary);
        QDomElement docElem = doc.documentElement();

        // look for text tags
        QDomNodeList anchors = docElem.elementsByTagName("t");
        for (int i = 0; i < anchors.length() && keepRunning; i++) {
            if (interrupt) {
                processInterrupt();
            }
            QDomElement enmedia = anchors.at(i).toElement();
            String weight = enmedia.attribute("w");
            String text = enmedia.text().toLowerCase();
            if (!text.equals("")) {
                conn.getWordsTable().addWordToNoteIndex(r.getNoteGuid(), text, "RESOURCE", Integer.valueOf(weight));
                uncommittedCount++;
                if (uncommittedCount > 100) {
                    conn.commitTransaction();
                    uncommittedCount = 0;
                }
            }
        }

        if (Global.keepRunning && indexAttachmentsLocally) {
            conn.commitTransaction();
            uncommittedCount = 0;
            conn.beginTransaction();
            indexResourceContent(guid);
        }

        if (Global.keepRunning)
            conn.getNoteTable().noteResourceTable.setIndexNeeded(guid, false);
        conn.commitTransaction();
        uncommittedCount = 0;
    }

    /**
     * Dispatches a resource to the matching document parser based on its MIME
     * type. Resources with an unrecognised or missing MIME type are ignored.
     *
     * @param guid GUID of the resource to load and index
     */
    private void indexResourceContent(String guid) {
        Resource r = conn.getNoteTable().noteResourceTable.getNoteResource(guid, true);
        if (r != null && r.getMime() != null) {
            if (r.getMime().equalsIgnoreCase("application/pdf")) {
                indexResourcePDF(r);
                return;
            }
            if (r.getMime().equalsIgnoreCase("application/docx")
                    || r.getMime().equalsIgnoreCase("application/xlsx")
                    || r.getMime().equalsIgnoreCase("application/pptx")) {
                indexResourceOOXML(r);
                return;
            }
            if (r.getMime().equalsIgnoreCase("application/vsd")
                    || r.getMime().equalsIgnoreCase("application/ppt")
                    || r.getMime().equalsIgnoreCase("application/xls")
                    || r.getMime().equalsIgnoreCase("application/msg")
                    || r.getMime().equalsIgnoreCase("application/doc")) {
                indexResourceOffice(r);
                return;
            }
            if (r.getMime().equalsIgnoreCase("application/rtf")) {
                indexResourceRTF(r);
                return;
            }
            if (r.getMime().equalsIgnoreCase("application/odf")
                    || r.getMime().equalsIgnoreCase("application/odt")
                    || r.getMime().equalsIgnoreCase("application/odp")
                    || r.getMime().equalsIgnoreCase("application/odg")
                    || r.getMime().equalsIgnoreCase("application/odb")
                    || r.getMime().equalsIgnoreCase("application/ods")) {
                indexResourceODF(r);
                return;
            }
        }
    }

    /** Indexes the text of an RTF attachment. */
    private void indexResourceRTF(Resource r) {
        indexResourceWithParser(r, PARSER_RTF);
    }

    /** Indexes the text of an OpenDocument attachment. */
    private void indexResourceODF(Resource r) {
        indexResourceWithParser(r, PARSER_ODF);
    }

    /** Indexes the text of a legacy Microsoft Office attachment. */
    private void indexResourceOffice(Resource r) {
        indexResourceWithParser(r, PARSER_OFFICE);
    }

    /** Indexes the text of a PDF attachment. */
    private void indexResourcePDF(Resource r) {
        indexResourceWithParser(r, PARSER_PDF);
    }

    /** Indexes the text of an Office Open XML attachment. */
    private void indexResourceOOXML(Resource r) {
        indexResourceWithParser(r, PARSER_OOXML);
    }

    /**
     * Shared implementation for all attachment types: writes the resource body
     * to a temporary file, extracts its text with the selected Tika parser and
     * adds every word to the {@code "RESOURCE"} index of the owning note.
     *
     * <p>Parser failures of any kind (including {@link Error}s such as a
     * missing method in an incompatible library) are logged and otherwise
     * ignored so that one bad attachment cannot stop the index thread.
     *
     * @param r          resource whose data is to be indexed
     * @param parserType one of the {@code PARSER_*} constants
     */
    private void indexResourceWithParser(Resource r, int parserType) {
        Data d = r.getData();
        for (int i = 0; i < 20 && d != null && d.getSize() == 0; i++)
            d = r.getData();
        if (d == null || d.getSize() == 0)
            return;

        QTemporaryFile f = writeResource(d);
        if (!keepRunning) {
            return;
        }

        InputStream input = null;
        try {
            input = new FileInputStream(new File(f.fileName()));
            ContentHandler textHandler = new BodyContentHandler(-1);
            Metadata metadata = new Metadata();
            // Created inside the try block so that class-loading problems are
            // caught by the Error handlers below.
            Parser parser = createParser(parserType);
            ParseContext context = new ParseContext();
            parser.parse(input, textHandler, metadata, context);
            String[] result = textHandler.toString().split(regex);
            for (int i = 0; i < result.length && keepRunning; i++) {
                if (interrupt) {
                    processInterrupt();
                }
                addToIndex(r.getNoteGuid(), result[i], "RESOURCE");
            }
        } catch (java.lang.ClassCastException e) {
            logger.log(logger.LOW, "Cast exception: " + e.getMessage());
        } catch (FileNotFoundException e) {
            logger.log(logger.LOW, "FileNotFound exception: " + e.getMessage());
        } catch (IOException e) {
            logger.log(logger.LOW, "IO exception: " + e.getMessage());
        } catch (SAXException e) {
            logger.log(logger.LOW, "SAX exception: " + e.getMessage());
        } catch (TikaException e) {
            logger.log(logger.LOW, "Tika exception: " + e.getMessage());
        } catch (Exception e) {
            logger.log(logger.LOW, "Unknown exception: " + e.getMessage());
        } catch (java.lang.NoSuchMethodError e) {
            logger.log(logger.LOW, "NoSuchMethod error: " + e.getMessage());
        } catch (Error e) {
            logger.log(logger.LOW, "Unknown error: " + e.getMessage());
        } finally {
            // Release the stream and the temporary file on every path.
            if (input != null) {
                try {
                    input.close();
                } catch (IOException e) {
                    logger.log(logger.LOW, "IO exception: " + e.getMessage());
                }
            }
            f.close();
        }
    }

    /**
     * Instantiates the Tika parser for the given selector.
     *
     * @param parserType one of the {@code PARSER_*} constants
     * @return a new parser instance
     * @throws IllegalArgumentException for an unknown selector
     */
    private Parser createParser(int parserType) {
        switch (parserType) {
        case PARSER_PDF:
            return new PDFParser();
        case PARSER_OOXML:
            return new OOXMLParser();
        case PARSER_OFFICE:
            return new OfficeParser();
        case PARSER_RTF:
            return new RTFParser();
        case PARSER_ODF:
            return new OpenDocumentParser();
        default:
            throw new IllegalArgumentException("Unknown parser type: " + parserType);
        }
    }

    /**
     * Writes the resource body to a new temporary file and closes it.
     *
     * @param d resource data to write
     * @return the (closed) temporary file; its name can be used to reopen it
     */
    private QTemporaryFile writeResource(Data d) {
        QTemporaryFile newFile = new QTemporaryFile();
        newFile.open(OpenModeFlag.WriteOnly);
        newFile.write(d.getBody());
        newFile.close();
        return newFile;
    }

    /**
     * Strips every {@code <en-crypt ...>...</en-crypt>} section from the note
     * content so that encrypted payloads are not indexed.
     *
     * <p>An opening tag without a matching closing tag ends the scan and the
     * remaining content is returned unchanged.
     *
     * @param content note content
     * @return the content without complete en-crypt sections
     */
    private String removeEnCrypt(String content) {
        final String closeTag = "</en-crypt>";
        int index = content.indexOf("<en-crypt");
        while (index > -1 && keepRunning) {
            if (interrupt) {
                processInterrupt();
            }
            // Test the raw search result; adding the tag length first would
            // hide a "not found" (-1) result.
            int closePos = content.indexOf(closeTag, index);
            if (closePos < 0) {
                break;
            }
            content = content.substring(0, index) + content.substring(closePos + closeTag.length());
            index = content.indexOf("<en-crypt");
        }
        return content;
    }

    /**
     * Normalises a word and adds it to the index once per indexed item.
     *
     * <p>The word is lower-cased; trailing characters that are neither
     * letters/digits nor listed in {@link #specialIndexCharacters} are
     * removed, then leading characters that are not letters/digits are
     * removed. Both the raw and the cleaned form are remembered in
     * {@code foundWords} so duplicates are skipped.
     *
     * @param guid note GUID the word belongs to
     * @param word raw word as produced by the splitter
     * @param type index type, e.g. {@code "CONTENT"} or {@code "RESOURCE"}
     */
    private void addToIndex(String guid, String word, String type) {
        if (foundWords.contains(word))
            return;
        StringBuffer buffer = new StringBuffer(word.toLowerCase());
        // Trim junk from the end of the word.
        for (int i = buffer.length() - 1; i >= 0; i--) {
            if (!Character.isLetterOrDigit(buffer.charAt(i))
                    && specialIndexCharacters.indexOf(buffer.charAt(i)) == -1)
                buffer.deleteCharAt(i);
            else
                break;
        }
        // Reverse so the same end-trimming loop removes junk from the beginning.
        buffer = buffer.reverse();
        for (int i = buffer.length() - 1; i >= 0; i--) {
            if (!Character.isLetterOrDigit(buffer.charAt(i)))
                buffer.deleteCharAt(i);
            else
                break;
        }
        buffer = buffer.reverse();
        if (buffer.length() > 0) {
            // We have a good word
            if (!foundWords.contains(buffer.toString())) {
                foundWords.add(buffer.toString());
                foundWords.add(word);
                conn.getWordsTable().addWordToNoteIndex(guid, buffer.toString(), type, 100);
                uncommittedCount++;
                if (uncommittedCount > 100) {
                    conn.commitTransaction();
                    uncommittedCount = 0;
                }
            }
        }
        return;
    }

    /**
     * Indexes every note and resource flagged as unindexed, then removes index
     * entries whose note no longer exists. Emits {@code indexStarted} before
     * the first item and {@code indexFinished} at the end when anything was
     * indexed and the runner was not stopped.
     */
    private void scanUnindexed() {
        List<String> notes = conn.getNoteTable().getUnindexed();
        guid = null;
        boolean started = false;
        if (notes.size() > 0) {
            signal.indexStarted.emit();
            started = true;
        }
        for (int i = 0; i < notes.size() && keepRunning; i++) {
            if (interrupt) {
                processInterrupt();
            }
            guid = notes.get(i);
            if (guid != null && keepRunning) {
                indexNoteContent();
            }
        }

        List<String> unindexedResources = conn.getNoteTable().noteResourceTable.getUnindexed();
        if (unindexedResources.size() > 0 && !started) {
            signal.indexStarted.emit();
            started = true;
        }
        for (int i = 0; i < unindexedResources.size() && keepRunning; i++) {
            if (interrupt) {
                processInterrupt();
            }
            guid = unindexedResources.get(i);
            if (keepRunning) {
                indexResource();
            }
        }

        // Cleanup stuff that was deleted at some point
        List<String> guids = conn.getWordsTable().getGuidList();
        logger.log(logger.LOW, "GUIDS in index: " + guids.size());
        for (int i = 0; i < guids.size() && keepRunning; i++) {
            if (!conn.getNoteTable().exists(guids.get(i))) {
                logger.log(logger.LOW, "Old GUID found: " + guids.get(i));
                conn.getWordsTable().expunge(guids.get(i));
            }
        }

        if (started && keepRunning)
            signal.indexFinished.emit();
    }

    /** Flags the note identified by the current {@code guid} as needing an index pass. */
    private void reindexNote() {
        if (guid == null)
            return;
        conn.getNoteTable().setIndexNeeded(guid, true);
    }

    /** Flags every note and every resource as needing an index pass. */
    private void reindexAll() {
        conn.getNoteTable().reindexAllNotes();
        conn.getNoteTable().noteResourceTable.reindexAll();
    }

    /**
     * Parks the current thread for roughly {@code len} seconds.
     *
     * @param len number of seconds to wait; values below zero are treated as zero
     */
    private void waitSeconds(int len) {
        // parkNanos is used because wait(time) isn't accurate enough on most platforms.
        LockSupport.parkNanos(Math.max(0, len) * 1000000000L);
    }

    /**
     * Handles a pause request: commits the open transaction, waits one second,
     * starts a new transaction and clears {@link #interrupt}.
     */
    private void processInterrupt() {
        conn.commitTransaction();
        waitSeconds(1);
        uncommittedCount = 0;
        conn.beginTransaction();
        interrupt = false;
    }

}