/*
 * Autopsy Forensic Browser
 *
 * Copyright 2011-2017 Basis Technology Corp.
 * Contact: carrier <at> sleuthkit <dot> org
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.sleuthkit.autopsy.keywordsearch;

import java.io.BufferedReader;
import java.util.HashMap;
import java.util.Map;
import java.util.logging.Level;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.common.SolrInputDocument;
import org.openide.util.NbBundle;
import org.sleuthkit.autopsy.coreutils.Logger;
import org.sleuthkit.autopsy.datamodel.ContentUtils;
import org.sleuthkit.autopsy.ingest.IngestJobContext;
import org.sleuthkit.autopsy.keywordsearch.Chunker.Chunk;
import org.sleuthkit.datamodel.AbstractFile;
import org.sleuthkit.datamodel.BlackboardArtifact;
import org.sleuthkit.datamodel.DerivedFile;
import org.sleuthkit.datamodel.Directory;
import org.sleuthkit.datamodel.File;
import org.sleuthkit.datamodel.LayoutFile;
import org.sleuthkit.datamodel.LocalFile;
import org.sleuthkit.datamodel.SlackFile;
import org.sleuthkit.datamodel.SleuthkitItemVisitor;
import org.sleuthkit.datamodel.SleuthkitVisitableItem;
import org.sleuthkit.datamodel.SleuthkitVisitableItem;
import org.sleuthkit.datamodel.TskCoreException;

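/**
 * Handles indexing file and artifact content and metadata in the keyword
 * search Solr server. Text sources are split into chunks by a Chunker and
 * each chunk is added as its own Solr document; callers must eventually call
 * commit() to make the added documents searchable.
 *
 * Typical use (a sketch, not prescriptive): obtain the shared instance via
 * getDefault(), call indexText() or indexMetaDataOnly() for each item, then
 * call commit() when the batch is done.
 */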
//JMTODO: Should this class really be a singleton?
class Ingester {

    private static final Logger logger = Logger.getLogger(Ingester.class.getName());
    private volatile boolean uncommitedIngests = false;
    private final Server solrServer = KeywordSearch.getServer();
    private static final SolrFieldsVisitor SOLR_FIELDS_VISITOR = new SolrFieldsVisitor();
    private static Ingester instance;
    private static final int SINGLE_READ_CHARS = 512;

    private Ingester() {
    }

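    /**
     * Get the singleton instance of the Ingester.
     *
     * @return the shared Ingester instance
     */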
    public static synchronized Ingester getDefault() {
        if (instance == null) {
            instance = new Ingester();
        }
        return instance;
    }

    //JMTODO: this is probably useless
    @Override
    @SuppressWarnings("FinalizeDeclaration")
    protected void finalize() throws Throwable {
        super.finalize();

        // Warn if files might have been left uncommitted.
        if (uncommitedIngests) {
            logger.warning("Ingester was used to add files that it never committed."); //NON-NLS
        }
    }

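    /**
     * Sends the metadata (name, MAC times, etc.) of the given file to Solr
     * without indexing any of its content.
     *
     * @param file the file to index
     *
     * @throws IngesterException if there was a problem adding the document
     */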
    void indexMetaDataOnly(AbstractFile file) throws IngesterException {
        indexChunk("", file.getName(), getContentFields(file));
    }

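    /**
     * Sends the metadata of the given artifact to Solr without indexing any
     * of its content.
     *
     * @param artifact the artifact to index
     *
     * @throws IngesterException if there was a problem adding the document
     */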
    void indexMetaDataOnly(BlackboardArtifact artifact) throws IngesterException {
        indexChunk("", new ArtifactTextExtractor().getName(artifact), getContentFields(artifact));
    }

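    /**
     * Creates the map of fields that will be sent to Solr for the given item.
     *
     * @param item the item to get fields for
     *
     * @return the map of field names to values
     */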
    private Map<String, String> getContentFields(SleuthkitVisitableItem item) {
        return item.accept(SOLR_FIELDS_VISITOR);
    }

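    /**
     * Uses the given extractor to read the text of the given source, divides
     * it into chunks, and adds each chunk to the Solr index, followed by a
     * final "parent" document that carries only the metadata and the number
     * of chunks.
     *
     * @param extractor the TextExtractor to use to get the text
     * @param source    the source whose text will be extracted and indexed
     * @param context   the ingest job context
     *
     * @return true if the text was extracted and indexed, false if reading or
     *         chunking the content failed
     *
     * @throws Ingester.IngesterException if Solr rejected a chunk
     */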
    <T extends SleuthkitVisitableItem> boolean indexText(TextExtractor<T> extractor, T source, IngestJobContext context) throws Ingester.IngesterException {
        final long sourceID = extractor.getID(source);
        final String sourceName = extractor.getName(source);

        int numChunks = 0; //unknown until chunking is done

        if (extractor.isDisabled()) {
            /* Some extractors, notably the strings extractor, have options
             * which can be configured such that no extraction should be done. */
            return true;
        }

        Map<String, String> fields = getContentFields(source);
        //Get a reader for the content of the given source
        try (BufferedReader reader = new BufferedReader(extractor.getReader(source))) {
            Chunker chunker = new Chunker(reader);
            for (Chunk chunk : chunker) {
                String chunkId = Server.getChunkIdString(sourceID, numChunks + 1);
                fields.put(Server.Schema.ID.toString(), chunkId);
                fields.put(Server.Schema.CHUNK_SIZE.toString(), String.valueOf(chunk.getBaseChunkLength()));
                try {
                    //add the chunk text to the Solr index
                    indexChunk(chunk.toString(), sourceName, fields);
                    numChunks++;
                } catch (Ingester.IngesterException ingEx) {
                    extractor.logWarning("Ingester had a problem with extracted string from file '" //NON-NLS
                            + sourceName + "' (id: " + sourceID + ").", ingEx);//NON-NLS

                    throw ingEx; //need to rethrow to signal error and move on
                }
            }
            if (chunker.hasException()) {
                extractor.logWarning("Error chunking content from " + sourceID + ": " + sourceName, chunker.getException());
                return false;
            }
        } catch (Exception ex) {
            extractor.logWarning("Unexpected error, can't read content stream from " + sourceID + ": " + sourceName, ex);//NON-NLS
            return false;
        } finally {
            //after all chunks, index just the metadata, including numChunks, of the parent file
            fields.put(Server.Schema.NUM_CHUNKS.toString(), Integer.toString(numChunks));
            //reset the id field to the base document id
            fields.put(Server.Schema.ID.toString(), Long.toString(sourceID));
            //"parent" docs don't have chunk_size
            fields.remove(Server.Schema.CHUNK_SIZE.toString());
            indexChunk(null, sourceName, fields);
        }

        return true;
    }

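    /**
     * Adds one chunk of text plus its field map to the Solr index as a single
     * document. Passing an empty string or null for the chunk indexes only
     * the metadata in the field map.
     *
     * @param chunk      the chunk content as text, or null/"" for a
     *                   metadata-only document
     * @param sourceName the name of the source, used only for error messages
     * @param fields     the fields and values to include in the document
     *
     * @throws IngesterException if the document could not be added to Solr
     */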
    private void indexChunk(String chunk, String sourceName, Map<String, String> fields) throws IngesterException {
        if (fields.get(Server.Schema.IMAGE_ID.toString()) == null) {
            //JMTODO: actually if we couldn't get the image id it is set to -1,
            // but does this really mean we don't want to index it?

            //skip the file, image id unknown
            //JMTODO: does this need to be internationalized?
            String msg = NbBundle.getMessage(Ingester.class,
                    "Ingester.ingest.exception.unknownImgId.msg", sourceName);
            logger.log(Level.SEVERE, msg);
            throw new IngesterException(msg);
        }

        //Make a SolrInputDocument out of the field map
        SolrInputDocument updateDoc = new SolrInputDocument();
        for (String key : fields.keySet()) {
            updateDoc.addField(key, fields.get(key));
        }
        //add the content to the SolrInputDocument
        //JMTODO: can we just add it to the field map before passing that in?
        updateDoc.addField(Server.Schema.CONTENT.toString(), chunk);

        try {
            //TODO: consider a timeout thread, or varying the socket timeout based on the size of the indexed content
            solrServer.addDocument(updateDoc);
            uncommitedIngests = true;

        } catch (KeywordSearchModuleException | NoOpenCoreException ex) {
            //JMTODO: does this need to be internationalized?
            throw new IngesterException(
                    NbBundle.getMessage(Ingester.class, "Ingester.ingest.exception.err.msg", sourceName), ex);
        }
    }

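    /**
     * Tells Solr to commit. The documents added since the last commit are not
     * searchable until this is called.
     */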
    void commit() {
        try {
            solrServer.commit();
            uncommitedIngests = false;
        } catch (NoOpenCoreException | SolrServerException ex) {
            logger.log(Level.WARNING, "Error committing index", ex); //NON-NLS
        }
    }

    /**
     * Visitor used to create the map of Solr fields for a given
     * SleuthkitVisitableItem.
     */
    private static class SolrFieldsVisitor extends SleuthkitItemVisitor.Default<Map<String, String>> {

        @Override
        protected Map<String, String> defaultVisit(SleuthkitVisitableItem svi) {
            return new HashMap<>();
        }

        @Override
        public Map<String, String> visit(File f) {
            return getCommonAndMACTimeFields(f);
        }

        @Override
        public Map<String, String> visit(DerivedFile df) {
            return getCommonAndMACTimeFields(df);
        }

        @Override
        public Map<String, String> visit(Directory d) {
            return getCommonAndMACTimeFields(d);
        }

        @Override
        public Map<String, String> visit(LayoutFile lf) {
            // layout files do not have times
            return getCommonFields(lf);
        }

        @Override
        public Map<String, String> visit(LocalFile lf) {
            return getCommonAndMACTimeFields(lf);
        }

        @Override
        public Map<String, String> visit(SlackFile f) {
            return getCommonAndMACTimeFields(f);
        }

        /**
         * Get the field map for abstract files that have MAC times.
         */
        private Map<String, String> getCommonAndMACTimeFields(AbstractFile file) {
            Map<String, String> params = getCommonFields(file);
            // Assumption: the MAC times are formatted as ISO-8601 strings via the ContentUtils helper.
            params.put(Server.Schema.CTIME.toString(), ContentUtils.getStringTimeISO8601(file.getCtime(), file));
            params.put(Server.Schema.ATIME.toString(), ContentUtils.getStringTimeISO8601(file.getAtime(), file));
            params.put(Server.Schema.MTIME.toString(), ContentUtils.getStringTimeISO8601(file.getMtime(), file));
            params.put(Server.Schema.CRTIME.toString(), ContentUtils.getStringTimeISO8601(file.getCrtime(), file));
            return params;
        }

        /**
         * Get the field map that is common to all abstract files.
         */
        private Map<String, String> getCommonFields(AbstractFile af) {
            Map<String, String> params = new HashMap<>();
            params.put(Server.Schema.ID.toString(), Long.toString(af.getId()));
            try {
                params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(af.getDataSource().getId()));
            } catch (TskCoreException ex) {
                logger.log(Level.SEVERE,
                        "Could not get data source id to properly index the file " + af.getId(), ex); //NON-NLS
                params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(-1));
            }
            params.put(Server.Schema.FILE_NAME.toString(), af.getName());
            return params;
        }

        /**
         * Get the field map for artifacts.
         */
        @Override
        public Map<String, String> visit(BlackboardArtifact artifact) {
            Map<String, String> params = new HashMap<>();
            params.put(Server.Schema.ID.toString(), Long.toString(artifact.getArtifactID()));
            try {
                params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(ArtifactTextExtractor.getDataSource(artifact).getId()));
            } catch (TskCoreException ex) {
                logger.log(Level.SEVERE,
                        "Could not get data source id to properly index the artifact " + artifact.getArtifactID(), ex); //NON-NLS
                params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(-1));
            }
            return params;
        }
    }

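    /**
     * Indicates that there was an error with a specific ingest operation, but
     * it is okay to continue ingesting other files.
     */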
    static class IngesterException extends Exception {

        private static final long serialVersionUID = 1L;

        IngesterException(String message, Throwable ex) {
            super(message, ex);
        }

        IngesterException(String message) {
            super(message);
        }
    }
}