/*
 * Autopsy Forensic Browser
 *
 * Copyright 2011-2018 Basis Technology Corp.
 * Contact: carrier <at> sleuthkit <dot> org
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.sleuthkit.autopsy.keywordsearch;

import java.io.BufferedReader;
import java.io.Reader;
import java.util.HashMap;
import java.util.Map;
import java.util.logging.Level;
import org.apache.commons.lang3.math.NumberUtils;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.common.SolrInputDocument;
import org.openide.util.NbBundle;
import org.sleuthkit.autopsy.coreutils.Logger;
import org.sleuthkit.autopsy.datamodel.ContentUtils;
import org.sleuthkit.autopsy.healthmonitor.HealthMonitor;
import org.sleuthkit.autopsy.healthmonitor.TimingMetric;
import org.sleuthkit.autopsy.ingest.IngestJobContext;
import org.sleuthkit.autopsy.keywordsearch.Chunker.Chunk;
import org.sleuthkit.datamodel.AbstractFile;
import org.sleuthkit.datamodel.BlackboardArtifact;
import org.sleuthkit.datamodel.Content;
import org.sleuthkit.datamodel.DerivedFile;
import org.sleuthkit.datamodel.Directory;
import org.sleuthkit.datamodel.File;
import org.sleuthkit.datamodel.LayoutFile;
import org.sleuthkit.datamodel.LocalDirectory;
import org.sleuthkit.datamodel.LocalFile;
import org.sleuthkit.datamodel.Report;
import org.sleuthkit.datamodel.SlackFile;
import org.sleuthkit.datamodel.SleuthkitItemVisitor;
import org.sleuthkit.datamodel.SleuthkitVisitableItem;
import org.sleuthkit.datamodel.TskCoreException;

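/**
 * Handles indexing files and artifacts on a Solr core.
 */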
//JMTODO: Should this class really be a singleton?
class Ingester {

    private static final Logger logger = Logger.getLogger(Ingester.class.getName());
    private volatile boolean uncommitedIngests = false;
    private final Server solrServer = KeywordSearch.getServer();
    private static final SolrFieldsVisitor SOLR_FIELDS_VISITOR = new SolrFieldsVisitor();
    private static Ingester instance;

    private Ingester() {
    }

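    /**
     * Gets the singleton Ingester instance, creating it if needed.
     *
     * @return the Ingester singleton
     */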
    public static synchronized Ingester getDefault() {
        if (instance == null) {
            instance = new Ingester();
        }
        return instance;
    }

    //JMTODO: this is probably useless
    @Override
    @SuppressWarnings("FinalizeDeclaration")
    protected void finalize() throws Throwable {
        super.finalize();

        // Warn if files might have been left uncommitted.
        if (uncommitedIngests) {
            logger.warning("Ingester was used to add files that it never committed."); //NON-NLS
        }
    }

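    /**
     * Indexes only the metadata of a file (no text content): an empty content
     * string is indexed along with the file's field map.
     *
     * @param file the file to index
     *
     * @throws IngesterException if the metadata could not be indexed
     */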
    void indexMetaDataOnly(AbstractFile file) throws IngesterException {
        indexChunk("", file.getName().toLowerCase(), getContentFields(file));
    }

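    /**
     * Indexes only the metadata of a blackboard artifact (no text content): an
     * empty content string is indexed along with the artifact's field map.
     *
     * @param artifact   the artifact to index
     * @param sourceName a name to identify the source of the artifact
     *
     * @throws IngesterException if the metadata could not be indexed
     */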
    void indexMetaDataOnly(BlackboardArtifact artifact, String sourceName) throws IngesterException {
        indexChunk("", sourceName, getContentFields(artifact));
    }

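    /**
     * Builds the map of Solr fields for an item by passing it to the
     * SolrFieldsVisitor.
     *
     * @param item the item to get the fields for
     *
     * @return a map from Solr field names to values
     */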
    private Map<String, String> getContentFields(SleuthkitVisitableItem item) {
        return item.accept(SOLR_FIELDS_VISITOR);
    }

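    /**
     * Reads the text from the given Reader, breaks it into chunks using a
     * Chunker, and indexes each chunk as a separate Solr document. After the
     * chunks are indexed, a final metadata-only "parent" document (carrying
     * the number of chunks) is indexed for the source.
     *
     * @param sourceReader a Reader over the text to be indexed
     * @param sourceID     the object id of the source
     * @param sourceName   a name for the source, used in log messages
     * @param source       the source item, used to build the field map
     * @param context      the ingest job context, checked for cancellation
     *
     * @return true if indexing completed, false if it was cancelled or the
     *         content could not be read or chunked
     *
     * @throws IngesterException if a chunk could not be added to the index
     */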
    // TODO (JIRA-3118): Cancelled text indexing does not propagate cancellation to clients
    <T extends SleuthkitVisitableItem> boolean indexText(Reader sourceReader, long sourceID, String sourceName, T source, IngestJobContext context) throws Ingester.IngesterException {
        int numChunks = 0; //unknown until chunking is done

        Map<String, String> fields = getContentFields(source);
        //Get a reader for the content of the given source
        try (BufferedReader reader = new BufferedReader(sourceReader)) {
            Chunker chunker = new Chunker(reader);
            for (Chunk chunk : chunker) {
                if (context != null && context.fileIngestIsCancelled()) {
                    logger.log(Level.INFO, "File ingest cancelled. Cancelling keyword search indexing of {0}", sourceName);
                    return false;
                }
                String chunkId = Server.getChunkIdString(sourceID, numChunks + 1);
                fields.put(Server.Schema.ID.toString(), chunkId);
                fields.put(Server.Schema.CHUNK_SIZE.toString(), String.valueOf(chunk.getBaseChunkLength()));
                try {
                    //add the chunk text to Solr index
                    indexChunk(chunk.toString(), sourceName, fields);
                    numChunks++;
                } catch (Ingester.IngesterException ingEx) {
                    logger.log(Level.WARNING, "Ingester had a problem with extracted string from file '" //NON-NLS
                            + sourceName + "' (id: " + sourceID + ").", ingEx);//NON-NLS

                    throw ingEx; //need to rethrow to signal error and move on
                }
            }
            if (chunker.hasException()) {
                logger.log(Level.WARNING, "Error chunking content from " + sourceID + ": " + sourceName, chunker.getException());
                return false;
            }
        } catch (Exception ex) {
            logger.log(Level.WARNING, "Unexpected error, can't read content stream from " + sourceID + ": " + sourceName, ex);//NON-NLS
            return false;
        } finally {
            if (context != null && context.fileIngestIsCancelled()) {
                return false;
            } else {
                //after all chunks, index just the meta data, including the numChunks, of the parent file
                fields.put(Server.Schema.NUM_CHUNKS.toString(), Integer.toString(numChunks));
                //reset id field to base document id
                fields.put(Server.Schema.ID.toString(), Long.toString(sourceID));
                //"parent" docs don't have chunk_size
                fields.remove(Server.Schema.CHUNK_SIZE.toString());
                indexChunk(null, sourceName, fields);
            }
        }
        return true;
    }

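    /**
     * Adds one chunk of text and its field map to the Solr index as a single
     * document.
     *
     * @param chunk      the text of the chunk, or null for a metadata-only
     *                   "parent" document
     * @param sourceName a name for the source, used in error messages
     * @param fields     the map of Solr field names to values
     *
     * @throws IngesterException if the image id field is missing or the
     *                           document could not be added to the index
     */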
    private void indexChunk(String chunk, String sourceName, Map<String, String> fields) throws IngesterException {
        if (fields.get(Server.Schema.IMAGE_ID.toString()) == null) {
            //JMTODO: actually if we couldn't get the image id it is set to -1,
            // but does this really mean we don't want to index it?

            //skip the file, image id unknown
            String msg = NbBundle.getMessage(Ingester.class,
                    "Ingester.ingest.exception.unknownImgId.msg", sourceName); //JMTODO: does this need to be internationalized?
            logger.log(Level.SEVERE, msg);
            throw new IngesterException(msg);
        }

        //Make a SolrInputDocument out of the field map
        SolrInputDocument updateDoc = new SolrInputDocument();
        for (String key : fields.keySet()) {
            updateDoc.addField(key, fields.get(key));
        }

        try {
            //TODO: consider timeout thread, or vary socket timeout based on size of indexed content

            //add the content to the SolrInputDocument
            //JMTODO: can we just add it to the field map before passing that in?
            updateDoc.addField(Server.Schema.CONTENT.toString(), chunk);

            // We also add the content (if present) in lowercase form to facilitate case
            // insensitive substring/regular expression search.
            double indexSchemaVersion = NumberUtils.toDouble(solrServer.getIndexInfo().getSchemaVersion());
            if (indexSchemaVersion >= 2.1) {
                updateDoc.addField(Server.Schema.CONTENT_STR.toString(), ((chunk == null) ? "" : chunk.toLowerCase()));
            }

            TimingMetric metric = HealthMonitor.getTimingMetric("Solr: Index chunk");

            solrServer.addDocument(updateDoc);
            HealthMonitor.submitTimingMetric(metric);
            uncommitedIngests = true;

        } catch (KeywordSearchModuleException | NoOpenCoreException ex) {
            //JMTODO: does this need to be internationalized?
            throw new IngesterException(
                    NbBundle.getMessage(Ingester.class, "Ingester.ingest.exception.err.msg", sourceName), ex);
        }
    }

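    /**
     * Tells the Solr server to commit the documents added since the last
     * commit.
     */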
    void commit() {
        try {
            solrServer.commit();
            uncommitedIngests = false;
        } catch (NoOpenCoreException | SolrServerException ex) {
            logger.log(Level.WARNING, "Error committing index", ex); //NON-NLS
        }
    }

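    /**
     * Visitor that builds the map of Solr field names to values for the
     * various kinds of SleuthkitVisitableItems (files, artifacts, reports).
     */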
    static private class SolrFieldsVisitor extends SleuthkitItemVisitor.Default<Map<String, String>> {

        @Override
        protected Map<String, String> defaultVisit(SleuthkitVisitableItem svi) {
            return new HashMap<>();
        }

        @Override
        public Map<String, String> visit(File f) {
            return getCommonAndMACTimeFields(f);
        }

        @Override
        public Map<String, String> visit(DerivedFile df) {
            return getCommonAndMACTimeFields(df);
        }

        @Override
        public Map<String, String> visit(Directory d) {
            return getCommonAndMACTimeFields(d);
        }

        @Override
        public Map<String, String> visit(LocalDirectory ld) {
            return getCommonAndMACTimeFields(ld);
        }

        @Override
        public Map<String, String> visit(LayoutFile lf) {
            // layout files do not have times
            return getCommonFields(lf);
        }

        @Override
        public Map<String, String> visit(LocalFile lf) {
            return getCommonAndMACTimeFields(lf);
        }

        @Override
        public Map<String, String> visit(SlackFile f) {
            return getCommonAndMACTimeFields(f);
        }

        /**
         * Gets the fields for an AbstractFile along with its MAC times in
         * ISO-8601 form.
         *
         * @param file the file to get fields for
         *
         * @return the field map
         */
        private Map<String, String> getCommonAndMACTimeFields(AbstractFile file) {
            Map<String, String> params = getCommonFields(file);
            params.put(Server.Schema.CTIME.toString(), ContentUtils.getStringTimeISO8601(file.getCtime(), file));
            params.put(Server.Schema.ATIME.toString(), ContentUtils.getStringTimeISO8601(file.getAtime(), file));
            params.put(Server.Schema.MTIME.toString(), ContentUtils.getStringTimeISO8601(file.getMtime(), file));
            params.put(Server.Schema.CRTIME.toString(), ContentUtils.getStringTimeISO8601(file.getCrtime(), file));
            return params;
        }

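        /**
         * Gets the fields common to all AbstractFiles: the object id, the data
         * source (image) id, and the lowercased file name.
         *
         * @param file the file to get fields for
         *
         * @return the field map
         */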
        private Map<String, String> getCommonFields(AbstractFile file) {
            Map<String, String> params = new HashMap<>();
            params.put(Server.Schema.ID.toString(), Long.toString(file.getId()));
            try {
                params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(file.getDataSource().getId()));
            } catch (TskCoreException ex) {
                logger.log(Level.SEVERE, "Could not get data source id to properly index the file " + file.getId(), ex); //NON-NLS
                params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(-1));
            }
            params.put(Server.Schema.FILE_NAME.toString(), file.getName().toLowerCase());
            return params;
        }
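        /**
         * Gets the fields for a blackboard artifact: the artifact id and the
         * id of the artifact's data source (or -1 if it cannot be determined).
         *
         * @param artifact the artifact to get fields for
         *
         * @return the field map
         */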
        @Override
        public Map<String, String> visit(BlackboardArtifact artifact) {
            Map<String, String> params = new HashMap<>();
            params.put(Server.Schema.ID.toString(), Long.toString(artifact.getArtifactID()));
            try {
                params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(artifact.getDataSource().getId()));
            } catch (TskCoreException ex) {
                logger.log(Level.SEVERE, "Could not get data source id to properly index the artifact " + artifact.getArtifactID(), ex); //NON-NLS
                params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(-1));
            }
            return params;
        }

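        /**
         * Gets the fields for a report: the report id and the id of the
         * report's data source (or -1 if there is none or it cannot be
         * determined).
         *
         * @param report the report to get fields for
         *
         * @return the field map
         */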
        @Override
        public Map<String, String> visit(Report report) {
            Map<String, String> params = new HashMap<>();
            params.put(Server.Schema.ID.toString(), Long.toString(report.getId()));
            try {
                Content dataSource = report.getDataSource();
                if (null == dataSource) {
                    params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(-1));
                } else {
                    params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(dataSource.getId()));
                }
            } catch (TskCoreException ex) {
                logger.log(Level.SEVERE, "Could not get data source id to properly index the report, using default value. Id: " + report.getId(), ex); //NON-NLS
                params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(-1));
            }
            return params;
        }
    }

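    /**
     * Exception thrown when a document could not be added to the Solr index.
     */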
    static class IngesterException extends Exception {

        private static final long serialVersionUID = 1L;

        IngesterException(String message, Throwable ex) {
            super(message, ex);
        }

        IngesterException(String message) {
            super(message);
        }
    }
}