/*
 * Autopsy Forensic Browser
 *
 * Copyright 2011-2018 Basis Technology Corp.
 * Contact: carrier <at> sleuthkit <dot> org
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.sleuthkit.autopsy.keywordsearch;

import java.io.BufferedReader;
import java.util.HashMap;
import java.util.Map;
import java.util.logging.Level;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.common.SolrInputDocument;
import org.openide.util.NbBundle;
import org.sleuthkit.autopsy.coreutils.Logger;
import org.sleuthkit.autopsy.datamodel.ContentUtils;
import org.sleuthkit.autopsy.healthmonitor.EnterpriseHealthMonitor;
import org.sleuthkit.autopsy.healthmonitor.TimingMetric;
import org.sleuthkit.autopsy.ingest.IngestJobContext;
import org.sleuthkit.autopsy.keywordsearch.Chunker.Chunk;
import org.sleuthkit.datamodel.AbstractFile;
import org.sleuthkit.datamodel.BlackboardArtifact;
import org.sleuthkit.datamodel.Content;
import org.sleuthkit.datamodel.DerivedFile;
import org.sleuthkit.datamodel.Directory;
import org.sleuthkit.datamodel.File;
import org.sleuthkit.datamodel.LayoutFile;
import org.sleuthkit.datamodel.LocalDirectory;
import org.sleuthkit.datamodel.LocalFile;
import org.sleuthkit.datamodel.Report;
import org.sleuthkit.datamodel.SlackFile;
import org.sleuthkit.datamodel.SleuthkitItemVisitor;
import org.sleuthkit.datamodel.SleuthkitVisitableItem;
import org.sleuthkit.datamodel.TskCoreException;

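/**
 * Handles indexing the content and metadata of files, artifacts, and reports
 * on the Solr core used by the keyword search module. A minimal usage sketch,
 * assuming a suitable TextExtractor and an IngestJobContext are already in
 * hand (the variable names below are illustrative only):
 *
 * <pre>{@code
 * Ingester ingester = Ingester.getDefault();
 * ingester.indexText(extractor, file, context); // chunk and index the text
 * ingester.commit();                            // make the new documents searchable
 * }</pre>
 */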
//JMTODO: Should this class really be a singleton?
class Ingester {

    private static final Logger logger = Logger.getLogger(Ingester.class.getName());
    private volatile boolean uncommitedIngests = false;
    private final Server solrServer = KeywordSearch.getServer();
    private static final SolrFieldsVisitor SOLR_FIELDS_VISITOR = new SolrFieldsVisitor();
    private static Ingester instance;
    private static final int SINGLE_READ_CHARS = 512;

    private Ingester() {
    }

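    /**
     * Gets the singleton Ingester instance, creating it on first use.
     */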
    public static synchronized Ingester getDefault() {
        if (instance == null) {
            instance = new Ingester();
        }
        return instance;
    }

    //JMTODO: this is probably useless
    @Override
    @SuppressWarnings("FinalizeDeclaration")
    protected void finalize() throws Throwable {
        super.finalize();

        // Warn if files might have been left uncommitted.
        if (uncommitedIngests) {
            logger.warning("Ingester was used to add files that it never committed."); //NON-NLS
        }
    }

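    /**
     * Sends the name and metadata fields of the given file to Solr with an
     * empty content field, so the file is searchable by its metadata only.
     *
     * @throws IngesterException if the document could not be added to the
     *                           index.
     */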
    void indexMetaDataOnly(AbstractFile file) throws IngesterException {
        indexChunk("", file.getName(), getContentFields(file));
    }

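    /**
     * Sends the metadata fields of the given artifact to Solr with an empty
     * content field, using the extractor-derived artifact name as the source
     * name.
     *
     * @throws IngesterException if the document could not be added to the
     *                           index.
     */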
    void indexMetaDataOnly(BlackboardArtifact artifact) throws IngesterException {
        indexChunk("", new ArtifactTextExtractor().getName(artifact), getContentFields(artifact));
    }

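    /**
     * Builds the map of Solr field names to values for the given item by
     * passing it to the SOLR_FIELDS_VISITOR.
     */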
    private Map<String, String> getContentFields(SleuthkitVisitableItem item) {
        return item.accept(SOLR_FIELDS_VISITOR);
    }

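    /**
     * Extracts the text of the given source with the given extractor, breaks
     * it into chunks, indexes each chunk, and finally indexes a metadata-only
     * "parent" document that records the number of chunks.
     *
     * @return true if indexing completed (or the extractor is disabled),
     *         false if it was cancelled or the content could not be read or
     *         chunked.
     *
     * @throws IngesterException if a chunk could not be added to the index.
     */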
    // TODO (JIRA-3118): Cancelled text indexing does not propagate cancellation to clients
    <T extends SleuthkitVisitableItem> boolean indexText(TextExtractor<T> extractor, T source, IngestJobContext context) throws Ingester.IngesterException {
        final long sourceID = extractor.getID(source);
        final String sourceName = extractor.getName(source);

        int numChunks = 0; //unknown until chunking is done

        if (extractor.isDisabled()) {
            /*
             * Some extractors, notably the strings extractor, have options
             * which can be configured such that no extraction should be done.
             */
            return true;
        }

        Map<String, String> fields = getContentFields(source);
        //Get a reader for the content of the given source
        try (BufferedReader reader = new BufferedReader(extractor.getReader(source))) {
            Chunker chunker = new Chunker(reader);
            for (Chunk chunk : chunker) {
                if (context != null && context.fileIngestIsCancelled()) {
                    logger.log(Level.INFO, "File ingest cancelled. Cancelling keyword search indexing of {0}", sourceName);
                    return false;
                }
                String chunkId = Server.getChunkIdString(sourceID, numChunks + 1);
                fields.put(Server.Schema.ID.toString(), chunkId);
                fields.put(Server.Schema.CHUNK_SIZE.toString(), String.valueOf(chunk.getBaseChunkLength()));
                try {
                    //add the chunk text to Solr index
                    indexChunk(chunk.toString(), sourceName, fields);
                    numChunks++;
                } catch (Ingester.IngesterException ingEx) {
                    extractor.logWarning("Ingester had a problem with extracted string from file '" //NON-NLS
                            + sourceName + "' (id: " + sourceID + ").", ingEx);//NON-NLS

                    throw ingEx; //need to rethrow to signal error and move on
                }
            }
            if (chunker.hasException()) {
                extractor.logWarning("Error chunking content from " + sourceID + ": " + sourceName, chunker.getException());
                return false;
            }
        } catch (Exception ex) {
            extractor.logWarning("Unexpected error, can't read content stream from " + sourceID + ": " + sourceName, ex);//NON-NLS
            return false;
        } finally {
            if (context != null && context.fileIngestIsCancelled()) {
                return false;
            } else {
                //after all chunks, index just the metadata, including numChunks, of the parent file
                fields.put(Server.Schema.NUM_CHUNKS.toString(), Integer.toString(numChunks));
                //reset id field to base document id
                fields.put(Server.Schema.ID.toString(), Long.toString(sourceID));
                //"parent" docs don't have chunk_size
                fields.remove(Server.Schema.CHUNK_SIZE.toString());
                indexChunk(null, sourceName, fields);
            }
        }
        return true;
    }

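    /**
     * Adds a single document to the Solr index: the given chunk text (which
     * may be empty or null) plus the supplied field map. The document is not
     * committed; call commit() to make it searchable.
     *
     * @throws IngesterException if the image id field is missing or the
     *                           document could not be added to the index.
     */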
    private void indexChunk(String chunk, String sourceName, Map<String, String> fields) throws IngesterException {
        if (fields.get(Server.Schema.IMAGE_ID.toString()) == null) {
            //JMTODO: actually if we couldn't get the image id it is set to -1,
            // but does this really mean we don't want to index it?

            //skip the file, image id unknown
            String msg = NbBundle.getMessage(Ingester.class,
                    "Ingester.ingest.exception.unknownImgId.msg", sourceName); //JMTODO: does this need to be internationalized?
            logger.log(Level.SEVERE, msg);
            throw new IngesterException(msg);
        }

        //Make a SolrInputDocument out of the field map
        SolrInputDocument updateDoc = new SolrInputDocument();
        for (String key : fields.keySet()) {
            updateDoc.addField(key, fields.get(key));
        }
        //add the content to the SolrInputDocument
        //JMTODO: can we just add it to the field map before passing that in?
        updateDoc.addField(Server.Schema.CONTENT.toString(), chunk);

        try {
            //TODO: consider timeout thread, or vary socket timeout based on size of indexed content
            TimingMetric metric = EnterpriseHealthMonitor.getTimingMetric("Solr: Index chunk");
            solrServer.addDocument(updateDoc);
            EnterpriseHealthMonitor.submitTimingMetric(metric);
            uncommitedIngests = true;

        } catch (KeywordSearchModuleException | NoOpenCoreException ex) {
            //JMTODO: does this need to be internationalized?
            throw new IngesterException(
                    NbBundle.getMessage(Ingester.class, "Ingester.ingest.exception.err.msg", sourceName), ex);
        }
    }

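    /**
     * Commits any documents added since the last commit, making them
     * searchable.
     */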
    void commit() {
        try {
            solrServer.commit();
            uncommitedIngests = false;
        } catch (NoOpenCoreException | SolrServerException ex) {
            logger.log(Level.WARNING, "Error committing index", ex); //NON-NLS
        }
    }

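    /**
     * Visitor that builds the map of Solr field names to values for each kind
     * of indexable item (files, artifacts, and reports).
     */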
    static private class SolrFieldsVisitor extends SleuthkitItemVisitor.Default<Map<String, String>> {

        @Override
        protected Map<String, String> defaultVisit(SleuthkitVisitableItem svi) {
            return new HashMap<>();
        }

        @Override
        public Map<String, String> visit(File f) {
            return getCommonAndMACTimeFields(f);
        }

        @Override
        public Map<String, String> visit(DerivedFile df) {
            return getCommonAndMACTimeFields(df);
        }

        @Override
        public Map<String, String> visit(Directory d) {
            return getCommonAndMACTimeFields(d);
        }

        @Override
        public Map<String, String> visit(LocalDirectory ld) {
            return getCommonAndMACTimeFields(ld);
        }

        @Override
        public Map<String, String> visit(LayoutFile lf) {
            // layout files do not have times
            return getCommonFields(lf);
        }

        @Override
        public Map<String, String> visit(LocalFile lf) {
            return getCommonAndMACTimeFields(lf);
        }

        @Override
        public Map<String, String> visit(SlackFile f) {
            return getCommonAndMACTimeFields(f);
        }

        /**
         * Gets the common fields plus the MAC times of the given file,
         * formatted as ISO-8601 strings.
         */
        private Map<String, String> getCommonAndMACTimeFields(AbstractFile file) {
            Map<String, String> params = getCommonFields(file);
            params.put(Server.Schema.CTIME.toString(), ContentUtils.getStringTimeISO8601(file.getCtime(), file));
            params.put(Server.Schema.ATIME.toString(), ContentUtils.getStringTimeISO8601(file.getAtime(), file));
            params.put(Server.Schema.MTIME.toString(), ContentUtils.getStringTimeISO8601(file.getMtime(), file));
            params.put(Server.Schema.CRTIME.toString(), ContentUtils.getStringTimeISO8601(file.getCrtime(), file));
            return params;
        }

        /**
         * Gets the fields common to all AbstractFiles: id, data source
         * (image) id, and file name.
         */
        private Map<String, String> getCommonFields(AbstractFile file) {
            Map<String, String> params = new HashMap<>();
            params.put(Server.Schema.ID.toString(), Long.toString(file.getId()));
            try {
                params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(file.getDataSource().getId()));
            } catch (TskCoreException ex) {
                logger.log(Level.SEVERE, "Could not get data source id to properly index the file " + file.getId(), ex); //NON-NLS
                params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(-1));
            }
            params.put(Server.Schema.FILE_NAME.toString(), file.getName());
            return params;
        }

        @Override
        public Map<String, String> visit(BlackboardArtifact artifact) {
            Map<String, String> params = new HashMap<>();
            params.put(Server.Schema.ID.toString(), Long.toString(artifact.getArtifactID()));
            try {
                params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(ArtifactTextExtractor.getDataSource(artifact).getId()));
            } catch (TskCoreException ex) {
                logger.log(Level.SEVERE, "Could not get data source id to properly index the artifact " + artifact.getArtifactID(), ex); //NON-NLS
                params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(-1));
            }
            return params;
        }

        @Override
        public Map<String, String> visit(Report report) {
            Map<String, String> params = new HashMap<>();
            params.put(Server.Schema.ID.toString(), Long.toString(report.getId()));
            try {
                Content dataSource = report.getDataSource();
                if (null == dataSource) {
                    params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(-1));
                } else {
                    params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(dataSource.getId()));
                }
            } catch (TskCoreException ex) {
                logger.log(Level.SEVERE, "Could not get data source id to properly index the report, using default value. Id: " + report.getId(), ex); //NON-NLS
                params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(-1));
            }
            return params;
        }
    }

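    /**
     * Indicates that a problem occurred while adding a document to the Solr
     * index.
     */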
    static class IngesterException extends Exception {

        private static final long serialVersionUID = 1L;

        IngesterException(String message, Throwable ex) {
            super(message, ex);
        }

        IngesterException(String message) {
            super(message);
        }
    }
}