SCANEDIT is a 3D scene editing framework that uses language models to understand and execute scene manipulation tasks based on text prompts.
- Inference code released
- Installation scripts
- Documentation
- Inference sample
- Evaluation setup
- Evaluation data
# Clone the repository
git clone https://github.com/aminebdj/ScanEdit
cd ScanEdit

# Run the installation script
chmod +x install_environment.sh
./install_environment.sh

# Activate the environment
conda activate scanedit
export GROQ_API_KEY="your_api_key_here"
SCENE_NAME="3e8bba0176"

python inference.py \
  --scene_name "$SCENE_NAME" \
  --text_prompt "Create a seating area for a lecture on the whiteboard" \
  --path_to_ply "/data/scannetpp/plys/validation/${SCENE_NAME}.ply" \
  --masks_classes_path "/data/scannetpp/semantic_3d_masks/${SCENE_NAME}.pt" \
  --path_to_rgb_data "/data/scannetpp/data/${SCENE_NAME}/dslr/undistorted_images" \
  --path_to_2d_masks "/data/scannetpp/undistorted_rast_masks/${SCENE_NAME}.pt" \
  --path_to_annotations "/data/scannetpp/vlm_annotations/${SCENE_NAME}.yaml" \
  --dataset "scannetpp" \
  --llm_name "openai/gpt-oss-120b" \
  --folder_name "results" \
  --output_dir "./outputs" \
  --verbose
Click to expand full argument list
- --path_to_rgb_data: RGB images directory
- --path_to_2d_masks: 2D projection masks (.pt)
- --path_to_annotations: VLM annotations (.yaml)
- --path_to_save_additional_masks: Output path for masks
- --img_sample_path: Sample image for testing
- --groq_api_key: GROQ API key (default: the GROQ_API_KEY environment variable)
- --llm_name: LLM model name (default: "openai/gpt-oss-120b")
- --dataset: Dataset type (choices: scannetpp, replica)
- --folder_name: Output subdirectory name
- --output_dir: Base output directory
- --device: Computation device (cuda/cpu)
- --use_preds: Use predicted masks (adds extension)
- --verbose: Enable detailed logging
project_root/
├── data/
│   └── scannetpp/
│       ├── data/
│       │   └── {scene_name}/
│       │       └── dslr/
│       │           └── undistorted_images/
│       │               ├── DSC00001.JPG
│       │               └── ...
│       ├── plys/
│       │   └── validation/
│       │       ├── 3e8bba0176.ply
│       │       └── ...
│       ├── undistorted_rast_masks/
│       │   └── {scene_name}.pt
│       │
│       ├── semantic_3d_masks/
│       │   └── {scene_name}.pt
│       │
│       └── vlm_annotations/
│           └── {scene_name}.yaml
│
└── outputs/
    └── {folder_name}/
        └── {scene_name}/
            ├── final.ply
            └── ...
@inproceedings{el2025scanedit,
  title={ScanEdit: Hierarchically-Guided Functional 3D Scan Editing},
  author={El Amine Boudjoghra, Mohamed and Laptev, Ivan and Dai, Angela},
  booktitle={Proceedings of the IEEE/CVF International Conference on Computer Vision},
  pages={27105--27115},
  year={2025}
}