# Provision the EC2 instance
terraform -chdir="terraform" apply

# SSH into the VM
ssh -i "llm.pem" ubuntu@ec2-98-84-134-186.compute-1.amazonaws.com

# Clone the repo
git clone https://github.com/AdarshNandanwar/Language-Model-Math-Solver-via-Code-Generation.git hw6
cd /home/ubuntu/hw6
conda env create -f environment.yml
conda activate llmhw6
# Set up the environment
export LC_ALL=en_US.utf-8
conda create --name llmhw6 python=3.10

# If you hit an error like `UnavailableInvalidChannel: HTTP 403 FORBIDDEN for channel <some channel>`
# on your EC2 instance, remove the offending channel and make sure the default channel is present:
conda config --remove channels https://aws-ml-conda-ec2.s3.us-west-2.amazonaws.com
conda config --add channels defaults

conda init bash
exit  # Restart the shell

conda activate llmhw6
conda install pytorch torchvision torchaudio pytorch-cuda=12.1 -c pytorch -c nvidia
pip install --upgrade pip
pip3 install -r ~/hw6/requirements.txt --use-pep517
pip install -qqq flash-attn
# Start the Jupyter server on the VM
jupyter notebook --no-browser
# Once the tunnel below is up, open the notebook locally at e.g.
# http://localhost:2222/tree?token=894eb5eeee4d8d4142d79773fbcedbfaef1e8eabb3a9cdcf

# In another local window, forward local port 2222 to the VM's port 8888
ssh -i llm.pem -L 2222:localhost:8888 ubuntu@ec2-98-84-134-186.compute-1.amazonaws.com
python generate_clean_dataset.py
# Example pipeline (commented out) for google/gemma-2-2b-it:
# python generate_prompts.py --model_id google/gemma-2-2b-it
# rm -r generations/google/gemma-2-2b-it
# python inference.py --model_id google/gemma-2-2b-it --max_samples 932 --use_vllm 1
# python evaluate.py --model_id google/gemma-2-2b-it
# python metrics.py --model_id google/gemma-2-2b-it

# Llama 3.2 3B Instruct
python generate_prompts.py --model_id meta-llama/Llama-3.2-3B-Instruct
rm -r generations/meta-llama/Llama-3.2-3B-Instruct
python inference.py --model_id meta-llama/Llama-3.2-3B-Instruct --max_samples 932 --use_vllm 1
python evaluate.py --model_id meta-llama/Llama-3.2-3B-Instruct
python metrics.py --model_id meta-llama/Llama-3.2-3B-Instruct

# Qwen 2.5 Coder 7B Instruct
python generate_prompts.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct
rm -r generations/Qwen/Qwen2.5-Coder-7B-Instruct
python inference.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct --max_samples 932 --use_vllm 1
python evaluate.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct
python metrics.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct
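Each model goes through the same four-stage pipeline: generate_prompts.py builds the prompts, inference.py generates candidate solutions, evaluate.py scores them, and metrics.py aggregates the results. As a rough illustration of the evaluation step, the sketch below runs a model-generated Python program in a subprocess and compares its printed output with the reference answer. This is an assumption about how scoring works, and the helper names (`run_generated_solution`, `is_correct`) are hypothetical; the authoritative logic is in evaluate.py.

```python
# Hypothetical sketch of the evaluation step; the real logic lives in evaluate.py.
import subprocess

def run_generated_solution(code: str, timeout_s: int = 10) -> str | None:
    """Execute a model-generated Python program and capture what it prints."""
    try:
        result = subprocess.run(
            ["python", "-c", code],
            capture_output=True, text=True, timeout=timeout_s,
        )
    except subprocess.TimeoutExpired:
        return None  # treat hangs as failures
    return result.stdout.strip() if result.returncode == 0 else None

def is_correct(code: str, reference_answer: str) -> bool:
    """Compare the program's output against the reference, tolerating float noise."""
    output = run_generated_solution(code)
    if output is None:
        return False
    try:
        return abs(float(output) - float(reference_answer)) < 1e-6
    except ValueError:
        return output == reference_answer.strip()
```

Running the untrusted generated code in a separate subprocess with a timeout keeps a runaway solution from stalling the whole evaluation run.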
Note: for finetuning, we used the `prompt1` verbalizer.
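For illustration, a verbalizer here is just the template that turns a word problem into a code-generation prompt. The sketch below is a hypothetical stand-in for what `prompt1` might look like, not the actual template, which is defined in the repo (see generate_prompts.py).

```python
# Hypothetical stand-in for the prompt1 verbalizer; the real template is in generate_prompts.py.
def verbalize(question: str) -> str:
    """Wrap a math word problem in a code-generation prompt."""
    return (
        "Solve the following math problem by writing a Python program "
        "that prints only the final numeric answer.\n\n"
        f"Problem: {question}\n"
    )
```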
export HUGGINGFACE_TOKEN=""
export WANDB_API_KEY=""

# Llama hyperparameter tuning
python finetune.py --model_id meta-llama/Llama-3.2-3B-Instruct --max_train_samples 1000 --batch_size 2 --config_file configs/config_1.json
python inference.py --model_id meta-llama/Llama-3.2-3B-Instruct --ft_model_id meta-llama/Llama-3.2-3B-Instruct-math-solver-config_1 --max_samples 100 --use_vllm 0
python evaluate.py --model_id meta-llama/Llama-3.2-3B-Instruct-math-solver-config_1
python metrics.py --model_id meta-llama/Llama-3.2-3B-Instruct --ft_model_id meta-llama/Llama-3.2-3B-Instruct-math-solver-config_1

python finetune.py --model_id meta-llama/Llama-3.2-3B-Instruct --max_train_samples 1000 --batch_size 2 --config_file configs/config_2.json
python inference.py --model_id meta-llama/Llama-3.2-3B-Instruct --ft_model_id meta-llama/Llama-3.2-3B-Instruct-math-solver-config_2 --max_samples 100 --use_vllm 0
python evaluate.py --model_id meta-llama/Llama-3.2-3B-Instruct-math-solver-config_2
python metrics.py --model_id meta-llama/Llama-3.2-3B-Instruct --ft_model_id meta-llama/Llama-3.2-3B-Instruct-math-solver-config_2

python finetune.py --model_id meta-llama/Llama-3.2-3B-Instruct --max_train_samples 1000 --batch_size 2 --config_file configs/config_3.json
python inference.py --model_id meta-llama/Llama-3.2-3B-Instruct --ft_model_id meta-llama/Llama-3.2-3B-Instruct-math-solver-config_3 --max_samples 100 --use_vllm 0
python evaluate.py --model_id meta-llama/Llama-3.2-3B-Instruct-math-solver-config_3
python metrics.py --model_id meta-llama/Llama-3.2-3B-Instruct --ft_model_id meta-llama/Llama-3.2-3B-Instruct-math-solver-config_3

# Llama finetuning
python finetune.py --model_id meta-llama/Llama-3.2-3B-Instruct --batch_size 2 --config_file configs/config_llama.json
python inference.py --model_id meta-llama/Llama-3.2-3B-Instruct --ft_model_id meta-llama/Llama-3.2-3B-Instruct-math-solver-config_llama --max_samples 932 --use_vllm 0
python evaluate.py --model_id meta-llama/Llama-3.2-3B-Instruct-math-solver-config_llama
python metrics.py --model_id meta-llama/Llama-3.2-3B-Instruct --ft_model_id meta-llama/Llama-3.2-3B-Instruct-math-solver-config_llama

# Qwen hyperparameter tuning
python finetune.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct --max_train_samples 1000 --batch_size 2 --config_file configs/config_1.json
python inference.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct --ft_model_id Qwen/Qwen2.5-Coder-7B-Instruct-math-solver-config_1 --max_samples 100 --use_vllm 0
python evaluate.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct-math-solver-config_1
python metrics.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct --ft_model_id Qwen/Qwen2.5-Coder-7B-Instruct-math-solver-config_1

python finetune.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct --max_train_samples 1000 --batch_size 2 --config_file configs/config_2.json
python inference.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct --ft_model_id Qwen/Qwen2.5-Coder-7B-Instruct-math-solver-config_2 --max_samples 100 --use_vllm 0
python evaluate.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct-math-solver-config_2
python metrics.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct --ft_model_id Qwen/Qwen2.5-Coder-7B-Instruct-math-solver-config_2

python finetune.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct --max_train_samples 1000 --batch_size 2 --config_file configs/config_3.json
python inference.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct --ft_model_id Qwen/Qwen2.5-Coder-7B-Instruct-math-solver-config_3 --max_samples 100 --use_vllm 0
python evaluate.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct-math-solver-config_3
python metrics.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct --ft_model_id Qwen/Qwen2.5-Coder-7B-Instruct-math-solver-config_3

# Qwen finetuning
python finetune.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct --batch_size 2 --config_file configs/config_qwen.json
python inference.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct --ft_model_id Qwen/Qwen2.5-Coder-7B-Instruct-math-solver-config_qwen --max_samples 932 --use_vllm 0
python evaluate.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct-math-solver-config_qwen
python metrics.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct --ft_model_id Qwen/Qwen2.5-Coder-7B-Instruct-math-solver-config_qwen
Set up the Hugging Face Accelerate configuration and start training with the following commands.
accelerate config
# A copy of the generated config file is kept at the root of the repo as "accelerate_config.yaml"
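For reference, a single-machine multi-GPU Accelerate config usually looks something like the sketch below. The values here are illustrative assumptions (GPU count, precision); generate the real file interactively with `accelerate config` and compare against the repo's accelerate_config.yaml.

```yaml
# Illustrative only; generate the real file with `accelerate config`.
compute_environment: LOCAL_MACHINE
distributed_type: MULTI_GPU
machine_rank: 0
num_machines: 1
num_processes: 4        # one process per GPU (assumed 4-GPU instance)
mixed_precision: bf16   # assumed; match your GPU's capabilities
gpu_ids: all
main_training_function: main
use_cpu: false
```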
accelerate launch --config_file /home/ubuntu/.cache/huggingface/accelerate/default_config.yaml finetune.py --model_id meta-llama/Llama-3.2-3B-Instruct --batch_size 8 --config_file configs/config_llama.json
accelerate launch --config_file /home/ubuntu/.cache/huggingface/accelerate/default_config.yaml finetune.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct --batch_size 4 --config_file configs/config_qwen.json

# Copy the artifacts back to the local machine
scp -i llm.pem -r ubuntu@ec2-98-84-134-186.compute-1.amazonaws.com:~/hw6/prompts .
scp -i llm.pem -r ubuntu@ec2-98-84-134-186.compute-1.amazonaws.com:~/hw6/generations .
scp -i llm.pem -r ubuntu@ec2-98-84-134-186.compute-1.amazonaws.com:~/hw6/evaluation .
scp -i llm.pem -r ubuntu@ec2-98-84-134-186.compute-1.amazonaws.com:~/hw6/metrics .
# Tear down the AWS resources
terraform -chdir="terraform" destroy