# Provision the EC2 instance
terraform -chdir="terraform" apply

# SSH into the VM
ssh -i "llm.pem" ubuntu@ec2-98-84-134-186.compute-1.amazonaws.com

# Clone the repo
git clone https://github.com/AdarshNandanwar/Language-Model-Math-Solver-via-Code-Generation.git hw6
cd /home/ubuntu/hw6
conda env create -f environment.yml
conda activate llmhw6
# Set up the environment
export LC_ALL=en_US.utf-8
conda create --name llmhw6 python=3.10

# If you hit an error like `UnavailableInvalidChannel: HTTP 403 FORBIDDEN for channel <some channel>`
# on your EC2 instance, remove the offending channel and make sure the default channel is present:
conda config --remove channels https://aws-ml-conda-ec2.s3.us-west-2.amazonaws.com
conda config --add channels defaults

conda init bash
exit  # Restart the shell

conda activate llmhw6
conda install pytorch torchvision torchaudio pytorch-cuda=12.1 -c pytorch -c nvidia
pip install --upgrade pip
pip3 install -r ~/hw6/requirements.txt --use-pep517
pip install -qqq flash-attn
# Start the Jupyter server on the VM
jupyter notebook --no-browser
# Once the tunnel below is up, open the notebook locally at e.g.
# http://localhost:2222/tree?token=894eb5eeee4d8d4142d79773fbcedbfaef1e8eabb3a9cdcf

# In another local window, forward local port 2222 to the VM's port 8888
ssh -i llm.pem -L 2222:localhost:8888 ubuntu@ec2-98-84-134-186.compute-1.amazonaws.com
python generate_clean_dataset.py
# Example pipeline (commented out) for google/gemma-2-2b-it:
# python generate_prompts.py --model_id google/gemma-2-2b-it
# rm -r generations/google/gemma-2-2b-it
# python inference.py --model_id google/gemma-2-2b-it --max_samples 932 --use_vllm 1
# python evaluate.py --model_id google/gemma-2-2b-it
# python metrics.py --model_id google/gemma-2-2b-it

# Llama 3.2 3B Instruct
python generate_prompts.py --model_id meta-llama/Llama-3.2-3B-Instruct
rm -r generations/meta-llama/Llama-3.2-3B-Instruct
python inference.py --model_id meta-llama/Llama-3.2-3B-Instruct --max_samples 932 --use_vllm 1
python evaluate.py --model_id meta-llama/Llama-3.2-3B-Instruct
python metrics.py --model_id meta-llama/Llama-3.2-3B-Instruct

# Qwen 2.5 Coder 7B Instruct
python generate_prompts.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct
rm -r generations/Qwen/Qwen2.5-Coder-7B-Instruct
python inference.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct --max_samples 932 --use_vllm 1
python evaluate.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct
python metrics.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct
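Each model goes through the same four-stage pipeline: generate_prompts.py builds the prompts, inference.py generates candidate solutions, evaluate.py scores them, and metrics.py aggregates the results. As a rough illustration of the evaluation step, the sketch below runs a model-generated Python program in a subprocess and compares its printed output with the reference answer. This is an assumption about how scoring works, and the helper names (`run_generated_solution`, `is_correct`) are hypothetical; the authoritative logic is in evaluate.py.

```python
# Hypothetical sketch of the evaluation step; the real logic lives in evaluate.py.
import subprocess

def run_generated_solution(code: str, timeout_s: int = 10) -> str | None:
    """Execute a model-generated Python program and capture what it prints."""
    try:
        result = subprocess.run(
            ["python", "-c", code],
            capture_output=True, text=True, timeout=timeout_s,
        )
    except subprocess.TimeoutExpired:
        return None  # treat hangs as failures
    return result.stdout.strip() if result.returncode == 0 else None

def is_correct(code: str, reference_answer: str) -> bool:
    """Compare the program's output against the reference, tolerating float noise."""
    output = run_generated_solution(code)
    if output is None:
        return False
    try:
        return abs(float(output) - float(reference_answer)) < 1e-6
    except ValueError:
        return output == reference_answer.strip()
```

Running the untrusted generated code in a separate subprocess with a timeout keeps a runaway solution from stalling the whole evaluation run.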
Note: for finetuning, we used the `prompt1` verbalizer.
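For illustration, a verbalizer here is just the template that turns a word problem into a code-generation prompt. The sketch below is a hypothetical stand-in for what `prompt1` might look like, not the actual template, which is defined in the repo (see generate_prompts.py).

```python
# Hypothetical stand-in for the prompt1 verbalizer; the real template is in generate_prompts.py.
def verbalize(question: str) -> str:
    """Wrap a math word problem in a code-generation prompt."""
    return (
        "Solve the following math problem by writing a Python program "
        "that prints only the final numeric answer.\n\n"
        f"Problem: {question}\n"
    )
```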
export HUGGINGFACE_TOKEN=""
export WANDB_API_KEY=""

# Llama hyperparameter tuning
python finetune.py --model_id meta-llama/Llama-3.2-3B-Instruct --max_train_samples 1000 --batch_size 2 --config_file configs/config_1.json
python inference.py --model_id meta-llama/Llama-3.2-3B-Instruct --ft_model_id meta-llama/Llama-3.2-3B-Instruct-math-solver-config_1 --max_samples 100 --use_vllm 0
python evaluate.py --model_id meta-llama/Llama-3.2-3B-Instruct-math-solver-config_1
python metrics.py --model_id meta-llama/Llama-3.2-3B-Instruct --ft_model_id meta-llama/Llama-3.2-3B-Instruct-math-solver-config_1

python finetune.py --model_id meta-llama/Llama-3.2-3B-Instruct --max_train_samples 1000 --batch_size 2 --config_file configs/config_2.json
python inference.py --model_id meta-llama/Llama-3.2-3B-Instruct --ft_model_id meta-llama/Llama-3.2-3B-Instruct-math-solver-config_2 --max_samples 100 --use_vllm 0
python evaluate.py --model_id meta-llama/Llama-3.2-3B-Instruct-math-solver-config_2
python metrics.py --model_id meta-llama/Llama-3.2-3B-Instruct --ft_model_id meta-llama/Llama-3.2-3B-Instruct-math-solver-config_2

python finetune.py --model_id meta-llama/Llama-3.2-3B-Instruct --max_train_samples 1000 --batch_size 2 --config_file configs/config_3.json
python inference.py --model_id meta-llama/Llama-3.2-3B-Instruct --ft_model_id meta-llama/Llama-3.2-3B-Instruct-math-solver-config_3 --max_samples 100 --use_vllm 0
python evaluate.py --model_id meta-llama/Llama-3.2-3B-Instruct-math-solver-config_3
python metrics.py --model_id meta-llama/Llama-3.2-3B-Instruct --ft_model_id meta-llama/Llama-3.2-3B-Instruct-math-solver-config_3

# Llama finetuning
python finetune.py --model_id meta-llama/Llama-3.2-3B-Instruct --batch_size 2 --config_file configs/config_llama.json
python inference.py --model_id meta-llama/Llama-3.2-3B-Instruct --ft_model_id meta-llama/Llama-3.2-3B-Instruct-math-solver-config_llama --max_samples 932 --use_vllm 0
python evaluate.py --model_id meta-llama/Llama-3.2-3B-Instruct-math-solver-config_llama
python metrics.py --model_id meta-llama/Llama-3.2-3B-Instruct --ft_model_id meta-llama/Llama-3.2-3B-Instruct-math-solver-config_llama

# Qwen hyperparameter tuning
python finetune.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct --max_train_samples 1000 --batch_size 2 --config_file configs/config_1.json
python inference.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct --ft_model_id Qwen/Qwen2.5-Coder-7B-Instruct-math-solver-config_1 --max_samples 100 --use_vllm 0
python evaluate.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct-math-solver-config_1
python metrics.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct --ft_model_id Qwen/Qwen2.5-Coder-7B-Instruct-math-solver-config_1

python finetune.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct --max_train_samples 1000 --batch_size 2 --config_file configs/config_2.json
python inference.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct --ft_model_id Qwen/Qwen2.5-Coder-7B-Instruct-math-solver-config_2 --max_samples 100 --use_vllm 0
python evaluate.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct-math-solver-config_2
python metrics.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct --ft_model_id Qwen/Qwen2.5-Coder-7B-Instruct-math-solver-config_2

python finetune.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct --max_train_samples 1000 --batch_size 2 --config_file configs/config_3.json
python inference.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct --ft_model_id Qwen/Qwen2.5-Coder-7B-Instruct-math-solver-config_3 --max_samples 100 --use_vllm 0
python evaluate.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct-math-solver-config_3
python metrics.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct --ft_model_id Qwen/Qwen2.5-Coder-7B-Instruct-math-solver-config_3

# Qwen finetuning
python finetune.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct --batch_size 2 --config_file configs/config_qwen.json
python inference.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct --ft_model_id Qwen/Qwen2.5-Coder-7B-Instruct-math-solver-config_qwen --max_samples 932 --use_vllm 0
python evaluate.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct-math-solver-config_qwen
python metrics.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct --ft_model_id Qwen/Qwen2.5-Coder-7B-Instruct-math-solver-config_qwen
Set up the Hugging Face Accelerate configuration and start training with the following commands.
accelerate config
# A copy of the generated config file is kept at the root of the repo as "accelerate_config.yaml"
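For reference, a single-machine multi-GPU Accelerate config usually looks something like the sketch below. The values here are illustrative assumptions (GPU count, precision); generate the real file interactively with `accelerate config` and compare against the repo's accelerate_config.yaml.

```yaml
# Illustrative only; generate the real file with `accelerate config`.
compute_environment: LOCAL_MACHINE
distributed_type: MULTI_GPU
machine_rank: 0
num_machines: 1
num_processes: 4        # one process per GPU (assumed 4-GPU instance)
mixed_precision: bf16   # assumed; match your GPU's capabilities
gpu_ids: all
main_training_function: main
use_cpu: false
```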
accelerate launch --config_file /home/ubuntu/.cache/huggingface/accelerate/default_config.yaml finetune.py --model_id meta-llama/Llama-3.2-3B-Instruct --batch_size 8 --config_file configs/config_llama.json
accelerate launch --config_file /home/ubuntu/.cache/huggingface/accelerate/default_config.yaml finetune.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct --batch_size 4 --config_file configs/config_qwen.json

# Copy the artifacts back to the local machine
scp -i llm.pem -r ubuntu@ec2-98-84-134-186.compute-1.amazonaws.com:~/hw6/prompts .
scp -i llm.pem -r ubuntu@ec2-98-84-134-186.compute-1.amazonaws.com:~/hw6/generations .
scp -i llm.pem -r ubuntu@ec2-98-84-134-186.compute-1.amazonaws.com:~/hw6/evaluation .
scp -i llm.pem -r ubuntu@ec2-98-84-134-186.compute-1.amazonaws.com:~/hw6/metrics .
# Tear down the AWS resources
terraform -chdir="terraform" destroy