
Commit 43570b6

Misc additions (#16)
1 parent 5ef38b9 commit 43570b6

9 files changed: 93 additions & 28 deletions

Dockerfile

Lines changed: 1 addition & 1 deletion

@@ -1,4 +1,4 @@
-FROM nvcr.io/nvidia/pytorch:22.11-py3
+FROM nvcr.io/nvidia/pytorch:23.01-py3
 
 ARG USER=1000
 ARG USERNAME=user

Makefile

Lines changed: 8 additions & 0 deletions

@@ -55,6 +55,14 @@ gpt-bigcode-mqa1:
 gpt-bigcode-mqa2:
 	${RUN_HF} ${BIGCODE_ARGS} attention_type=3
 
+.PHONY: santacoder-original
+santacoder-original:
+	${RUN_HF} --pretrained_model=bigcode/santacoder --tokenizer=bigcode/santacoder --trust_remote_code ${EXP_ARGS}
+
 .PHONY: santacoder
 santacoder:
 	${RUN_HF} --pretrained_model=bigcode/santacoder-fast-inference --tokenizer=bigcode/santacoder ${EXP_ARGS}
+
+.PHONY: optimized-santacoder
+optimized-santacoder:
+	${RUN_HF} --pretrained_model=olivierdehaene/optimized-santacoder --tokenizer=bigcode/santacoder --trust_remote_code ${EXP_ARGS}

requirements.txt

Lines changed: 1 addition & 0 deletions

@@ -1,5 +1,6 @@
 accelerate==0.15.0
 bitsandbytes
+safetensors
 deepspeed==0.7.7
 -e ./transformers

src/main.py

Lines changed: 2 additions & 0 deletions

@@ -14,11 +14,13 @@ def main(argv: Optional[List[str]] = None) -> None:
     pipeline = pipeline_class(
         model_type=args.model_type,
         pretrained_model=args.pretrained_model,
+        pretrained_config=args.pretrained_config,
         config_args=args.config_args,
         tokenizer=args.tokenizer,
         device=args.device,
         dtype=args.dtype,
         fast_init=args.fast_init,
+        trust_remote_code=args.trust_remote_code,
     )
 
     benchmark_end_to_end(

src/pipelines/pipeline.py

Lines changed: 22 additions & 7 deletions

@@ -9,6 +9,7 @@
 
 from src.utils.fast_init import fast_init
 from src.utils.logging import format_ms, log_rank_n
+from src.utils.utils import parse_revision
 from transformers import (
     CONFIG_MAPPING,
     AutoConfig,
@@ -41,12 +42,14 @@ def __init__(
         self,
         *,
         model_type: Optional[str] = None,
+        pretrained_config: Optional[str] = None,
         pretrained_model: Optional[str] = None,
         config_args: Dict[str, Any],
         tokenizer: str,
         device: torch.device,
         dtype: torch.dtype,
         fast_init: bool = True,
+        trust_remote_code: bool = False,
     ):
         self.initialization_metrics = {}
         log_rank_n("*** Setting up tokenizer", logger.info)
@@ -60,10 +63,11 @@ def __init__(
         self.dtype = dtype
         self.is_int8 = self.dtype == torch.int8
         self.fast_init = fast_init
+        self.trust_remote_code = trust_remote_code
         if self.is_int8 and self.device != torch.device("cuda"):
             raise ValueError(f"Model quantization not supported on device {self.device}")
 
-        self.config = self._get_config(model_type, pretrained_model, config_args)
+        self.config = self._get_config(model_type, pretrained_config or pretrained_model, config_args)
         t2 = time.perf_counter()
 
         logger.info(f"Model configuration: {self.config}")
@@ -86,7 +90,9 @@ def _create_model(self) -> PreTrainedModel:
         log_rank_n("*** Creating model", logger.info)
         with fast_init(self.device) if self.fast_init else contextlib.nullcontext():
             torch_dtype = torch.float16 if self.is_int8 else self.dtype
-            model = AutoModelForCausalLM.from_config(config=self.config, torch_dtype=torch_dtype)
+            model = AutoModelForCausalLM.from_config(
+                config=self.config, torch_dtype=torch_dtype, trust_remote_code=self.trust_remote_code
+            )
         t1 = time.perf_counter()
         log_rank_n("*** Moving to device", logger.info)
         model.to(self.device)
@@ -98,6 +104,7 @@ def _create_model(self) -> PreTrainedModel:
         self.initialization_metrics["model initialization"] = t1 - t0
         self.initialization_metrics["move to device"] = t2 - t1
         self.initialization_metrics["initialize weights"] = t3 - t2
+
         return model
 
     def _reload_model(self):
@@ -118,9 +125,12 @@ def _load_pretrained(self, pretrained_model: str) -> PreTrainedModel:
         log_rank_n(f"*** Loading model from {pretrained_model}", logger.info)
         kwargs = {"load_in_8bit": True, "device_map": "auto"} if self.is_int8 else {"torch_dtype": self.dtype}
         with fast_init(self.device) if self.fast_init else contextlib.nullcontext():
+            pretrained_model, revision = parse_revision(pretrained_model)
             model = AutoModelForCausalLM.from_pretrained(
                 pretrained_model,
+                revision=revision,
                 config=self.config,
+                trust_remote_code=self.trust_remote_code,
                 **kwargs,
             )
         t1 = time.perf_counter()
@@ -135,7 +145,7 @@ def _load_pretrained(self, pretrained_model: str) -> PreTrainedModel:
     def _get_config(
         self,
         model_type: Optional[str],
-        pretrained_model: Optional[str],
+        pretrained_config: Optional[str],
         config_args: Dict[str, Any],
     ) -> PretrainedConfig:
         config_args = {
@@ -145,15 +155,16 @@ def _get_config(
         }
 
         if model_type is None:
-            if pretrained_model is None:
+            if pretrained_config is None:
                 raise ValueError("You need to provide either --model_type or --pretrained_model")
             config_class = AutoConfig
         elif model_type not in CONFIG_MAPPING:
             raise ValueError(f"Unknown model type: {model_type}")
         else:
             config_class = CONFIG_MAPPING[model_type]
+            config_args["model_type"] = model_type
 
-        if pretrained_model is None:
+        if pretrained_config is None:
             config_args.update(
                 {
                     "bos_token_id": self.tokenizer.bos_token_id,
@@ -163,7 +174,10 @@ def _get_config(
             )
             config, unused = config_class.from_dict({}, **config_args)
         else:
-            config, unused = config_class.from_pretrained(pretrained_model, **config_args)
+            pretrained_config, revision = parse_revision(pretrained_config)
+            config, unused = config_class.from_pretrained(
+                pretrained_config, revision=revision, trust_remote_code=self.trust_remote_code, **config_args
+            )
 
         if unused:
             raise ValueError(f"There were unused configuration parameters: {tuple(unused)}")
@@ -216,7 +230,8 @@ def aggregate_and_format_metrics(self, metrics: List[Dict[str, Any]]):
             "Latency (decode)": format_ms(mean_metrics[DECODE_TIME]),
             "Latency (max)": format_ms(max(all_metrics[END_TO_END_TIME])),
             "Latency (min)": format_ms(min(all_metrics[END_TO_END_TIME])),
-            "Tokens generated": f"{mean_metrics[NUM_GENERATED_TOKENS]:.0f}",
+            "Tokens generated (average)": f"{mean_metrics[NUM_GENERATED_TOKENS]:.0f}",
+            "Tokens generated (total)": f"{np.sum(all_metrics[NUM_GENERATED_TOKENS]).item():.0f}",
             "Throughput (model)": f"{model_throughput:.2f} tokens/s",
             "Throughput (end to end)": f"{throughput:.2f} tokens/s",
             "Token time (end to end)": f"{format_ms(throughput ** -1)}/token",

src/utils/arguments.py

Lines changed: 11 additions & 5 deletions

@@ -11,8 +11,10 @@ def get_arg_parser() -> ArgumentParser:
 
     # Model
     parser.add_argument("--model_type")
+    parser.add_argument("--pretrained_config")
    parser.add_argument("--pretrained_model")
     parser.add_argument("--tokenizer", default="gpt2")
+    parser.add_argument("--trust_remote_code", action="store_true")
     parser.add_argument("config_args", nargs="*")
 
     # Runtime
@@ -47,10 +49,14 @@ def get_arg_parser() -> ArgumentParser:
 def parse_config_args(config_args: List[str]) -> typing.Dict[str, Any]:
     parsed_config_args = {}
     for config_arg in config_args:
-        try:
-            key, value = [x.strip() for x in config_arg.split("=")]
-        except ValueError:
-            raise ValueError(f"Cannot parse argument: {config_arg}")
+        split_arg = [x.strip() for x in config_arg.split("=", 1)]
+        if len(split_arg) != 2:
+            raise ValueError(f"Cannot parse argument (not in 'key=value' format): {config_arg}")
+        key, value = split_arg
+        if not key.isidentifier():
+            raise ValueError(f"Invalid argument (not a python identifier): {key}")
+        if key in parsed_config_args:
+            raise ValueError(f"Duplicate argument: {key}")
         if value.lower() == "true":
             value = True
         elif value.lower() == "false":
@@ -65,7 +71,7 @@ def parse_config_args(config_args: List[str]) -> typing.Dict[str, Any]:
                 value = float(value)
             except ValueError:
                 pass
-        parsed_config_args[key.strip()] = value
+        parsed_config_args[key] = value
     return parsed_config_args
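The stricter parse_config_args now splits on the first "=" only, requires keys to be python identifiers, and rejects duplicates. A quick sketch with made-up keys (each failing call raises on its own):

    from src.utils.arguments import parse_config_args

    args = parse_config_args(["use_cache=true", "revision=branch=dev"])
    # "true"/"false" become booleans; split("=", 1) keeps later "=" signs in the value:
    # args == {"use_cache": True, "revision": "branch=dev"}

    parse_config_args(["n_head=1", "n_head=2"])  # ValueError: Duplicate argument: n_head
    parse_config_args(["2fast=1"])               # ValueError: not a python identifier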

src/utils/benchmark.py

Lines changed: 34 additions & 14 deletions

@@ -1,12 +1,13 @@
 import contextlib
 import gc
 import logging
+import time
 from typing import List, Union
 
 import torch
 
 from src.pipelines.pipeline import Pipeline
-from src.utils.logging import format_ms, log_dict, log_rank_n
+from src.utils.logging import format_mib, format_ms, log_dict, log_rank_n
 
 
 logger = logging.getLogger(__name__)
@@ -91,8 +92,27 @@ def benchmark_end_to_end(
     else:
         profiler = contextlib.nullcontext()
 
+    benchmark_stats = {
+        "Model parameters": pipeline.get_num_parameters(),
+        "Batch size": len(inputs),
+        **generate_kwargs,
+        **pipeline.get_initialization_metrics(),
+        "Warmup cycles": skip + warmup,
+        "Benchmark cycles": cycles,
+        "Total cycles": skip + warmup + cycles,
+    }
+
+    if pipeline.device.type == "cuda":
+        benchmark_stats["Initial memory used"] = format_mib(torch.cuda.memory_allocated())
+        benchmark_stats["Initial memory reserved"] = format_mib(torch.cuda.memory_reserved())
+        torch.cuda.reset_peak_memory_stats()
+
+    t0 = time.perf_counter()
     with profiler as p:
         for step in range(skip + warmup + cycles):
+            if step == skip + warmup:
+                t1 = time.perf_counter()
+                benchmark_stats["Warmup time"] = format_ms(t1 - t0)
             generated_text, metrics = pipeline(inputs, **generate_kwargs)
             if profile:
                 p.step()
@@ -108,18 +128,18 @@ def benchmark_end_to_end(
             torch.cuda.synchronize()
             gc.collect()
            torch.cuda.empty_cache()
+    if pipeline.device.type == "cuda":
+        benchmark_stats["Memory used"] = format_mib(torch.cuda.memory_allocated())
+        benchmark_stats["Memory reserved"] = format_mib(torch.cuda.memory_reserved())
+        benchmark_stats["Max memory used"] = format_mib(torch.cuda.max_memory_allocated())
+        benchmark_stats["Max memory reserved"] = format_mib(torch.cuda.max_memory_reserved())
+
+    t2 = time.perf_counter()
+    benchmark_stats["Benchmark time"] = format_ms(t2 - t1)
+    benchmark_stats["Total time"] = format_ms(t2 - t0)
 
     if len(all_metrics) > 0:
-        log_rank_n("*** Performance metrics:", logger.info)
-        log_dict(pipeline.aggregate_and_format_metrics(all_metrics), logger.info)
-
-        log_rank_n("*** Benchmarking stats:", logger.info)
-        log_dict(
-            {
-                "Model parameters": pipeline.get_num_parameters(),
-                "Batch size": len(inputs),
-                **generate_kwargs,
-                **pipeline.get_initialization_metrics(),
-            },
-            logger.info,
-        )
+        benchmark_stats.update(pipeline.aggregate_and_format_metrics(all_metrics))
+
+    log_rank_n("*** Benchmark results:", logger.info)
+    log_dict(benchmark_stats, logger.info)
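benchmark_end_to_end now separates the warmup phase (skip + warmup cycles) from the measured phase (cycles) with perf_counter checkpoints. A stripped-down sketch of the same timing pattern, with a dummy workload standing in for the pipeline call:

    import time

    skip, warmup, cycles = 1, 2, 5
    t0 = time.perf_counter()
    for step in range(skip + warmup + cycles):
        if step == skip + warmup:
            t1 = time.perf_counter()  # everything before this point counts as warmup
        time.sleep(0.01)              # dummy workload in place of pipeline(inputs, ...)
    t2 = time.perf_counter()
    print(f"warmup: {1000 * (t1 - t0):.2f} ms, benchmark: {1000 * (t2 - t1):.2f} ms")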

src/utils/logging.py

Lines changed: 4 additions & 0 deletions

@@ -43,3 +43,7 @@ def log_dict(data: dict, logger: Callable = logging.info, rank: int = 0):
 
 def format_ms(t: float):
     return f"{1000 * t:.2f} ms"
+
+
+def format_mib(m: float):
+    return f"{m/2**20:.0f} MiB"
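For reference, sample outputs of the two formatters (illustrative values only):

    from src.utils.logging import format_mib, format_ms

    format_ms(0.0123)        # "12.30 ms"
    format_mib(512 * 2**20)  # "512 MiB"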

src/utils/utils.py

Lines changed: 10 additions & 1 deletion

@@ -1,6 +1,6 @@
 import time
 from functools import partial
-from typing import Any, List, Tuple, Union
+from typing import Any, List, Optional, Tuple, Union
 
 
 def run_and_log_time(execs: Union[List[partial], partial]) -> Tuple[Union[List[Any], Any], float]:
@@ -16,3 +16,12 @@ def run_and_log_time(execs: Union[List[partial], partial]) -> Tuple[Union[List[A
 
     time_elapsed = time.perf_counter() - start_time
     return results, time_elapsed
+
+
+def parse_revision(pretrained_model: Optional[str]) -> Tuple[Optional[str], Optional[str]]:
+    revision = None
+    if pretrained_model is not None:
+        pretrained_split = pretrained_model.split(":", 1)
+        if len(pretrained_split) == 2:
+            pretrained_model, revision = pretrained_split
+    return pretrained_model, revision
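A quick illustration of the new helper (model names chosen arbitrarily):

    from src.utils.utils import parse_revision

    parse_revision("bigcode/santacoder:main")  # ("bigcode/santacoder", "main")
    parse_revision("bigcode/santacoder")       # ("bigcode/santacoder", None)
    parse_revision(None)                       # (None, None)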
