From b0dd3b7e00b1d5462e4043d3d68a391e33c17831 Mon Sep 17 00:00:00 2001
From: stevhliu
Date: Tue, 12 Aug 2025 20:36:30 -0700
Subject: [PATCH 1/3] initial

---
 docs/source/en/using-diffusers/loading.md | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/docs/source/en/using-diffusers/loading.md b/docs/source/en/using-diffusers/loading.md
index 591a1382967e..586b1c692c0f 100644
--- a/docs/source/en/using-diffusers/loading.md
+++ b/docs/source/en/using-diffusers/loading.md
@@ -112,6 +112,30 @@ print(pipe.transformer.dtype, pipe.vae.dtype) # (torch.bfloat16, torch.float16)
 
 If a component is not explicitly specified in the dictionary and no `default` is provided, it will be loaded with `torch.float32`.
 
+#### Parallel loading
+
+Large models are often [sharded](../training/distributed_inference#model-sharding) into smaller files so that they are easier to load. Diffusers supports loading shards in parallel to speed up the loading process.
+
+Set the environment variables below to enable parallel loading.
+
+- Set `HF_ENABLE_PARALLEL_LOADING` to `"YES"` to enable parallel loading of shards.
+- Set `HF_PARALLEL_LOADING_WORKERS` to configure the number of parallel threads to use when loading shards. More workers load a model faster but use more memory.
+
+```py
+import os
+import torch
+from diffusers import DiffusionPipeline
+
+os.environ["HF_ENABLE_PARALLEL_LOADING"] = "YES"
+os.environ["HF_PARALLEL_LOADING_WORKERS"] = "12"
+
+pipeline = DiffusionPipeline.from_pretrained(
+    "Wan-AI/Wan2.2-I2V-A14B-Diffusers",
+    torch_dtype=torch.bfloat16,
+    device_map="cuda"
+)
+```
+
 ### Local pipeline
 
 To load a pipeline locally, use [git-lfs](https://git-lfs.github.com/) to manually download a checkpoint to your local disk.

From e06b21f0be3fe5ae13ab32f8c03743a39d273153 Mon Sep 17 00:00:00 2001
From: stevhliu
Date: Wed, 13 Aug 2025 10:52:53 -0700
Subject: [PATCH 2/3] feedback

---
 docs/source/en/using-diffusers/loading.md | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/docs/source/en/using-diffusers/loading.md b/docs/source/en/using-diffusers/loading.md
index 586b1c692c0f..aef1988f8658 100644
--- a/docs/source/en/using-diffusers/loading.md
+++ b/docs/source/en/using-diffusers/loading.md
@@ -112,7 +112,7 @@ print(pipe.transformer.dtype, pipe.vae.dtype) # (torch.bfloat16, torch.float16)
 
 If a component is not explicitly specified in the dictionary and no `default` is provided, it will be loaded with `torch.float32`.
 
-#### Parallel loading
+### Parallel loading
 
 Large models are often [sharded](../training/distributed_inference#model-sharding) into smaller files so that they are easier to load. Diffusers supports loading shards in parallel to speed up the loading process.
 
@@ -121,6 +121,8 @@ Set the environment variables below to enable parallel loading.
 - Set `HF_ENABLE_PARALLEL_LOADING` to `"YES"` to enable parallel loading of shards.
 - Set `HF_PARALLEL_LOADING_WORKERS` to configure the number of parallel threads to use when loading shards. More workers load a model faster but use more memory.
 
+The `device_map` argument should be set to `"cuda"` to pre-allocate a large chunk of memory based on the model size. This substantially reduces model load time because warming up the memory allocator now avoids many smaller calls to the allocator later.
+
 ```py
 import os
 import torch

From ca661ef13fc1bca9858b61daf664fe2b17b8b1b9 Mon Sep 17 00:00:00 2001
From: Sayak Paul
Date: Thu, 14 Aug 2025 09:39:28 +0530
Subject: [PATCH 3/3] Update docs/source/en/using-diffusers/loading.md

---
 docs/source/en/using-diffusers/loading.md | 2 --
 1 file changed, 2 deletions(-)

diff --git a/docs/source/en/using-diffusers/loading.md b/docs/source/en/using-diffusers/loading.md
index aef1988f8658..20f0cc51e0af 100644
--- a/docs/source/en/using-diffusers/loading.md
+++ b/docs/source/en/using-diffusers/loading.md
@@ -129,8 +129,6 @@ import torch
 from diffusers import DiffusionPipeline
 
 os.environ["HF_ENABLE_PARALLEL_LOADING"] = "YES"
-os.environ["HF_PARALLEL_LOADING_WORKERS"] = "12"
-
 pipeline = DiffusionPipeline.from_pretrained(
     "Wan-AI/Wan2.2-I2V-A14B-Diffusers",
     torch_dtype=torch.bfloat16,
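
With all three patches applied, the documented usage enables parallel loading with a single environment variable. The snippet below is a minimal timing sketch of that final example, not part of the patch itself: the `time.perf_counter` instrumentation is an addition for illustration, and it assumes a CUDA-capable machine with the `Wan-AI/Wan2.2-I2V-A14B-Diffusers` checkpoint already cached locally so that download time does not dominate the measurement.

```py
import os
import time

# Enable parallel shard loading before constructing the pipeline.
# The worker count can optionally be tuned via HF_PARALLEL_LOADING_WORKERS.
os.environ["HF_ENABLE_PARALLEL_LOADING"] = "YES"

import torch
from diffusers import DiffusionPipeline

start = time.perf_counter()
pipeline = DiffusionPipeline.from_pretrained(
    "Wan-AI/Wan2.2-I2V-A14B-Diffusers",
    torch_dtype=torch.bfloat16,
    # "cuda" pre-allocates a large block up front, avoiding many
    # smaller allocator calls while the shards are copied in.
    device_map="cuda",
)
print(f"pipeline loaded in {time.perf_counter() - start:.1f}s")
```

Rerunning the script with `HF_ENABLE_PARALLEL_LOADING` unset gives a baseline load time for comparison.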
