From b0dd3b7e00b1d5462e4043d3d68a391e33c17831 Mon Sep 17 00:00:00 2001
From: stevhliu
Date: Tue, 12 Aug 2025 20:36:30 -0700
Subject: [PATCH 1/3] initial

---
 docs/source/en/using-diffusers/loading.md | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/docs/source/en/using-diffusers/loading.md b/docs/source/en/using-diffusers/loading.md
index 591a1382967e..586b1c692c0f 100644
--- a/docs/source/en/using-diffusers/loading.md
+++ b/docs/source/en/using-diffusers/loading.md
@@ -112,6 +112,30 @@ print(pipe.transformer.dtype, pipe.vae.dtype) # (torch.bfloat16, torch.float16)
 
 If a component is not explicitly specified in the dictionary and no `default` is provided, it will be loaded with `torch.float32`.
 
+#### Parallel loading
+
+Large models are often [sharded](../training/distributed_inference#model-sharding) into smaller files so that they are easier to load. Diffusers supports loading shards in parallel to speed up the loading process.
+
+Set the environment variables below to enable parallel loading.
+
+- Set `HF_ENABLE_PARALLEL_LOADING` to `"YES"` to enable parallel loading of shards.
+- Set `HF_PARALLEL_LOADING_WORKERS` to configure the number of parallel threads to use when loading shards. More workers load a model faster but use more memory.
+
+```py
+import os
+import torch
+from diffusers import DiffusionPipeline
+
+os.environ["HF_ENABLE_PARALLEL_LOADING"] = "YES"
+os.environ["HF_PARALLEL_LOADING_WORKERS"] = "12"
+
+pipeline = DiffusionPipeline.from_pretrained(
+    "Wan-AI/Wan2.2-I2V-A14B-Diffusers",
+    torch_dtype=torch.bfloat16,
+    device_map="cuda"
+)
+```
+
 ### Local pipeline
 
 To load a pipeline locally, use [git-lfs](https://git-lfs.github.com/) to manually download a checkpoint to your local disk.

From e06b21f0be3fe5ae13ab32f8c03743a39d273153 Mon Sep 17 00:00:00 2001
From: stevhliu
Date: Wed, 13 Aug 2025 10:52:53 -0700
Subject: [PATCH 2/3] feedback

---
 docs/source/en/using-diffusers/loading.md | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/docs/source/en/using-diffusers/loading.md b/docs/source/en/using-diffusers/loading.md
index 586b1c692c0f..aef1988f8658 100644
--- a/docs/source/en/using-diffusers/loading.md
+++ b/docs/source/en/using-diffusers/loading.md
@@ -112,7 +112,7 @@ print(pipe.transformer.dtype, pipe.vae.dtype) # (torch.bfloat16, torch.float16)
 
 If a component is not explicitly specified in the dictionary and no `default` is provided, it will be loaded with `torch.float32`.
 
-#### Parallel loading
+### Parallel loading
 
 Large models are often [sharded](../training/distributed_inference#model-sharding) into smaller files so that they are easier to load. Diffusers supports loading shards in parallel to speed up the loading process.
 
@@ -121,6 +121,8 @@ Set the environment variables below to enable parallel loading.
 - Set `HF_ENABLE_PARALLEL_LOADING` to `"YES"` to enable parallel loading of shards.
 - Set `HF_PARALLEL_LOADING_WORKERS` to configure the number of parallel threads to use when loading shards. More workers load a model faster but use more memory.
 
+The `device_map` argument should be set to `"cuda"` to pre-allocate a large chunk of memory based on the model size. This substantially reduces model load time because warming up the memory allocator now avoids many smaller calls to the allocator later.
+
 ```py
 import os
 import torch

From ca661ef13fc1bca9858b61daf664fe2b17b8b1b9 Mon Sep 17 00:00:00 2001
From: Sayak Paul
Date: Thu, 14 Aug 2025 09:39:28 +0530
Subject: [PATCH 3/3] Update docs/source/en/using-diffusers/loading.md

---
 docs/source/en/using-diffusers/loading.md | 2 --
 1 file changed, 2 deletions(-)

diff --git a/docs/source/en/using-diffusers/loading.md b/docs/source/en/using-diffusers/loading.md
index aef1988f8658..20f0cc51e0af 100644
--- a/docs/source/en/using-diffusers/loading.md
+++ b/docs/source/en/using-diffusers/loading.md
@@ -129,8 +129,6 @@ import torch
 from diffusers import DiffusionPipeline
 
 os.environ["HF_ENABLE_PARALLEL_LOADING"] = "YES"
-os.environ["HF_PARALLEL_LOADING_WORKERS"] = "12"
-
 pipeline = DiffusionPipeline.from_pretrained(
     "Wan-AI/Wan2.2-I2V-A14B-Diffusers",
     torch_dtype=torch.bfloat16,
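
With all three patches applied, the documented usage enables parallel loading with a single environment variable. The snippet below is a minimal timing sketch of that final example, not part of the patch itself: the `time.perf_counter` instrumentation is an addition for illustration, and it assumes a CUDA-capable machine with the `Wan-AI/Wan2.2-I2V-A14B-Diffusers` checkpoint already cached locally so that download time does not dominate the measurement.

```py
import os
import time

# Enable parallel shard loading before constructing the pipeline.
# The worker count can optionally be tuned via HF_PARALLEL_LOADING_WORKERS.
os.environ["HF_ENABLE_PARALLEL_LOADING"] = "YES"

import torch
from diffusers import DiffusionPipeline

start = time.perf_counter()
pipeline = DiffusionPipeline.from_pretrained(
    "Wan-AI/Wan2.2-I2V-A14B-Diffusers",
    torch_dtype=torch.bfloat16,
    # "cuda" pre-allocates a large block up front, avoiding many
    # smaller allocator calls while the shards are copied in.
    device_map="cuda",
)
print(f"pipeline loaded in {time.perf_counter() - start:.1f}s")
```

Rerunning the script with `HF_ENABLE_PARALLEL_LOADING` unset gives a baseline load time for comparison.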
