MCPcopy
hub / github.com/lllyasviel/FramePack / worker

Function worker

demo_gradio.py:103–315  ·  view source on GitHub ↗
(input_image, prompt, n_prompt, seed, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, use_teacache, mp4_crf)

Source from the content-addressed store, hash-verified

101
102@torch.no_grad()
103def worker(input_image, prompt, n_prompt, seed, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, use_teacache, mp4_crf):
104 total_latent_sections = (total_second_length * 30) / (latent_window_size * 4)
105 total_latent_sections = int(max(round(total_latent_sections), 1))
106
107 job_id = generate_timestamp()
108
109 stream.output_queue.push(('progress', (None, '', make_progress_bar_html(0, 'Starting ...'))))
110
111 try:
112 # Clean GPU
113 if not high_vram:
114 unload_complete_models(
115 text_encoder, text_encoder_2, image_encoder, vae, transformer
116 )
117
118 # Text encoding
119
120 stream.output_queue.push(('progress', (None, '', make_progress_bar_html(0, 'Text encoding ...'))))
121
122 if not high_vram:
123 fake_diffusers_current_device(text_encoder, gpu) # since we only encode one text - that is one model move and one encode, offload is same time consumption since it is also one load and one encode.
124 load_model_as_complete(text_encoder_2, target_device=gpu)
125
126 llama_vec, clip_l_pooler = encode_prompt_conds(prompt, text_encoder, text_encoder_2, tokenizer, tokenizer_2)
127
128 if cfg == 1:
129 llama_vec_n, clip_l_pooler_n = torch.zeros_like(llama_vec), torch.zeros_like(clip_l_pooler)
130 else:
131 llama_vec_n, clip_l_pooler_n = encode_prompt_conds(n_prompt, text_encoder, text_encoder_2, tokenizer, tokenizer_2)
132
133 llama_vec, llama_attention_mask = crop_or_pad_yield_mask(llama_vec, length=512)
134 llama_vec_n, llama_attention_mask_n = crop_or_pad_yield_mask(llama_vec_n, length=512)
135
136 # Processing input image
137
138 stream.output_queue.push(('progress', (None, '', make_progress_bar_html(0, 'Image processing ...'))))
139
140 H, W, C = input_image.shape
141 height, width = find_nearest_bucket(H, W, resolution=640)
142 input_image_np = resize_and_center_crop(input_image, target_width=width, target_height=height)
143
144 Image.fromarray(input_image_np).save(os.path.join(outputs_folder, f'{job_id}.png'))
145
146 input_image_pt = torch.from_numpy(input_image_np).float() / 127.5 - 1
147 input_image_pt = input_image_pt.permute(2, 0, 1)[None, :, None]
148
149 # VAE encoding
150
151 stream.output_queue.push(('progress', (None, '', make_progress_bar_html(0, 'VAE encoding ...'))))
152
153 if not high_vram:
154 load_model_as_complete(vae, target_device=gpu)
155
156 start_latent = vae_encode(input_image_pt, vae)
157
158 # CLIP Vision
159
160 stream.output_queue.push(('progress', (None, '', make_progress_bar_html(0, 'CLIP Vision encoding ...'))))

Callers

nothing calls this directly

Calls 15

generate_timestamp — Function · 0.90
make_progress_bar_html — Function · 0.90
unload_complete_models — Function · 0.90
load_model_as_complete — Function · 0.90
encode_prompt_conds — Function · 0.90
crop_or_pad_yield_mask — Function · 0.90
find_nearest_bucket — Function · 0.90
resize_and_center_crop — Function · 0.90
vae_encode — Function · 0.90
hf_clip_vision_encode — Function · 0.90

Tested by

no test coverage detected