Reshape audio latent (b, c, t, f) → ref_audio token dict (b, t, c*f).
(latent)
| 524 | |
| 525 | |
| 526 | def _patchify_audio_latent(latent): |
| 527 | """Reshape audio latent (b, c, t, f) → ref_audio token dict (b, t, c*f).""" |
| 528 | b, c, t, f = latent.shape |
| 529 | ref_tokens = latent.permute(0, 2, 1, 3).reshape(b, t, c * f) |
| 530 | return {"tokens": ref_tokens} |
| 531 | |
| 532 | |
| 533 | @comfy_node(name="LTXVSetAudioRefTokens") |