MCPcopy
hub / github.com/lllyasviel/Fooocus / patched_SDClipModel_forward

Function patched_SDClipModel_forward

modules/patch_clip.py:109–146  ·  view source on GitHub ↗
(self, tokens)

Source from the content-addressed store, hash-verified

107
108
def patched_SDClipModel_forward(self, tokens):
    """Encode ``tokens`` with ``self.transformer`` and return ``(z, pooled_output)``.

    The transformer's current input embeddings are saved before
    ``self.set_up_textual_embeddings`` runs and are always put back afterwards,
    including when encoding raises. (That helper is not visible here; it is
    presumably able to install a different embedding table -- confirm.)

    Args:
        tokens: Token ids in whatever nested form
            ``self.set_up_textual_embeddings`` accepts. Its result is converted
            with ``torch.LongTensor`` and indexed as a 2-D tensor
            (presumably batch x sequence -- confirm against callers).

    Returns:
        A tuple ``(z, pooled_output)``:

        * ``z`` -- float tensor chosen by ``self.layer``: ``last_hidden_state``
          for ``"last"``, ``pooler_output[:, None, :]`` for ``"pooled"``,
          otherwise ``hidden_states[self.layer_idx]`` (passed through the text
          model's final layer norm when ``self.layer_norm_hidden_state`` is set).
        * ``pooled_output`` -- float ``pooler_output``, multiplied by
          ``self.text_projection`` when that is not ``None``; ``None`` if the
          transformer output carries no pooler output.
    """
    backup_embeds = self.transformer.get_input_embeddings()
    device = backup_embeds.weight.device

    try:
        tokens = self.set_up_textual_embeddings(tokens, backup_embeds)
        tokens = torch.LongTensor(tokens).to(device)

        attention_mask = None
        if self.enable_attention_masks:
            # Re-read the embeddings on purpose: the end marker is the highest
            # index of whatever table is installed *now*, not of the backup.
            max_token = self.transformer.get_input_embeddings().weight.shape[0] - 1

            # Mask is 1 up to and including the first ``max_token`` in each row
            # and 0 after it (all ones when the row has no ``max_token``).
            # Vectorized so no per-element Python loop / tensor comparison runs.
            is_end = (tokens == max_token).to(tokens.dtype)
            ends_before = is_end.cumsum(dim=1) - is_end
            attention_mask = (ends_before == 0).to(tokens.dtype)

        outputs = self.transformer(
            input_ids=tokens,
            attention_mask=attention_mask,
            output_hidden_states=self.layer == "hidden",
        )
    finally:
        # Restore unconditionally so a failed forward pass cannot leave a
        # temporary embedding table on the transformer.
        self.transformer.set_input_embeddings(backup_embeds)

    if self.layer == "last":
        z = outputs.last_hidden_state
    elif self.layer == "pooled":
        z = outputs.pooler_output[:, None, :]
    else:
        z = outputs.hidden_states[self.layer_idx]
        if self.layer_norm_hidden_state:
            z = self.transformer.text_model.final_layer_norm(z)

    # Treat a missing attribute and an explicit ``None`` the same way.
    pooled_output = getattr(outputs, "pooler_output", None)
    if pooled_output is not None:
        pooled_output = pooled_output.float()
        if self.text_projection is not None:
            pooled_output = (
                pooled_output.to(self.text_projection.device)
                @ self.text_projection.float()
            )

    return z.float(), pooled_output
147
148
149def patched_ClipVisionModel__init__(self, json_config):

Callers

nothing calls this directly

Calls 4

toMethod · 0.80
get_input_embeddingsMethod · 0.45
set_input_embeddingsMethod · 0.45

Tested by

no test coverage detected