()
| 917 | |
| 918 | |
def text_encoder():
    """Load the CLIP text encoder and its tokenizer.

    Both are loaded from the ``openai/clip-vit-large-patch14`` checkpoint.
    The tokenizer is configured to use ``"!"`` as its pad token, and the
    function asserts that this token maps to id 0.

    Returns:
        A ``(text_encoder_model, tokenizer_model)`` tuple: the
        ``CLIPTextModelWithProjection`` instance followed by the
        ``CLIPTokenizer`` instance.

    Raises:
        AssertionError: if the pad token does not resolve to token id 0.
    """
    print("loading CLIP text encoder")

    checkpoint = "openai/clip-vit-large-patch14"

    # "!" is used as the pad token so that the padding id is 0
    # (verified by the assertion below).
    padding_token = "!"
    tokenizer_kwargs = {"pad_token": padding_token, "device_map": "auto"}

    tokenizer = CLIPTokenizer.from_pretrained(checkpoint, **tokenizer_kwargs)

    assert tokenizer.convert_tokens_to_ids(padding_token) == 0

    # device_map="auto" is deliberately not passed here:
    # `CLIPTextModel` does not support device_map="auto".
    encoder = CLIPTextModelWithProjection.from_pretrained(checkpoint)

    print("done loading CLIP text encoder")

    return encoder, tokenizer
| 940 | |
| 941 | |
| 942 | def prior(*, args, checkpoint_map_location): |
no test coverage detected
searching dependent graphs…