Run all vision cache tests for a single model.
test_model(model_path: str, ssd_dir: Optional[str] = None) -> bool
| 45 | |
| 46 | |
| 47 | def test_model(model_path: str, ssd_dir: Optional[str] = None) -> bool: |
| 48 | """Run all vision cache tests for a single model.""" |
| 49 | from mlx_vlm.utils import load as vlm_load, prepare_inputs |
| 50 | |
| 51 | from omlx.engine.vlm import _patch_gemma4_vision_tower, _patch_video_processor_bug |
| 52 | from omlx.utils.image import compute_image_hash |
| 53 | |
| 54 | print(f"\n{'='*60}") |
| 55 | print(f"Testing: {model_path}") |
| 56 | print(f"{'='*60}") |
| 57 | |
| 58 | # ── Step 1: Load model ────────────────────────────────────── |
| 59 | print("\n[1/6] Loading model...") |
| 60 | _patch_video_processor_bug() |
| 61 | _patch_gemma4_vision_tower(None) |
| 62 | vlm_model, processor = vlm_load(model_path) |
| 63 | |
| 64 | model_type = getattr(vlm_model.config, "model_type", "unknown") |
| 65 | has_encode_image = hasattr(vlm_model, "encode_image") |
| 66 | |
| 67 | print(f" model_type: {model_type}") |
| 68 | print(f" has encode_image: {has_encode_image}") |
| 69 | print(f" in _QWEN_VISION_MODELS: {model_type in _QWEN_VISION_MODELS}") |
| 70 | print(f" is llava: {model_type == 'llava'}") |
| 71 | |
| 72 | # ── Step 2: Prepare inputs ────────────────────────────────── |
| 73 | print("\n[2/6] Preparing vision inputs...") |
| 74 | test_image = create_test_image(336, 336) |
| 75 | image_hash = compute_image_hash([test_image]) |
| 76 | print(f" image_hash: {image_hash[:16]}...") |
| 77 | |
| 78 | tokenizer = getattr(processor, "tokenizer", processor) |
| 79 | |
| 80 | # Use mlx-vlm's apply_chat_template to properly insert image tokens. |
| 81 | # Different models use different image placeholder formats. |
| 82 | from mlx_vlm.prompt_utils import apply_chat_template as vlm_apply_template |
| 83 | |
| 84 | messages = [{"role": "user", "content": "Describe this image."}] |
| 85 | try: |
| 86 | prompt = vlm_apply_template( |
| 87 | processor, vlm_model.config, messages, num_images=1 |
| 88 | ) |
| 89 | except Exception: |
| 90 | # Fallback: try tokenizer directly |
| 91 | try: |
| 92 | prompt = tokenizer.apply_chat_template( |
| 93 | messages, tokenize=False, add_generation_prompt=True |
| 94 | ) |
| 95 | except Exception: |
| 96 | prompt = "Describe this image." |
| 97 | |
| 98 | inputs = prepare_inputs( |
| 99 | processor, images=[test_image], prompts=[prompt] |
| 100 | ) |
| 101 | input_ids = inputs["input_ids"] |
| 102 | pixel_values = inputs.get("pixel_values") |
| 103 | attention_mask = inputs.get("attention_mask") |
| 104 | extra_model_inputs = { |
no test coverage detected