| 870 | return backend_pb2.Result(message="Media generated", success=True) |
| 871 | |
| 872 | def GenerateVideo(self, request, context): |
| 873 | try: |
| 874 | prompt = request.prompt |
| 875 | if not prompt: |
| 876 | print(f"GenerateVideo: No prompt provided for video generation.", file=sys.stderr) |
| 877 | return backend_pb2.Result(success=False, message="No prompt provided for video generation") |
| 878 | |
| 879 | # Debug: Print raw request values |
| 880 | print(f"GenerateVideo: Raw request values - num_frames: {request.num_frames}, fps: {request.fps}, cfg_scale: {request.cfg_scale}, step: {request.step}", file=sys.stderr) |
| 881 | |
| 882 | # Use the request's values when they are positive; otherwise fall back to the built-in defaults |
| 883 | num_frames = request.num_frames if request.num_frames > 0 else 81 |
| 884 | fps = request.fps if request.fps > 0 else 16 |
| 885 | cfg_scale = request.cfg_scale if request.cfg_scale > 0 else 4.0 |
| 886 | num_inference_steps = request.step if request.step > 0 else 40 |
| 887 | |
| 888 | print(f"GenerateVideo: Using values - num_frames: {num_frames}, fps: {fps}, cfg_scale: {cfg_scale}, num_inference_steps: {num_inference_steps}", file=sys.stderr) |
| 889 | |
| 890 | # Prepare generation parameters |
| 891 | kwargs = { |
| 892 | "prompt": prompt, |
| 893 | "negative_prompt": request.negative_prompt if request.negative_prompt else "", |
| 894 | "height": request.height if request.height > 0 else 720, |
| 895 | "width": request.width if request.width > 0 else 1280, |
| 896 | "num_frames": num_frames, |
| 897 | "guidance_scale": cfg_scale, |
| 898 | "num_inference_steps": num_inference_steps, |
| 899 | } |
| 900 | |
| 901 | # Add custom options from self.options (including guidance_scale_2 if specified) |
| 902 | kwargs.update(self.options) |
| 903 | |
| 904 | # Seed a generator only when a positive seed is given (a seed of 0 or less adds no generator here) |
| 905 | if request.seed > 0: |
| 906 | kwargs["generator"] = torch.Generator(device=self.device).manual_seed(request.seed) |
| 907 | |
| 908 | # Handle start and end images for video generation |
| 909 | if request.start_image: |
| 910 | kwargs["start_image"] = load_image(request.start_image) |
| 911 | if request.end_image: |
| 912 | kwargs["end_image"] = load_image(request.end_image) |
| 913 | |
| 914 | print(f"Generating video with {kwargs=}", file=sys.stderr) |
| 915 | print(f"GenerateVideo: Pipeline type: {self.PipelineType}, ltx2_pipeline flag: {self.ltx2_pipeline}", file=sys.stderr) |
| 916 | |
| 917 | # Generate video frames based on pipeline type |
| 918 | if self.ltx2_pipeline or self.PipelineType in ["LTX2Pipeline", "LTX2ImageToVideoPipeline"]: |
| 919 | # LTX-2 generation with audio (supports both text-to-video and image-to-video) |
| 920 | # Determine if this is text-to-video (no image) or image-to-video (has image) |
| 921 | has_image = bool(request.start_image) |
| 922 | |
| 923 | # Remove image-related parameters that might have been added earlier |
| 924 | kwargs.pop("start_image", None) |
| 925 | kwargs.pop("end_image", None) |
| 926 | |
| 927 | # LTX2ImageToVideoPipeline uses 'image' parameter for image-to-video |
| 928 | # LTX2Pipeline (text-to-video) doesn't need an image parameter |
| 929 | if has_image: |