r""" Function for text-to-image generation. Args: prompt (`str` or `List[str]`): The prompt or prompts to guide the image generation. negative_prompt (`str` or `List[str]`, *optional*): The prompt or prompts not to guide the ima
(
self,
prompt: Union[str, List[str]],
negative_prompt: Optional[Union[str, List[str]]] = None,
height: int = 512,
width: int = 512,
num_inference_steps: int = 50,
guidance_scale: float = 7.5,
num_images_per_prompt: Optional[int] = 1,
eta: float = 0.0,
generator: Optional[torch.Generator] = None,
latents: Optional[torch.FloatTensor] = None,
max_embeddings_multiples: Optional[int] = 3,
output_type: Optional[str] = "pil",
return_dict: bool = True,
callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
callback_steps: Optional[int] = 1,
**kwargs,
)
| 1300 | # return StableDiffusionPipelineOutput(images=image, nsfw_content_detected=has_nsfw_concept) |
| 1301 | |
| 1302 | def text2img( |
| 1303 | self, |
| 1304 | prompt: Union[str, List[str]], |
| 1305 | negative_prompt: Optional[Union[str, List[str]]] = None, |
| 1306 | height: int = 512, |
| 1307 | width: int = 512, |
| 1308 | num_inference_steps: int = 50, |
| 1309 | guidance_scale: float = 7.5, |
| 1310 | num_images_per_prompt: Optional[int] = 1, |
| 1311 | eta: float = 0.0, |
| 1312 | generator: Optional[torch.Generator] = None, |
| 1313 | latents: Optional[torch.FloatTensor] = None, |
| 1314 | max_embeddings_multiples: Optional[int] = 3, |
| 1315 | output_type: Optional[str] = "pil", |
| 1316 | return_dict: bool = True, |
| 1317 | callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None, |
| 1318 | callback_steps: Optional[int] = 1, |
| 1319 | **kwargs, |
| 1320 | ): |
| 1321 | r""" |
| 1322 | Function for text-to-image generation. |
| 1323 | Args: |
| 1324 | prompt (`str` or `List[str]`): |
| 1325 | The prompt or prompts to guide the image generation. |
| 1326 | negative_prompt (`str` or `List[str]`, *optional*): |
| 1327 | The prompt or prompts not to guide the image generation. Ignored when not using guidance (i.e., ignored |
| 1328 | if `guidance_scale` is not greater than `1`). |
| 1329 | height (`int`, *optional*, defaults to 512): |
| 1330 | The height in pixels of the generated image. |
| 1331 | width (`int`, *optional*, defaults to 512): |
| 1332 | The width in pixels of the generated image. |
| 1333 | num_inference_steps (`int`, *optional*, defaults to 50): |
| 1334 | The number of denoising steps. More denoising steps usually lead to a higher quality image at the |
| 1335 | expense of slower inference. |
| 1336 | guidance_scale (`float`, *optional*, defaults to 7.5): |
| 1337 | Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598). |
| 1338 | `guidance_scale` is defined as `w` of equation 2. of [Imagen |
| 1339 | Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale > |
| 1340 | 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`, |
| 1341 | usually at the expense of lower image quality. |
| 1342 | num_images_per_prompt (`int`, *optional*, defaults to 1): |
| 1343 | The number of images to generate per prompt. |
| 1344 | eta (`float`, *optional*, defaults to 0.0): |
| 1345 | Corresponds to parameter eta (η) in the DDIM paper: https://arxiv.org/abs/2010.02502. Only applies to |
| 1346 | [`schedulers.DDIMScheduler`], will be ignored for others. |
| 1347 | generator (`torch.Generator`, *optional*): |
| 1348 | A [torch generator](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make generation |
| 1349 | deterministic. |
| 1350 | latents (`torch.FloatTensor`, *optional*): |
| 1351 | Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image |
| 1352 | generation. Can be used to tweak the same generation with different prompts. If not provided, a latents |
| 1353 | tensor will be generated by sampling using the supplied random `generator`. |
| 1354 | max_embeddings_multiples (`int`, *optional*, defaults to `3`): |
| 1355 | The max multiple length of prompt embeddings compared to the max output length of text encoder. |
| 1356 | output_type (`str`, *optional*, defaults to `"pil"`): |
| 1357 | The output format of the generated image. Choose between |
| 1358 | [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`. |
| 1359 | return_dict (`bool`, *optional*, defaults to `True`): |