Create and submit an OpenAI Batch API job. Args: client: An initialized OpenAI client. requests: List of BatchRequest objects to submit. description: Human-readable description for the batch. Returns: The batch ID for tracking the job. Raises: ValueError: If the number of requests exceeds the API limit.
(
client: OpenAI,
requests: List[BatchRequest],
description: str = "ScrapeGraphAI batch scraping job",
)
| 111 | |
| 112 | |
def create_batch(
    client: OpenAI,
    requests: List[BatchRequest],
    description: str = "ScrapeGraphAI batch scraping job",
) -> str:
    """Create and submit an OpenAI Batch API job.

    The requests are serialized to a single JSONL payload (one
    ``to_jsonl_line()`` result per line), uploaded as a ``purpose="batch"``
    file, and then submitted as a batch against ``/v1/chat/completions``
    with a 24h completion window.

    Args:
        client: An initialized OpenAI client.
        requests: List of BatchRequest objects to submit. Must not be empty.
        description: Human-readable description for the batch; stored under
            the ``"description"`` key of the batch metadata.

    Returns:
        The batch ID for tracking the job.

    Raises:
        ValueError: If ``requests`` is empty, or if the number of requests
            exceeds ``MAX_REQUESTS_PER_BATCH``.
        Exception: Any error raised by the client while uploading the file
            or creating the batch is propagated unchanged.
    """
    request_count = len(requests)

    # Fail fast locally instead of uploading an empty file that the API
    # cannot turn into a useful batch.
    if request_count == 0:
        raise ValueError("Cannot create a batch with no requests.")

    if request_count > MAX_REQUESTS_PER_BATCH:
        raise ValueError(
            f"Batch size {request_count} exceeds the maximum of "
            f"{MAX_REQUESTS_PER_BATCH}. Split into multiple batches."
        )

    # Build JSONL content: one serialized request per line.
    jsonl_bytes = "\n".join(
        req.to_jsonl_line() for req in requests
    ).encode("utf-8")

    logger.info(
        "Uploading batch input file with %d requests...", request_count
    )

    # Upload as a (filename, content) tuple so the file carries an explicit
    # .jsonl name rather than the HTTP layer's anonymous default.
    input_file = client.files.create(
        file=("batch_input.jsonl", jsonl_bytes),
        purpose="batch",
    )

    logger.info("Input file uploaded: %s", input_file.id)

    # Create the batch. If this step fails, the uploaded file has no batch
    # referencing it, so remove it best-effort before re-raising.
    try:
        batch = client.batches.create(
            input_file_id=input_file.id,
            endpoint="/v1/chat/completions",
            completion_window="24h",
            metadata={"description": description},
        )
    except Exception:
        try:
            client.files.delete(input_file.id)
        except Exception:
            # Cleanup is best-effort; the original failure is what matters.
            logger.warning(
                "Could not delete orphaned input file %s",
                input_file.id,
                exc_info=True,
            )
        raise

    logger.info("Batch created: %s (status: %s)", batch.id, batch.status)

    return batch.id
| 165 | |
| 166 | |
| 167 | def get_batch_status(client: OpenAI, batch_id: str) -> BatchJobInfo: |
no test coverage detected