合并 S3 上的分片文件。使用 S3 的 multipart upload API 实现流式合并,避免内存问题。
(self, upload_id: str, chunk_info: UploadChunk, save_path: str)
| 417 | ) |
| 418 | |
| 419 | async def merge_chunks(self, upload_id: str, chunk_info: UploadChunk, save_path: str) -> tuple[str, str]: |
| 420 | """ |
| 421 | 合并 S3 上的分片文件 |
| 422 | 使用 S3 的 multipart upload API 实现流式合并,避免内存问题 |
| 423 | """ |
| 424 | file_sha256 = hashlib.sha256() |
| 425 | chunk_dir = str(Path(save_path).parent / "chunks" / upload_id) |
| 426 | |
| 427 | async with self._client() as s3: |
| 428 | # 创建 multipart upload |
| 429 | mpu = await s3.create_multipart_upload( |
| 430 | Bucket=self.bucket_name, |
| 431 | Key=save_path, |
| 432 | ContentType='application/octet-stream' |
| 433 | ) |
| 434 | mpu_id = mpu['UploadId'] |
| 435 | parts = [] |
| 436 | |
| 437 | try: |
| 438 | # 按顺序读取、验证并上传每个分片 |
| 439 | for i in range(chunk_info.total_chunks): |
| 440 | chunk_key = f"{chunk_dir}/{i}.part" |
| 441 | chunk_record = await UploadChunk.filter(upload_id=upload_id, chunk_index=i).first() |
| 442 | if not chunk_record: |
| 443 | raise ValueError(f"分片{i}记录不存在") |
| 444 | |
| 445 | try: |
| 446 | response = await s3.get_object( |
| 447 | Bucket=self.bucket_name, |
| 448 | Key=chunk_key |
| 449 | ) |
| 450 | chunk_data = await response['Body'].read() |
| 451 | except Exception as e: |
| 452 | raise ValueError(f"分片{i}文件不存在: {e}") |
| 453 | |
| 454 | current_hash = hashlib.sha256(chunk_data).hexdigest() |
| 455 | if current_hash != chunk_record.chunk_hash: |
| 456 | raise ValueError(f"分片{i}哈希不匹配: 期望 {chunk_record.chunk_hash}, 实际 {current_hash}") |
| 457 | |
| 458 | file_sha256.update(chunk_data) |
| 459 | |
| 460 | # 上传分片到 multipart upload |
| 461 | part_response = await s3.upload_part( |
| 462 | Bucket=self.bucket_name, |
| 463 | Key=save_path, |
| 464 | UploadId=mpu_id, |
| 465 | PartNumber=i + 1, # S3 part numbers start at 1 |
| 466 | Body=chunk_data |
| 467 | ) |
| 468 | parts.append({ |
| 469 | 'PartNumber': i + 1, |
| 470 | 'ETag': part_response['ETag'] |
| 471 | }) |
| 472 | |
| 473 | # 释放内存 |
| 474 | del chunk_data |
| 475 | |
| 476 | # 完成 multipart upload |
no test coverage detected