MCPcopy Index your code
hub / github.com/vastsa/FileCodeBox / merge_chunks

Method merge_chunks

core/storage.py:419–492  ·  view source on GitHub ↗

合并 S3 上的分片文件。使用 S3 的 multipart upload API 实现流式合并,避免内存问题。

(self, upload_id: str, chunk_info: UploadChunk, save_path: str)

Source from the content-addressed store, hash-verified

417 )
418
419 async def merge_chunks(self, upload_id: str, chunk_info: UploadChunk, save_path: str) -> tuple[str, str]:
420 """
421 合并 S3 上的分片文件
422 使用 S3 的 multipart upload API 实现流式合并,避免内存问题
423 """
424 file_sha256 = hashlib.sha256()
425 chunk_dir = str(Path(save_path).parent / "chunks" / upload_id)
426
427 async with self._client() as s3:
428 # 创建 multipart upload
429 mpu = await s3.create_multipart_upload(
430 Bucket=self.bucket_name,
431 Key=save_path,
432 ContentType='application/octet-stream'
433 )
434 mpu_id = mpu['UploadId']
435 parts = []
436
437 try:
438 # 按顺序读取、验证并上传每个分片
439 for i in range(chunk_info.total_chunks):
440 chunk_key = f"{chunk_dir}/{i}.part"
441 chunk_record = await UploadChunk.filter(upload_id=upload_id, chunk_index=i).first()
442 if not chunk_record:
443 raise ValueError(f"分片{i}记录不存在")
444
445 try:
446 response = await s3.get_object(
447 Bucket=self.bucket_name,
448 Key=chunk_key
449 )
450 chunk_data = await response['Body'].read()
451 except Exception as e:
452 raise ValueError(f"分片{i}文件不存在: {e}")
453
454 current_hash = hashlib.sha256(chunk_data).hexdigest()
455 if current_hash != chunk_record.chunk_hash:
456 raise ValueError(f"分片{i}哈希不匹配: 期望 {chunk_record.chunk_hash}, 实际 {current_hash}")
457
458 file_sha256.update(chunk_data)
459
460 # 上传分片到 multipart upload
461 part_response = await s3.upload_part(
462 Bucket=self.bucket_name,
463 Key=save_path,
464 UploadId=mpu_id,
465 PartNumber=i + 1, # S3 part numbers start at 1
466 Body=chunk_data
467 )
468 parts.append({
469 'PartNumber': i + 1,
470 'ETag': part_response['ETag']
471 })
472
473 # 释放内存
474 del chunk_data
475
476 # 完成 multipart upload

Callers 1

complete_upload · Function · 0.45

Calls 4

_client · Method · 0.95
read · Method · 0.80
first · Method · 0.45
filter · Method · 0.45

Tested by

no test coverage detected