MCPcopy Index your code
hub / github.com/TaskingAI/TaskingAI / process_content

Function process_content

backend/app/operators/retrieval/record.py:17–62  ·  view source on GitHub ↗
(
    collection: Collection,
    type: RecordType,
    title: str,
    text_splitter: TextSplitter,
    max_num_chunks: int,
    content: Optional[str] = None,
    file_id: Optional[str] = None,
    url: Optional[str] = None,
)

Source from the content-addressed store, hash-verified

15
16
async def process_content(
    collection: Collection,
    type: RecordType,
    title: str,
    text_splitter: TextSplitter,
    max_num_chunks: int,
    content: Optional[str] = None,
    file_id: Optional[str] = None,
    url: Optional[str] = None,
):
    """Load a record's content, split it into chunks and embed the chunks.

    The same source description (``type``, ``content``, ``file_id``, ``url``)
    is handed to both ``load_db_content`` and ``load_content_to_split``; the
    first result is returned untouched, the second is fed to the splitter.

    :param collection: supplies ``embedding_model_id`` and ``embedding_size``.
    :param type: record type, forwarded to the loaders as ``record_type``.
    :param title: forwarded to ``text_splitter.split_text``.
    :param text_splitter: object whose ``split_text`` returns a pair
        (chunk texts, second list - presumably per-chunk token counts; confirm
        against the splitter implementation).
    :param max_num_chunks: upper bound on the number of chunks; exceeding it
        triggers ``raise_http_error`` with ``RESOURCE_LIMIT_REACHED``.
    :param content: optional inline content, forwarded to the loaders.
    :param file_id: optional file identifier, forwarded to the loaders.
    :param url: optional URL, forwarded to the loaders.
    :return: ``(chunk texts, second split_text result, embeddings, db content)``.
    """
    # Function-scope import kept from the original (presumably to avoid a
    # circular import at module load - confirm before hoisting it).
    from app.services.retrieval.embedding import embed_documents

    # Both loaders take the identical description of where the content lives.
    source_kwargs = dict(record_type=type, content=content, file_id=file_id, url=url)

    # Load the stored form first, then the text to split (same order as before).
    stored_content = await load_db_content(**source_kwargs)
    splittable_text = await load_content_to_split(**source_kwargs)

    # Split the content into chunks.
    chunks, token_counts = text_splitter.split_text(text=splittable_text, title=title)

    # Refuse the record when it produces more chunks than the caller allows.
    exceeds_capacity = len(chunks) > max_num_chunks
    if exceeds_capacity:
        raise_http_error(
            ErrorCode.RESOURCE_LIMIT_REACHED,
            "The collection has no enough capacity to store the new chunks created from the record content.",
        )

    # Resolve the collection's embedding model before embedding.
    model = await model_ops.get(model_id=collection.embedding_model_id)

    # Embed every chunk with the collection's model and embedding size.
    vectors = await embed_documents(
        documents=chunks,
        embedding_model=model,
        embedding_size=collection.embedding_size,
    )

    return chunks, token_counts, vectors, stored_content
63
64
65class RecordModelOperator(PostgresModelOperator):

Callers 2

createMethod · 0.85
updateMethod · 0.85

Calls 6

load_db_contentFunction · 0.90
load_content_to_splitFunction · 0.90
raise_http_errorFunction · 0.90
embed_documentsFunction · 0.90
split_textMethod · 0.80
getMethod · 0.45

Tested by

no test coverage detected