MCPcopy
hub / github.com/HKUDS/LightRAG / _run

Function _run

tests/parser/test_parse_native_lightrag_e2e.py:103–183  ·  view source on GitHub ↗
()

Source from the content-addressed store, hash-verified

101 therefore identical chunk_args_hash on chunk-0."""
102
103 async def _run():
104 input_dir = tmp_path / "input"
105 input_dir.mkdir()
106 monkeypatch.setenv("INPUT_DIR", str(input_dir))
107
108 source_path = input_dir / "stable.docx"
109 source_path.write_bytes(b"fake docx bytes")
110
111 # Stub extract_docx_blocks at the adapter so the upstream DOCX
112 # parser is never invoked. The adapter still does all the
113 # LightRAG-specific writing — that is what we want under test.
114 stable_blocks = [
115 _block(
116 "Title\nFirst paragraph body.\nSecond paragraph body.",
117 heading="Title",
118 level=1,
119 ),
120 ]
121
122 def _stub_extract(file_path, drawing_context=None, **kwargs):
123 return [dict(b) for b in stable_blocks]
124
125 monkeypatch.setattr(
126 "lightrag.parser.docx.parse_document.extract_docx_blocks",
127 _stub_extract,
128 )
129
130 rag = _MiniRag(tmp_path / "work")
131
132 # ---- First parse ----
133 # parse_native archives the source after writing, so re-create it
134 # before the second parse for a fair comparison.
135 result1 = await _parse_via_registry(
136 rag,
137 "native",
138 "doc-stable",
139 str(source_path),
140 {"parse_format": FULL_DOCS_FORMAT_PENDING_PARSE, "content": ""},
141 )
142 merged1 = result1["content"]
143 assert merged1, "first parse produced empty merged_text"
144
145 # ---- Second parse ----
146 # Restore the source file (archive moved it), reset the in-memory
147 # full_docs row, and remove the parsed_dir so the writer rewrites
148 # both meta (with a fresh parse_time) and content lines.
149 source_path.write_bytes(b"fake docx bytes")
150 rag.full_docs.data.clear()
151 parsed_artifact_dir = input_dir / PARSED_DIR_NAME / f"{source_path.name}.parsed"
152 if parsed_artifact_dir.exists():
153 import shutil
154
155 shutil.rmtree(parsed_artifact_dir)
156
157 result2 = await _parse_via_registry(
158 rag,
159 "native",
160 "doc-stable",

Calls 13

compute_args_hash · Function · 0.90
_MiniRag · Class · 0.85
open · Function · 0.85
set · Function · 0.85
list · Function · 0.85
clear · Method · 0.80
exists · Method · 0.80
format · Method · 0.80
read · Method · 0.80
_block · Function · 0.70
_parse_via_registry · Function · 0.70

Tested by

no test coverage detected