MCPcopy Index your code
hub / github.com/LawRefBook/Laws / CasesParser

Class CasesParser

scripts/convert.py:64–134  ·  view source on GitHub ↗

Source from the content-addressed store, hash-verified

62
63
64class CasesParser(object):
65
def __init__(self, filename: str = "./__cache__/案例.txt") -> None:
    """Create a parser bound to a cases text file.

    Args:
        filename: Path of the text file that ``parse`` reads. The default
            is the cache path that used to be hard-coded here, so calling
            ``CasesParser()`` with no arguments behaves as before.
    """
    self.filename = filename
68
def __slice_content(self, content: str) -> List[str]:
    """Split *content* into chunks of whole sentences.

    The text is cut at every "。" and the pieces are packed greedily:
    a piece is joined to the current chunk (with the "。" restored
    between them) unless the chunk's length plus the piece's length
    would exceed 200 characters, in which case the current chunk is
    closed with "。" and the piece starts a new chunk.

    Args:
        content: The text to split.

    Returns:
        The chunks in order. A chunk may be longer than 200 characters
        when a single sentence is, or by the restored "。" separators,
        which are not counted when testing the limit. An empty trailing
        chunk is never returned, so empty input yields ``[]``.
    """
    chunks: List[str] = []

    for sentence in content.split("。"):
        if not chunks or len(chunks[-1]) + len(sentence) > 200:
            if chunks:
                # Close the previous chunk with the delimiter that
                # ``split`` removed.
                chunks[-1] += "。"
            chunks.append(sentence)
        elif chunks[-1]:
            chunks[-1] += "。" + sentence
        else:
            # The current chunk is still empty (it began with an empty
            # piece); start it with this sentence and no delimiter.
            chunks[-1] = sentence

    # When the text ends with "。" the final piece is "". If the last
    # chunk was already over the limit, that empty piece opened a new,
    # empty chunk; drop it so callers never receive a blank entry.
    if chunks and not chunks[-1]:
        chunks.pop()

    return chunks
83
def parse(self) -> List[Case]:
    """Read ``self.filename`` and group its lines into ``Case`` objects.

    Lines are stripped and blank lines are discarded. For each remaining
    line:

    * if ``isTitle(line)`` is truthy, a new ``Case`` is started and the
      returned value becomes its ``title``;
    * if the line directly follows a title line and starts with a dash,
      it becomes the ``subtitle`` with the dashes stripped from both ends;
    * if ``isSection(line)`` is truthy, the line is appended to
      ``content`` as ``"## ..."`` with the 【】 brackets stripped;
    * otherwise the line is split into chunks and appended to ``content``.

    Text that appears before the first title is collected into an
    untitled ``Case``.

    Returns:
        The parsed cases in file order.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Explicit encoding so the Chinese text is decoded the same way on
    # every platform instead of following the locale default.
    # NOTE(review): assumes the cache file is written as UTF-8 — confirm
    # against whatever produces it.
    with open(self.filename, "r", encoding="utf-8") as f:
        stripped = (raw.strip() for raw in f)
        lines = [text for text in stripped if text]

    cases: List[Case] = []
    # True only while the previously processed line was a title. A
    # subtitle is accepted only in that position, so a dash-led second
    # line is not taken as a subtitle when the first line was no title.
    after_title = False
    for line in lines:
        # Evaluate once and reuse for both the "start a new case" check
        # and the title assignment.
        # NOTE(review): presumes isTitle has no side effects — confirm.
        title = isTitle(line)
        if title or not cases:
            cases.append(Case())
        case = cases[-1]

        if title:
            case.title = title
            after_title = True
            continue

        directly_after_title, after_title = after_title, False
        if directly_after_title and re.match(r"^[——-]", line):
            case.subtitle = line.strip("——-")
            continue

        if isSection(line):
            case.content.append(f"## {line.strip('【】')}")
        else:
            case.content += self.__slice_content(line)
    return cases
114
115 def write(self, cases: List[Case]):
116 ret_json = []
117 for case in cases:
118 case_json = {
119 "name": case.title,
120 "level": "案例",
121 "id": str(uuid4()),

Callers 1

convert.pyFile · 0.85

Calls

no outgoing calls

Tested by

no test coverage detected