Class MetricsLogger

src/post_training/logging_utils.py:18–58 · view source on GitHub ↗

Source from the content-addressed store, hash-verified

16
17
18	class MetricsLogger:
19	def __init__(
20	self,
21	stage: str,
22	log_dir: str,
23	*,
24	use_wandb: bool = False,
25	wandb_project: str = "train-llm-from-scratch-posttrain",
26	config: dict \| None = None,
27	run_name: str \| None = None,
28	) -> None:
29	os.makedirs(log_dir, exist_ok=True)
30	self.stage = stage
31	# No Date.now in scripts here; use time.time() (allowed) for unique filenames.
32	stamp = int(time.time())
33	self.path = os.path.join(log_dir, f"{stage}_{stamp}.jsonl")
34	self._fh = open(self.path, "a")
35	self._wandb = None
36	if use_wandb:
37	try:
38	import wandb
39
40	wandb.init(project=wandb_project, name=run_name or f"{stage}-{stamp}", config=config or {})
41	self._wandb = wandb
42	except Exception as e: # noqa: BLE001 - logging must never crash training
43	print(f"[logger] wandb disabled ({e}); JSONL logging only -> {self.path}")
44	print(f"[logger] stage={stage} -> {self.path}")
45
46	def log(self, step: int, metrics: dict[str, Any]) -> None:
47	record = {"step": step, "wall": time.time(), **metrics}
48	self._fh.write(json.dumps(record) + "\n")
49	self._fh.flush()
50	if self._wandb is not None:
51	self._wandb.log(metrics, step=step)
52
53	def close(self) -> None:
54	try:
55	self._fh.close()
56	finally:
57	if self._wandb is not None:
58	self._wandb.finish()

mainFunction · 0.90

no outgoing calls

no test coverage detected