MCPcopy
hub / github.com/FareedKhan-dev/train-llm-from-scratch / MetricsLogger

Class MetricsLogger

src/post_training/logging_utils.py:18–58  ·  view source on GitHub ↗

Source from the content-addressed store, hash-verified

16
17
18class MetricsLogger:
19 def __init__(
20 self,
21 stage: str,
22 log_dir: str,
23 *,
24 use_wandb: bool = False,
25 wandb_project: str = "train-llm-from-scratch-posttrain",
26 config: dict | None = None,
27 run_name: str | None = None,
28 ) -> None:
29 os.makedirs(log_dir, exist_ok=True)
30 self.stage = stage
31 # No Date.now in scripts here; use time.time() (allowed) for unique filenames.
32 stamp = int(time.time())
33 self.path = os.path.join(log_dir, f"{stage}_{stamp}.jsonl")
34 self._fh = open(self.path, "a")
35 self._wandb = None
36 if use_wandb:
37 try:
38 import wandb
39
40 wandb.init(project=wandb_project, name=run_name or f"{stage}-{stamp}", config=config or {})
41 self._wandb = wandb
42 except Exception as e: # noqa: BLE001 - logging must never crash training
43 print(f"[logger] wandb disabled ({e}); JSONL logging only -> {self.path}")
44 print(f"[logger] stage={stage} -> {self.path}")
45
46 def log(self, step: int, metrics: dict[str, Any]) -> None:
47 record = {"step": step, "wall": time.time(), **metrics}
48 self._fh.write(json.dumps(record) + "\n")
49 self._fh.flush()
50 if self._wandb is not None:
51 self._wandb.log(metrics, step=step)
52
53 def close(self) -> None:
54 try:
55 self._fh.close()
56 finally:
57 if self._wandb is not None:
58 self._wandb.finish()

Callers 6

mainFunction · 0.90
mainFunction · 0.90
mainFunction · 0.90
mainFunction · 0.90
mainFunction · 0.90
mainFunction · 0.90

Calls

no outgoing calls

Tested by

no test coverage detected