MCPcopy
hub / github.com/karpathy/nanochat / CustomJSON

Class CustomJSON

tasks/customjson.py:10–64  ·  view source on GitHub ↗

Load conversations from a JSONL file. Each line should be a JSON array of message objects with 'role' and 'content' fields. Example line: [{"role":"user","content":"Hi"},{"role":"assistant","content":"Hello"}]

Source from the content-addressed store, hash-verified

8from tasks.common import Task
9
class CustomJSON(Task):
    """
    Load conversations from a JSONL file.
    Each line should be a JSON array of message objects with 'role' and 'content' fields.
    Example line: [{"role":"user","content":"Hi"},{"role":"assistant","content":"Hello"}]

    Every conversation must contain at least 2 messages, roles must alternate
    starting with "user" (even indices "user", odd indices "assistant"), and
    every 'content' must be a string.

    If the file does not exist, a warning with a download hint is printed and
    the task ends up with zero examples; no exception is raised in that case.
    """

    def __init__(self, filepath, **kwargs):
        """
        Read and validate every non-blank line of `filepath`.

        Args:
            filepath: path to the JSONL file holding the conversations.
            **kwargs: forwarded unchanged to the parent class constructor.

        Raises:
            AssertionError: if a conversation is structurally invalid.
            json.JSONDecodeError: if a non-blank line is not valid JSON.
        """
        super().__init__(**kwargs)
        self.filepath = filepath
        self.conversations = []

        # Load all conversations from the JSONL file
        if not os.path.exists(filepath):
            # Helpful error message due to recent change. Will be removed in the future.
            # This branch is intentionally best-effort: it only prints and
            # leaves self.conversations empty.
            print("-" * 80)
            print(f"Warning: File {filepath} does not exist")
            print("HINT (Oct 21 2025)")
            print("If you recently did a git pull and suddenly see this, it might be due to the new addition of identity conversations")
            print("See this discussion for more details: https://github.com/karpathy/nanochat/discussions/139")
            print("Quick fix: simply run the following command to download the file and you're done:")
            print(f"curl -L -o {filepath} https://karpathy-public.s3.us-west-2.amazonaws.com/identity_conversations.jsonl")
            print("-" * 80)
        else:
            with open(filepath, 'r', encoding='utf-8') as f:
                for line in f:
                    line = line.strip()
                    if not line:  # skip empty lines
                        continue
                    messages = json.loads(line)
                    self._validate_messages(messages)
                    self.conversations.append(messages)

        self.length = len(self.conversations)

    @staticmethod
    def _validate_messages(messages):
        """
        Check that `messages` is a well-formed user/assistant conversation.

        The checks raise explicitly instead of using `assert` statements so
        they still run when Python is started with -O (which strips asserts).
        AssertionError is kept as the exception type so callers observe the
        same error class as before.

        Raises:
            AssertionError: on the first structural problem found.
        """
        if not isinstance(messages, list):
            raise AssertionError(f"Expected list of messages, got {type(messages)}")
        if len(messages) < 2:
            raise AssertionError(f"Conversation must have at least 2 messages, got {len(messages)}")
        # Validate message structure and alternating roles
        for i, message in enumerate(messages):
            # Without this check a string message would turn the `in` tests
            # below into substring tests, and a number would raise TypeError.
            if not isinstance(message, dict):
                raise AssertionError(f"Message {i} must be a JSON object, got {type(message)}")
            if "role" not in message:
                raise AssertionError(f"Message {i} missing 'role' field")
            if "content" not in message:
                raise AssertionError(f"Message {i} missing 'content' field")
            expected_role = "user" if i % 2 == 0 else "assistant"
            if message["role"] != expected_role:
                raise AssertionError(f"Message {i} has role {message['role']} but should be {expected_role}")
            if not isinstance(message["content"], str):
                raise AssertionError(f"Message {i} content must be a string")

    def num_examples(self):
        """Return the number of conversations that were loaded."""
        return self.length

    def get_example(self, index):
        """Return the conversation at `index` as a dict with a single "messages" key."""
        messages = self.conversations[index]
        conversation = {
            "messages": messages,
        }
        return conversation
65

Callers 1

chat_sft.py · File · 0.90

Calls

no outgoing calls

Tested by

no test coverage detected