| 31 | |
@dataclass
class ModelConfig:
    """Configuration values describing a model's input and size.

    ``block_size`` and ``vocab_size`` default to ``None``, i.e. they carry no
    usable default here. NOTE(review): presumably the caller sets them from
    the dataset before a model is built -- confirm against the call sites.
    """

    block_size: int = None  # length of the input sequences of integers
    vocab_size: int = None  # the input integers are in range [0 .. vocab_size - 1]
    # The parameters below control the size of each model; each model
    # interprets them slightly differently.
    n_layer: int = 4
    n_embd: int = 64
    n_embd2: int = 64
    n_head: int = 4
| 41 | |
| 42 | # ----------------------------------------------------------------------------- |
| 43 | # Transformer Language Model (*exactly* as used in GPT-2) |