kopia lustrzana https://github.com/jaymody/picoGPT
Some quality of life improvements.
rodzic
03d958f892
commit
dfb5df895a
|
@ -29,9 +29,6 @@ A quick breakdown of each of the files:
|
|||
```bash
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
If you're using an M1 MacBook, you'll need to replace `tensorflow` with `tensorflow-macos`.
|
||||
|
||||
Tested on `Python 3.9.10`.
|
||||
|
||||
#### Usage
|
||||
|
|
2
gpt2.py
2
gpt2.py
|
@ -46,7 +46,7 @@ def mha(x, c_attn, c_proj, n_head): # [n_seq, n_embd] -> [n_seq, n_embd]
|
|||
qkv_heads = list(map(lambda x: np.split(x, n_head, axis=-1), qkv)) # [3, n_seq, n_embd] -> [3, n_head, n_seq, n_embd/n_head]
|
||||
|
||||
# causal mask to hide future inputs from being attended to
|
||||
causal_mask = (1 - np.tri(x.shape[0], dtype=np.float32)) * -1e10 # [n_seq, n_seq]
|
||||
causal_mask = (1 - np.tri(x.shape[0], dtype=x.dtype)) * -1e10 # [n_seq, n_seq]
|
||||
|
||||
# perform attention over each head
|
||||
out_heads = [attention(q, k, v, causal_mask) for q, k, v in zip(*qkv_heads)] # [3, n_head, n_seq, n_embd/n_head] -> [n_head, n_seq, n_embd/n_head]
|
||||
|
|
|
@ -24,7 +24,7 @@ def attention(q, k, v, mask):
|
|||
def mha(x, c_attn, c_proj, n_head):
|
||||
x = linear(x, **c_attn)
|
||||
qkv_heads = list(map(lambda x: np.split(x, n_head, axis=-1), np.split(x, 3, axis=-1)))
|
||||
causal_mask = (1 - np.tri(x.shape[0], dtype=np.float32)) * -1e10
|
||||
causal_mask = (1 - np.tri(x.shape[0], dtype=x.dtype)) * -1e10
|
||||
out_heads = [attention(q, k, v, causal_mask) for q, k, v in zip(*qkv_heads)]
|
||||
x = linear(np.hstack(out_heads), **c_proj)
|
||||
return x
|
||||
|
|
|
@ -1,6 +1,10 @@
|
|||
numpy==1.24.1 # used for the actual model code/weights
|
||||
regex==2017.4.5 # used by the bpe tokenizer
|
||||
requests==2.27.1 # used to download gpt-2 files from openai
|
||||
tensorflow==2.11.0 # used to load the gpt-2 weights from the open-ai tf checkpoint
|
||||
tqdm==4.64.0 # progress bar to keep your sanity
|
||||
fire==0.5.0 # easy CLI creation
|
||||
|
||||
# used to load the gpt-2 weights from the OpenAI TensorFlow checkpoint
|
||||
# M1 (Apple Silicon, arm64) MacBooks require tensorflow-macos instead of tensorflow
|
||||
tensorflow==2.11.0; sys_platform != 'darwin' or platform_machine != 'arm64'
|
||||
tensorflow-macos==2.11.0; sys_platform == 'darwin' and platform_machine == 'arm64'
|
||||
|
|
5
utils.py
5
utils.py
|
@ -49,11 +49,10 @@ def load_gpt2_params_from_tf_ckpt(tf_ckpt_path, hparams):
|
|||
d[keys[0]] = set_in_nested_dict(d[keys[0]], keys[1:], val)
|
||||
return d
|
||||
|
||||
init_vars = tf.train.list_variables(tf_ckpt_path)
|
||||
params = {"blocks": [{} for _ in range(hparams["n_layer"])]}
|
||||
for name, _ in init_vars:
|
||||
for name, _ in tf.train.list_variables(tf_ckpt_path):
|
||||
array = np.squeeze(tf.train.load_variable(tf_ckpt_path, name))
|
||||
name = name.removeprefix("model/")
|
||||
name = name[len("model/") :]
|
||||
if name.startswith("h"):
|
||||
m = re.match(r"h([0-9]+)/(.*)", name)
|
||||
n = int(m[1])
|
||||
|
|
Ładowanie…
Reference in New Issue