Some quality of life improvements.

pull/17/head
jaymody 2023-02-17 10:27:13 -05:00
rodzic 03d958f892
commit dfb5df895a
5 zmienionych plików z 9 dodań i 9 usunięć

Wyświetl plik

@@ -29,9 +29,6 @@ A quick breakdown of each of the files:
```bash
pip install -r requirements.txt
```
If you're using an M1 Macbook, you'll need to replace `tensorflow` with `tensorflow-macos`.
Tested on `Python 3.9.10`.
#### Usage

Wyświetl plik

@@ -46,7 +46,7 @@ def mha(x, c_attn, c_proj, n_head): # [n_seq, n_embd] -> [n_seq, n_embd]
qkv_heads = list(map(lambda x: np.split(x, n_head, axis=-1), qkv)) # [3, n_seq, n_embd] -> [3, n_head, n_seq, n_embd/n_head]
# causal mask to hide future inputs from being attended to
causal_mask = (1 - np.tri(x.shape[0], dtype=np.float32)) * -1e10 # [n_seq, n_seq]
causal_mask = (1 - np.tri(x.shape[0], dtype=x.dtype)) * -1e10 # [n_seq, n_seq]
# perform attention over each head
out_heads = [attention(q, k, v, causal_mask) for q, k, v in zip(*qkv_heads)] # [3, n_head, n_seq, n_embd/n_head] -> [n_head, n_seq, n_embd/n_head]

Wyświetl plik

@@ -24,7 +24,7 @@ def attention(q, k, v, mask):
def mha(x, c_attn, c_proj, n_head):
x = linear(x, **c_attn)
qkv_heads = list(map(lambda x: np.split(x, n_head, axis=-1), np.split(x, 3, axis=-1)))
causal_mask = (1 - np.tri(x.shape[0], dtype=np.float32)) * -1e10
causal_mask = (1 - np.tri(x.shape[0], dtype=x.dtype)) * -1e10
out_heads = [attention(q, k, v, causal_mask) for q, k, v in zip(*qkv_heads)]
x = linear(np.hstack(out_heads), **c_proj)
return x

Wyświetl plik

@@ -1,6 +1,10 @@
numpy==1.24.1 # used for the actual model code/weights
regex==2017.4.5 # used by the bpe tokenizer
requests==2.27.1 # used to download gpt-2 files from openai
tensorflow==2.11.0 # used to load the gpt-2 weights from the open-ai tf checkpoint
tqdm==4.64.0 # progress bar to keep your sanity
fire==0.5.0 # easy CLI creation
# used to load the gpt-2 weights from the open-ai tf checkpoint
# M1 Macbooks require tensorflow-macos
tensorflow==2.11.0; sys_platform != 'darwin' or platform_machine != 'arm64'
tensorflow-macos==2.11.0; sys_platform == 'darwin' and platform_machine == 'arm64'

Wyświetl plik

@@ -49,11 +49,10 @@ def load_gpt2_params_from_tf_ckpt(tf_ckpt_path, hparams):
d[keys[0]] = set_in_nested_dict(d[keys[0]], keys[1:], val)
return d
init_vars = tf.train.list_variables(tf_ckpt_path)
params = {"blocks": [{} for _ in range(hparams["n_layer"])]}
for name, _ in init_vars:
for name, _ in tf.train.list_variables(tf_ckpt_path):
array = np.squeeze(tf.train.load_variable(tf_ckpt_path, name))
name = name.removeprefix("model/")
name = name[len("model/") :]
if name.startswith("h"):
m = re.match(r"h([0-9]+)/(.*)", name)
n = int(m[1])