From 783874049ba9d98893fa5e3d6892d0d7d45c968b Mon Sep 17 00:00:00 2001 From: erenup Date: Thu, 2 Sep 2021 08:18:15 +0800 Subject: [PATCH] fix --- .../4.6-生成任务-机器翻译.ipynb | 69 +------------------ .../4.6-生成任务-机器翻译.md | 61 ---------------- 2 files changed, 2 insertions(+), 128 deletions(-) diff --git a/docs/篇章4-使用Transformers解决NLP任务/4.6-生成任务-机器翻译.ipynb b/docs/篇章4-使用Transformers解决NLP任务/4.6-生成任务-机器翻译.ipynb index 49d6922..dbbf9cb 100644 --- a/docs/篇章4-使用Transformers解决NLP任务/4.6-生成任务-机器翻译.ipynb +++ b/docs/篇章4-使用Transformers解决NLP任务/4.6-生成任务-机器翻译.ipynb @@ -957,76 +957,11 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": null, "source": [ "trainer.train()" ], - "outputs": [ - { - "output_type": "stream", - "name": "stderr", - "text": [ - " 1%|▏ | 500/38145 [1:05:10<91:20:54, 8.74s/it]" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "{'loss': 0.8588, 'learning_rate': 1.973784244330843e-05, 'epoch': 0.01}\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - " 3%|▎ | 1000/38145 [2:09:32<73:56:07, 7.17s/it]" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "{'loss': 0.8343, 'learning_rate': 1.947568488661686e-05, 'epoch': 0.03}\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - " 4%|▍ | 1500/38145 [3:03:53<57:17:10, 5.63s/it]" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "{'loss': 0.8246, 'learning_rate': 1.9213527329925285e-05, 'epoch': 0.04}\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - " 5%|▌ | 1980/38145 [3:52:54<67:46:36, 6.75s/it]" - ] - }, - { - "output_type": "error", - "ename": "KeyboardInterrupt", - "evalue": "", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m/var/folders/2k/x3py0v857kgcwqvvl00xxhxw0000gn/T/ipykernel_15169/4032920361.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mtrainer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtrain\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;32m~/Desktop/zhihu/learn-nlp-with-transformers/venv/lib/python3.8/site-packages/transformers/trainer.py\u001b[0m in \u001b[0;36mtrain\u001b[0;34m(self, resume_from_checkpoint, trial, **kwargs)\u001b[0m\n\u001b[1;32m 1079\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1080\u001b[0m \u001b[0;31m# Revert to normal clipping otherwise, handling Apex or full precision\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1081\u001b[0;31m torch.nn.utils.clip_grad_norm_(\n\u001b[0m\u001b[1;32m 1082\u001b[0m \u001b[0mamp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmaster_params\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0moptimizer\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0muse_apex\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mparameters\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1083\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmax_grad_norm\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/Desktop/zhihu/learn-nlp-with-transformers/venv/lib/python3.8/site-packages/torch/nn/utils/clip_grad.py\u001b[0m in \u001b[0;36mclip_grad_norm_\u001b[0;34m(parameters, max_norm, norm_type, error_if_nonfinite)\u001b[0m\n\u001b[1;32m 40\u001b[0m \u001b[0mtotal_norm\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnorms\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnorms\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m1\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmax\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstack\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnorms\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 41\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 42\u001b[0;31m \u001b[0mtotal_norm\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnorm\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstack\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnorm\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgrad\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdetach\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnorm_type\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdevice\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mp\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mparameters\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnorm_type\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 43\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mtotal_norm\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0misnan\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mtotal_norm\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0misinf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 44\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0merror_if_nonfinite\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/Desktop/zhihu/learn-nlp-with-transformers/venv/lib/python3.8/site-packages/torch/nn/utils/clip_grad.py\u001b[0m in \u001b[0;36m\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 40\u001b[0m \u001b[0mtotal_norm\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnorms\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnorms\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m1\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmax\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstack\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnorms\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 41\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 42\u001b[0;31m \u001b[0mtotal_norm\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnorm\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstack\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnorm\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgrad\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdetach\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnorm_type\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdevice\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mp\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mparameters\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnorm_type\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 43\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mtotal_norm\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0misnan\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mtotal_norm\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0misinf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 44\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0merror_if_nonfinite\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/Desktop/zhihu/learn-nlp-with-transformers/venv/lib/python3.8/site-packages/torch/functional.py\u001b[0m in \u001b[0;36mnorm\u001b[0;34m(input, p, dim, keepdim, out, dtype)\u001b[0m\n\u001b[1;32m 1310\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mp\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1311\u001b[0m \u001b[0m_dim\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mndim\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;31m# noqa: C416 TODO: rewrite as list(range(m))\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1312\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0m_VF\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnorm\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mp\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdim\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0m_dim\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkeepdim\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mkeepdim\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# type: ignore[attr-defined]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1313\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1314\u001b[0m \u001b[0;31m# TODO: when https://github.com/pytorch/pytorch/issues/33782 is fixed\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mKeyboardInterrupt\u001b[0m: " - ] - } - ], + "outputs": [], "metadata": { "id": "uNx5pyRlIrJh", "scrolled": false diff --git a/docs/篇章4-使用Transformers解决NLP任务/4.6-生成任务-机器翻译.md b/docs/篇章4-使用Transformers解决NLP任务/4.6-生成任务-机器翻译.md index f1f2fec..ae83314 100644 --- a/docs/篇章4-使用Transformers解决NLP任务/4.6-生成任务-机器翻译.md +++ b/docs/篇章4-使用Transformers解决NLP任务/4.6-生成任务-机器翻译.md @@ -486,67 +486,6 @@ trainer = Seq2SeqTrainer( trainer.train() ``` - 1%|▏ | 500/38145 [1:05:10<91:20:54, 8.74s/it] - - {'loss': 0.8588, 'learning_rate': 1.973784244330843e-05, 'epoch': 0.01} - - - 3%|▎ | 1000/38145 [2:09:32<73:56:07, 7.17s/it] - - {'loss': 0.8343, 'learning_rate': 1.947568488661686e-05, 'epoch': 0.03} - - - 4%|▍ | 1500/38145 [3:03:53<57:17:10, 5.63s/it] - - {'loss': 0.8246, 'learning_rate': 1.9213527329925285e-05, 'epoch': 0.04} - - - 5%|▌ | 1980/38145 [3:52:54<67:46:36, 6.75s/it] - - - --------------------------------------------------------------------------- - - KeyboardInterrupt Traceback (most recent call last) - - /var/folders/2k/x3py0v857kgcwqvvl00xxhxw0000gn/T/ipykernel_15169/4032920361.py in - ----> 1 trainer.train() - - - ~/Desktop/zhihu/learn-nlp-with-transformers/venv/lib/python3.8/site-packages/transformers/trainer.py in train(self, resume_from_checkpoint, trial, **kwargs) - 1079 else: - 1080 # Revert to normal clipping otherwise, handling Apex or full precision - -> 1081 torch.nn.utils.clip_grad_norm_( - 1082 amp.master_params(self.optimizer) if self.use_apex else model.parameters(), - 1083 self.args.max_grad_norm, - - - ~/Desktop/zhihu/learn-nlp-with-transformers/venv/lib/python3.8/site-packages/torch/nn/utils/clip_grad.py in clip_grad_norm_(parameters, max_norm, norm_type, error_if_nonfinite) - 40 total_norm = norms[0] if len(norms) == 1 else torch.max(torch.stack(norms)) - 41 else: - ---> 42 total_norm = torch.norm(torch.stack([torch.norm(p.grad.detach(), norm_type).to(device) for p in parameters]), norm_type) - 43 if total_norm.isnan() or total_norm.isinf(): - 44 if error_if_nonfinite: - - - ~/Desktop/zhihu/learn-nlp-with-transformers/venv/lib/python3.8/site-packages/torch/nn/utils/clip_grad.py in (.0) - 40 total_norm = norms[0] if len(norms) == 1 else torch.max(torch.stack(norms)) - 41 else: - ---> 42 total_norm = torch.norm(torch.stack([torch.norm(p.grad.detach(), norm_type).to(device) for p in parameters]), norm_type) - 43 if total_norm.isnan() or total_norm.isinf(): - 44 if error_if_nonfinite: - - - ~/Desktop/zhihu/learn-nlp-with-transformers/venv/lib/python3.8/site-packages/torch/functional.py in norm(input, p, dim, keepdim, out, dtype) - 1310 if not isinstance(p, str): - 1311 _dim = [i for i in range(ndim)] # noqa: C416 TODO: rewrite as list(range(m)) - -> 1312 return _VF.norm(input, p, dim=_dim, keepdim=keepdim) # type: ignore[attr-defined] - 1313 - 1314 # TODO: when https://github.com/pytorch/pytorch/issues/33782 is fixed - - - KeyboardInterrupt: - - 最后别忘了,查看如何上传模型 ,上传模型到](https://huggingface.co/transformers/model_sharing.html) 到[🤗 Model Hub](https://huggingface.co/models)。随后您就可以像这个notebook一开始一样,直接用模型名字就能使用您的模型啦。