add jupyter
parent 0b8e391209
commit b0cd45d468
@@ -5,6 +5,10 @@
    "metadata": {},
    "source": [
     "## Preface\n",
+    "This chapter contains a large amount of source code and commentary, with paragraphs and horizontal rules separating the individual modules, and the site provides a sidebar to help you jump quickly between sections; we hope that after reading it you will have a deep understanding of BERT. We also recommend single-stepping through the BERT source code in a tool such as PyCharm or VSCode, and comparing each module you reach against the explanation in this chapter.\n",
+    "\n",
+    "The accompanying Jupyter notebooks can be downloaded from the [code repository: Chapter 3 - Writing a Transformer model: BERT](https://github.com/datawhalechina/learn-nlp-with-transformers/tree/main/docs/%E7%AF%87%E7%AB%A03-%E7%BC%96%E5%86%99%E4%B8%80%E4%B8%AATransformer%E6%A8%A1%E5%9E%8B%EF%BC%9ABERT)\n",
+    "\n",
     "This chapter is based on [HuggingFace/Transformers, 48.9k stars](https://github.com/huggingface/transformers). All of the code for this chapter is in [huggingface bert (note that the library is updated frequently, so there may be differences; version 4.4.2 is authoritative)](https://github.com/huggingface/transformers/tree/master/src/transformers/models/bert). HuggingFace is a chatbot startup headquartered in New York that caught the signal of the coming BERT wave early and set about implementing a PyTorch-based BERT model. The project was originally named pytorch-pretrained-bert; it reproduced the original results while providing easy-to-use methods for all kinds of experimentation and research on top of this powerful model.\n",
     "\n",
     "As its user base grew, the project developed into a sizable open-source community, merged in a variety of pre-trained language models, added a TensorFlow implementation, and was renamed Transformers in the second half of 2019. As of this writing (March 30, 2021) the project has 43k+ stars; Transformers has arguably become the de facto standard toolkit for NLP.\n",
@@ -1596,7 +1600,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 21,
+   "execution_count": 30,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1611,7 +1615,7 @@
     "        hidden_states = self.dense(hidden_states)\n",
     "        hidden_states = self.dropout(hidden_states)\n",
     "        hidden_states = self.LayerNorm(hidden_states + input_tensor)\n",
-    "        return hidden_states\n"
+    "        return hidden_states"
    ]
   },
   {
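The context lines in this hunk are the tail of the post-attention output block: a dense projection, then dropout, then a residual connection wrapped in LayerNorm. For readers following along outside the notebook, here is a minimal self-contained sketch of that pattern; the layer size, dropout probability, and eps below are illustrative assumptions, not values read from the notebook's BertConfig:

    # Sketch of the dense -> dropout -> LayerNorm(residual) block shown above.
    # hidden_size=768, dropout=0.1, and eps=1e-12 are assumed defaults here.
    import torch
    import torch.nn as nn

    class OutputBlockSketch(nn.Module):
        def __init__(self, hidden_size=768, dropout_prob=0.1):
            super().__init__()
            self.dense = nn.Linear(hidden_size, hidden_size)
            self.LayerNorm = nn.LayerNorm(hidden_size, eps=1e-12)
            self.dropout = nn.Dropout(dropout_prob)

        def forward(self, hidden_states, input_tensor):
            hidden_states = self.dense(hidden_states)    # project
            hidden_states = self.dropout(hidden_states)  # regularize
            # add the residual, then normalize
            return self.LayerNorm(hidden_states + input_tensor)

    x = torch.rand(1, 2, 768)                # (batch, seq_len, hidden)
    print(OutputBlockSketch()(x, x).size())  # torch.Size([1, 2, 768])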
@@ -1641,9 +1645,18 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 22,
+   "execution_count": 28,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "input to bert pooler size: 768\n",
+      "torch.Size([1, 768])\n"
+     ]
+    }
+   ],
    "source": [
     "class BertPooler(nn.Module):\n",
     "    def __init__(self, config):\n",
@@ -1657,9 +1670,27 @@
     "        first_token_tensor = hidden_states[:, 0]\n",
     "        pooled_output = self.dense(first_token_tensor)\n",
     "        pooled_output = self.activation(pooled_output)\n",
-    "        return pooled_output"
+    "        return pooled_output\n",
+    "from transformers.models.bert.configuration_bert import *\n",
+    "import torch\n",
+    "config = BertConfig.from_pretrained(\"bert-base-uncased\")\n",
+    "bert_pooler = BertPooler(config=config)\n",
+    "print(\"input to bert pooler size: {}\".format(config.hidden_size))\n",
+    "batch_size = 1\n",
+    "seq_len = 2\n",
+    "hidden_size = 768\n",
+    "x = torch.rand(batch_size, seq_len, hidden_size)\n",
+    "y = bert_pooler(x)\n",
+    "print(y.size())"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
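Flattened out of the notebook JSON, the BertPooler demo added in the last hunk is roughly the script below. Note that the diff does not show BertPooler's constructor body; the sketch fills it in with the standard definition from Transformers 4.4.2 (a Linear layer over the first token's hidden state followed by Tanh), which is consistent with the printed shapes, and adds the torch.nn import for self-containment:

    # BertPooler and the smoke test from the hunk above, as a plain script.
    from transformers.models.bert.configuration_bert import *
    import torch
    import torch.nn as nn

    class BertPooler(nn.Module):
        def __init__(self, config):
            super().__init__()
            # __init__ body assumed from the standard BertPooler definition
            self.dense = nn.Linear(config.hidden_size, config.hidden_size)
            self.activation = nn.Tanh()

        def forward(self, hidden_states):
            # "pool" the sequence by taking the first ([CLS]) token's state
            first_token_tensor = hidden_states[:, 0]
            pooled_output = self.dense(first_token_tensor)
            pooled_output = self.activation(pooled_output)
            return pooled_output

    config = BertConfig.from_pretrained("bert-base-uncased")  # downloads on first run
    bert_pooler = BertPooler(config=config)
    print("input to bert pooler size: {}".format(config.hidden_size))
    batch_size, seq_len, hidden_size = 1, 2, 768
    x = torch.rand(batch_size, seq_len, hidden_size)
    y = bert_pooler(x)
    print(y.size())  # torch.Size([1, 768])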