diff --git a/docs/篇章4-使用Transformers解决NLP任务/datasets/metrics/evaluate.py b/docs/篇章4-使用Transformers解决NLP任务/datasets/metrics/evaluate.py
new file mode 100644
index 0000000..41effa7
--- /dev/null
+++ b/docs/篇章4-使用Transformers解决NLP任务/datasets/metrics/evaluate.py
@@ -0,0 +1,92 @@
+""" Official evaluation script for v1.1 of the SQuAD dataset. """
+
+import argparse
+import json
+import re
+import string
+import sys
+from collections import Counter
+
+
+def normalize_answer(s):
+    """Lower text and remove punctuation, articles and extra whitespace."""
+
+    def remove_articles(text):
+        return re.sub(r"\b(a|an|the)\b", " ", text)
+
+    def white_space_fix(text):
+        return " ".join(text.split())
+
+    def remove_punc(text):
+        exclude = set(string.punctuation)
+        return "".join(ch for ch in text if ch not in exclude)
+
+    def lower(text):
+        return text.lower()
+
+    return white_space_fix(remove_articles(remove_punc(lower(s))))
+
+
+def f1_score(prediction, ground_truth):
+    prediction_tokens = normalize_answer(prediction).split()
+    ground_truth_tokens = normalize_answer(ground_truth).split()
+    common = Counter(prediction_tokens) & Counter(ground_truth_tokens)
+    num_same = sum(common.values())
+    if num_same == 0:
+        return 0
+    precision = 1.0 * num_same / len(prediction_tokens)
+    recall = 1.0 * num_same / len(ground_truth_tokens)
+    f1 = (2 * precision * recall) / (precision + recall)
+    return f1
+
+
+def exact_match_score(prediction, ground_truth):
+    return normalize_answer(prediction) == normalize_answer(ground_truth)
+
+
+def metric_max_over_ground_truths(metric_fn, prediction, ground_truths):
+    scores_for_ground_truths = []
+    for ground_truth in ground_truths:
+        score = metric_fn(prediction, ground_truth)
+        scores_for_ground_truths.append(score)
+    return max(scores_for_ground_truths)
+
+
+def evaluate(dataset, predictions):
+    f1 = exact_match = total = 0
+    for article in dataset:
+        for paragraph in article["paragraphs"]:
+            for qa in paragraph["qas"]:
+                total += 1
+                if qa["id"] not in predictions:
+                    message = "Unanswered question " + qa["id"] + " will receive score 0."
+                    print(message, file=sys.stderr)
+                    continue
+                ground_truths = list(map(lambda x: x["text"], qa["answers"]))
+                prediction = predictions[qa["id"]]
+                exact_match += metric_max_over_ground_truths(exact_match_score, prediction, ground_truths)
+                f1 += metric_max_over_ground_truths(f1_score, prediction, ground_truths)
+
+    exact_match = 100.0 * exact_match / total
+    f1 = 100.0 * f1 / total
+
+    return {"exact_match": exact_match, "f1": f1}
+
+
+if __name__ == "__main__":
+    expected_version = "1.1"
+    parser = argparse.ArgumentParser(description="Evaluation for SQuAD " + expected_version)
+    parser.add_argument("dataset_file", help="Dataset file")
+    parser.add_argument("prediction_file", help="Prediction File")
+    args = parser.parse_args()
+    with open(args.dataset_file) as dataset_file:
+        dataset_json = json.load(dataset_file)
+        if dataset_json["version"] != expected_version:
+            print(
+                "Evaluation expects v-" + expected_version + ", but got dataset with v-" + dataset_json["version"],
+                file=sys.stderr,
+            )
+        dataset = dataset_json["data"]
+    with open(args.prediction_file) as prediction_file:
+        predictions = json.load(prediction_file)
+    print(json.dumps(evaluate(dataset, predictions)))
diff --git a/docs/篇章4-使用Transformers解决NLP任务/datasets/metrics/rouge.py b/docs/篇章4-使用Transformers解决NLP任务/datasets/metrics/rouge.py
new file mode 100644
index 0000000..a2219c1
--- /dev/null
+++ b/docs/篇章4-使用Transformers解决NLP任务/datasets/metrics/rouge.py
@@ -0,0 +1,131 @@
+# coding=utf-8
+# Copyright 2020 The HuggingFace Datasets Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+""" ROUGE metric from Google Research github repo. """
+
+# The dependencies in https://github.com/google-research/google-research/blob/master/rouge/requirements.txt
+import absl  # Here to have a nice missing dependency error message early on
+import nltk  # Here to have a nice missing dependency error message early on
+import numpy  # Here to have a nice missing dependency error message early on
+import six  # Here to have a nice missing dependency error message early on
+from rouge_score import rouge_scorer, scoring
+
+import datasets
+
+
+_CITATION = """\
+@inproceedings{lin-2004-rouge,
+    title = "{ROUGE}: A Package for Automatic Evaluation of Summaries",
+    author = "Lin, Chin-Yew",
+    booktitle = "Text Summarization Branches Out",
+    month = jul,
+    year = "2004",
+    address = "Barcelona, Spain",
+    publisher = "Association for Computational Linguistics",
+    url = "https://www.aclweb.org/anthology/W04-1013",
+    pages = "74--81",
+}
+"""
+
+_DESCRIPTION = """\
+ROUGE, or Recall-Oriented Understudy for Gisting Evaluation, is a set of metrics and a software package used for
+evaluating automatic summarization and machine translation software in natural language processing.
+The metrics compare an automatically produced summary or translation against one or more human-produced reference summaries or translations.
+
+Note that ROUGE is case insensitive, meaning that upper case letters are treated the same way as lower case letters.
+
+This metric is a wrapper around the Google Research reimplementation of ROUGE:
+https://github.com/google-research/google-research/tree/master/rouge
+"""
+
+_KWARGS_DESCRIPTION = """
+Calculates average rouge scores for a list of hypotheses and references
+Args:
+    predictions: list of predictions to score. Each prediction
+        should be a string with tokens separated by spaces.
+    references: list of references for each prediction. Each
+        reference should be a string with tokens separated by spaces.
+    rouge_types: A list of rouge types to calculate.
+        Valid names:
+        `"rouge{n}"` (e.g. `"rouge1"`, `"rouge2"`) where: {n} is the n-gram based scoring,
+        `"rougeL"`: Longest common subsequence based scoring.
+        `"rougeLsum"`: rougeLsum splits text using `"\n"`.
+        See details in https://github.com/huggingface/datasets/issues/617
+    use_stemmer: Bool indicating whether Porter stemmer should be used to strip word suffixes.
+    use_agregator: Return aggregates if this is set to True
+Returns:
+    rouge1: rouge_1 (precision, recall, f1),
+    rouge2: rouge_2 (precision, recall, f1),
+    rougeL: rouge_l (precision, recall, f1),
+    rougeLsum: rouge_lsum (precision, recall, f1)
+Examples:
+
+    >>> rouge = datasets.load_metric('rouge')
+    >>> predictions = ["hello there", "general kenobi"]
+    >>> references = ["hello there", "general kenobi"]
+    >>> results = rouge.compute(predictions=predictions, references=references)
+    >>> print(list(results.keys()))
+    ['rouge1', 'rouge2', 'rougeL', 'rougeLsum']
+    >>> print(results["rouge1"])
+    AggregateScore(low=Score(precision=1.0, recall=1.0, fmeasure=1.0), mid=Score(precision=1.0, recall=1.0, fmeasure=1.0), high=Score(precision=1.0, recall=1.0, fmeasure=1.0))
+    >>> print(results["rouge1"].mid.fmeasure)
+    1.0
+"""
+
+
+@datasets.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
+class Rouge(datasets.Metric):
+    def _info(self):
+        return datasets.MetricInfo(
+            description=_DESCRIPTION,
+            citation=_CITATION,
+            inputs_description=_KWARGS_DESCRIPTION,
+            features=datasets.Features(
+                {
+                    "predictions": datasets.Value("string", id="sequence"),
+                    "references": datasets.Value("string", id="sequence"),
+                }
+            ),
+            codebase_urls=["https://github.com/google-research/google-research/tree/master/rouge"],
+            reference_urls=[
+                "https://en.wikipedia.org/wiki/ROUGE_(metric)",
+                "https://github.com/google-research/google-research/tree/master/rouge",
+            ],
+        )
+
+    def _compute(self, predictions, references, rouge_types=None, use_agregator=True, use_stemmer=False):
+        if rouge_types is None:
+            rouge_types = ["rouge1", "rouge2", "rougeL", "rougeLsum"]
+
+        scorer = rouge_scorer.RougeScorer(rouge_types=rouge_types, use_stemmer=use_stemmer)
+        if use_agregator:
+            aggregator = scoring.BootstrapAggregator()
+        else:
+            scores = []
+
+        for ref, pred in zip(references, predictions):
+            score = scorer.score(ref, pred)
+            if use_agregator:
+                aggregator.add_scores(score)
+            else:
+                scores.append(score)
+
+        if use_agregator:
+            result = aggregator.aggregate()
+        else:
+            result = {}
+            for key in scores[0]:
+                result[key] = list(score[key] for score in scores)
+
+        return result
diff --git a/docs/篇章4-使用Transformers解决NLP任务/datasets/metrics/squad.py b/docs/篇章4-使用Transformers解决NLP任务/datasets/metrics/squad.py
new file mode 100644
index 0000000..d582195
--- /dev/null
+++ b/docs/篇章4-使用Transformers解决NLP任务/datasets/metrics/squad.py
@@ -0,0 +1,110 @@
+# coding=utf-8
+# Copyright 2020 The HuggingFace Datasets Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+""" SQuAD metric. """
+
+import datasets
+
+from .evaluate import evaluate
+
+
+_CITATION = """\
+@inproceedings{Rajpurkar2016SQuAD10,
+  title={SQuAD: 100, 000+ Questions for Machine Comprehension of Text},
+  author={Pranav Rajpurkar and Jian Zhang and Konstantin Lopyrev and Percy Liang},
+  booktitle={EMNLP},
+  year={2016}
+}
+"""
+
+_DESCRIPTION = """
+This metric wraps the official scoring script for version 1 of the Stanford Question Answering Dataset (SQuAD).
+
+Stanford Question Answering Dataset (SQuAD) is a reading comprehension dataset, consisting of questions posed by
+crowdworkers on a set of Wikipedia articles, where the answer to every question is a segment of text, or span,
+from the corresponding reading passage, or the question might be unanswerable.
+"""
+
+_KWARGS_DESCRIPTION = """
+Computes SQuAD scores (F1 and EM).
+Args:
+    predictions: List of question-answers dictionaries with the following key-values:
+        - 'id': id of the question-answer pair as given in the references (see below)
+        - 'prediction_text': the text of the answer
+    references: List of question-answers dictionaries with the following key-values:
+        - 'id': id of the question-answer pair (see above),
+        - 'answers': a Dict in the SQuAD dataset format
+            {
+                'text': list of possible texts for the answer, as a list of strings
+                'answer_start': list of start positions for the answer, as a list of ints
+            }
+            Note that answer_start values are not taken into account to compute the metric.
+Returns:
+    'exact_match': Exact match (the normalized answer exactly matches the gold answer)
+    'f1': The F-score of predicted tokens versus the gold answer
+Examples:
+
+    >>> predictions = [{'prediction_text': '1976', 'id': '56e10a3be3433e1400422b22'}]
+    >>> references = [{'answers': {'answer_start': [97], 'text': ['1976']}, 'id': '56e10a3be3433e1400422b22'}]
+    >>> squad_metric = datasets.load_metric("squad")
+    >>> results = squad_metric.compute(predictions=predictions, references=references)
+    >>> print(results)
+    {'exact_match': 100.0, 'f1': 100.0}
+"""
+
+
+@datasets.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
+class Squad(datasets.Metric):
+    def _info(self):
+        return datasets.MetricInfo(
+            description=_DESCRIPTION,
+            citation=_CITATION,
+            inputs_description=_KWARGS_DESCRIPTION,
+            features=datasets.Features(
+                {
+                    "predictions": {"id": datasets.Value("string"), "prediction_text": datasets.Value("string")},
+                    "references": {
+                        "id": datasets.Value("string"),
+                        "answers": datasets.features.Sequence(
+                            {
+                                "text": datasets.Value("string"),
+                                "answer_start": datasets.Value("int32"),
+                            }
+                        ),
+                    },
+                }
+            ),
+            codebase_urls=["https://rajpurkar.github.io/SQuAD-explorer/"],
+            reference_urls=["https://rajpurkar.github.io/SQuAD-explorer/"],
+        )
+
+    def _compute(self, predictions, references):
+        pred_dict = {prediction["id"]: prediction["prediction_text"] for prediction in predictions}
+        dataset = [
+            {
+                "paragraphs": [
+                    {
+                        "qas": [
+                            {
+                                "answers": [{"text": answer_text} for answer_text in ref["answers"]["text"]],
+                                "id": ref["id"],
+                            }
+                            for ref in references
+                        ]
+                    }
+                ]
+            }
+        ]
+        score = evaluate(dataset=dataset, predictions=pred_dict)
+        return score
diff --git a/docs/篇章4-使用Transformers解决NLP任务/datasets/squad/squad.py b/docs/篇章4-使用Transformers解决NLP任务/datasets/squad/squad.py
new file mode 100644
index 0000000..846001d
--- /dev/null
+++ b/docs/篇章4-使用Transformers解决NLP任务/datasets/squad/squad.py
@@ -0,0 +1,139 @@
+# coding=utf-8
+# Copyright 2020 The TensorFlow Datasets Authors and the HuggingFace Datasets Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
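+
+# Note: this local copy of the SQuAD builder loads the JSON files passed in via
+# `data_files` (see `_split_generators` below) instead of downloading them from _URLS.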
+
+# Lint as: python3
+"""SQUAD: The Stanford Question Answering Dataset."""
+
+from __future__ import absolute_import, division, print_function
+
+import json
+
+import datasets
+
+
+logger = datasets.logging.get_logger(__name__)
+
+
+_CITATION = """\
+@article{2016arXiv160605250R,
+       author = {{Rajpurkar}, Pranav and {Zhang}, Jian and {Lopyrev},
+                 Konstantin and {Liang}, Percy},
+        title = "{SQuAD: 100,000+ Questions for Machine Comprehension of Text}",
+      journal = {arXiv e-prints},
+         year = 2016,
+          eid = {arXiv:1606.05250},
+        pages = {arXiv:1606.05250},
+archivePrefix = {arXiv},
+       eprint = {1606.05250},
+}
+"""
+
+_DESCRIPTION = """\
+Stanford Question Answering Dataset (SQuAD) is a reading comprehension \
+dataset, consisting of questions posed by crowdworkers on a set of Wikipedia \
+articles, where the answer to every question is a segment of text, or span, \
+from the corresponding reading passage, or the question might be unanswerable.
+"""
+
+_URL = "https://rajpurkar.github.io/SQuAD-explorer/dataset/"
+_URLS = {
+    "train": _URL + "train-v1.1.json",
+    "dev": _URL + "dev-v1.1.json",
+}
+
+
+class SquadConfig(datasets.BuilderConfig):
+    """BuilderConfig for SQUAD."""
+
+    def __init__(self, **kwargs):
+        """BuilderConfig for SQUAD.
+
+        Args:
+            **kwargs: keyword arguments forwarded to super.
+        """
+        super(SquadConfig, self).__init__(**kwargs)
+
+
+class Squad(datasets.GeneratorBasedBuilder):
+    """SQUAD: The Stanford Question Answering Dataset. Version 1.1."""
+
+    BUILDER_CONFIGS = [
+        SquadConfig(
+            name="plain_text",
+            version=datasets.Version("1.0.0", ""),
+            description="Plain text",
+        ),
+    ]
+
+    def _info(self):
+        return datasets.DatasetInfo(
+            description=_DESCRIPTION,
+            features=datasets.Features(
+                {
+                    "id": datasets.Value("string"),
+                    "title": datasets.Value("string"),
+                    "context": datasets.Value("string"),
+                    "question": datasets.Value("string"),
+                    "answers": datasets.features.Sequence(
+                        {
+                            "text": datasets.Value("string"),
+                            "answer_start": datasets.Value("int32"),
+                        }
+                    ),
+                }
+            ),
+            # No default supervised_keys (as we have to pass both question
+            # and context as input).
+            supervised_keys=None,
+            homepage="https://rajpurkar.github.io/SQuAD-explorer/",
+            citation=_CITATION,
+        )
+
+    def _split_generators(self, dl_manager):
+        downloaded_files = self.config.data_files
+
+        return [
+            datasets.SplitGenerator(name=datasets.Split.TRAIN, gen_kwargs={"filepath": downloaded_files["train"]}),
+            datasets.SplitGenerator(name=datasets.Split.VALIDATION, gen_kwargs={"filepath": downloaded_files["validation"]}),
+        ]
+
+    def _generate_examples(self, filepath):
+        """This function returns the examples in the raw (text) form."""
+        logger.info("generating examples from = %s", filepath)
+        with open(filepath, encoding="utf-8") as f:
+            squad = json.load(f)
+            for article in squad["data"]:
+                title = article.get("title", "").strip()
+                for paragraph in article["paragraphs"]:
+                    context = paragraph["context"].strip()
+                    for qa in paragraph["qas"]:
+                        question = qa["question"].strip()
+                        id_ = qa["id"]
+
+                        answer_starts = [answer["answer_start"] for answer in qa["answers"]]
+                        answers = [answer["text"].strip() for answer in qa["answers"]]
+
+                        # Features currently used are "context", "question", and "answers".
+                        # Others are extracted here for the ease of future expansions.
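+                        # Each answer_start is the character offset of the answer span
+                        # within `context`, as given in the raw SQuAD v1.1 JSON.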
+                        yield id_, {
+                            "title": title,
+                            "context": context,
+                            "question": question,
+                            "id": id_,
+                            "answers": {
+                                "answer_start": answer_starts,
+                                "text": answers,
+                            },
+                        }
diff --git a/docs/篇章4-使用Transformers解决NLP任务/datasets/xsum/xsum.py b/docs/篇章4-使用Transformers解决NLP任务/datasets/xsum/xsum.py
new file mode 100644
index 0000000..35b85f9
--- /dev/null
+++ b/docs/篇章4-使用Transformers解决NLP任务/datasets/xsum/xsum.py
@@ -0,0 +1,154 @@
+# coding=utf-8
+# Copyright 2020 The TensorFlow Datasets Authors and the HuggingFace Datasets Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Lint as: python3
+"""XSum dataset."""
+
+
+import json
+import os
+
+import datasets
+
+
+_CITATION = """
+@article{Narayan2018DontGM,
+  title={Don't Give Me the Details, Just the Summary! Topic-Aware Convolutional Neural Networks for Extreme Summarization},
+  author={Shashi Narayan and Shay B. Cohen and Mirella Lapata},
+  journal={ArXiv},
+  year={2018},
+  volume={abs/1808.08745}
+}
+"""
+
+_DESCRIPTION = """
+Extreme Summarization (XSum) Dataset.
+
+There are three features:
+  - document: Input news article.
+  - summary: One sentence summary of the article.
+  - id: BBC ID of the article.
+
+"""
+
+# From https://github.com/EdinburghNLP/XSum/issues/12
+_URL_DATA = "http://bollin.inf.ed.ac.uk/public/direct/XSUM-EMNLP18-Summary-Data-Original.tar.gz"
+_URL_SPLITS = (
+    "https://raw.githubusercontent.com/EdinburghNLP/XSum/master/XSum-Dataset/XSum-TRAINING-DEV-TEST-SPLIT-90-5-5.json"
+)
+
+_DOCUMENT = "document"
+_SUMMARY = "summary"
+_ID = "id"
+
+_REMOVE_LINES = set(
+    [
+        "Share this with\n",
+        "Email\n",
+        "Facebook\n",
+        "Messenger\n",
+        "Twitter\n",
+        "Pinterest\n",
+        "WhatsApp\n",
+        "Linkedin\n",
+        "LinkedIn\n",
+        "Copy this link\n",
+        "These are external links and will open in a new window\n",
+    ]
+)
+
+
+class Xsum(datasets.GeneratorBasedBuilder):
+    """Extreme Summarization (XSum) Dataset."""
+
+    # Version 1.2.0 expands coverage, includes ids, and removes web contents.
+    VERSION = datasets.Version("1.2.0")
+
+    def _info(self):
+        return datasets.DatasetInfo(
+            description=_DESCRIPTION,
+            features=datasets.Features(
+                {
+                    _DOCUMENT: datasets.Value("string"),
+                    _SUMMARY: datasets.Value("string"),
+                    _ID: datasets.Value("string"),
+                }
+            ),
+            supervised_keys=(_DOCUMENT, _SUMMARY),
+            homepage="https://github.com/EdinburghNLP/XSum/tree/master/XSum-Dataset",
+            citation=_CITATION,
+        )
+
+    def _split_generators(self, dl_manager):
+        """Returns SplitGenerators."""
+        data_files = self.config.data_files
+        split_path = os.path.join(data_files["data"], "XSum-TRAINING-DEV-TEST-SPLIT-90-5-5.json")
+
+        return [
+            datasets.SplitGenerator(
+                name=datasets.Split.TRAIN,
+                gen_kwargs={
+                    "split_path": split_path,
+                    "split_name": "train",
+                    "data_dir": os.path.join(data_files["data"], "bbc-summary-data"),
+                },
+            ),
+            datasets.SplitGenerator(
+                name=datasets.Split.VALIDATION,
+                gen_kwargs={
+                    "split_path": split_path,
+                    "split_name": "validation",
+                    "data_dir": os.path.join(data_files["data"], "bbc-summary-data"),
+                },
+            ),
+            datasets.SplitGenerator(
+                name=datasets.Split.TEST,
+                gen_kwargs={
+                    "split_path": split_path,
+                    "split_name": "test",
+                    "data_dir": os.path.join(data_files["data"], "bbc-summary-data"),
+                },
+            ),
+        ]
+
+    def _generate_examples(self, split_path, split_name, data_dir):
+        """Yields examples."""
+
+        with open(split_path, "r", encoding="utf-8") as f:
+            split_ids = json.load(f)
+
+        for i in split_ids[split_name]:
+            with open(os.path.join(data_dir, i + ".summary"), "r", encoding="utf-8") as f:
+                text = "".join([line for line in f.readlines() if line not in _REMOVE_LINES and line.strip()])
+                # Each file follows the format below:
+                # [SN]URL[SN]
+                # http://somelink
+                #
+                # [SN]TITLE[SN]
+                # some intro
+                #
+                # [SN]FIRST-SENTENCE[SN]
+                # some intro
+                #
+                # [SN]RESTBODY[SN]
+                # text line.
+                # another text line.
+                # "another text line."
+
+                # According to the following issue, FIRST-SENTENCE
+                # is the reference summary and TITLE is unused:
+                # https://github.com/EdinburghNLP/XSum/issues/22
+                segs = text.split("[SN]")
+                yield i, {_DOCUMENT: segs[8].strip(), _SUMMARY: segs[6].strip(), _ID: i}
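These scripts are meant to be loaded from the local repository rather than from the Hugging Face Hub. The snippet below is a minimal usage sketch, not part of the diff: the working directory, the pre-downloaded data file names (train-v1.1.json, dev-v1.1.json, the extracted XSum archive directory), and the assumption that the `datasets` version used by this tutorial passes `data_files` straight through to `self.config.data_files` are all illustrative.

# Minimal usage sketch (assumptions: run from the repository root; the SQuAD JSON
# files and the extracted XSum archive were downloaded beforehand).
import datasets

# Local SQuAD builder: _split_generators above reads data_files["train"] / ["validation"].
squad = datasets.load_dataset(
    "docs/篇章4-使用Transformers解决NLP任务/datasets/squad/squad.py",
    data_files={"train": "train-v1.1.json", "validation": "dev-v1.1.json"},
)

# Local XSum builder: data_files["data"] must point at the directory containing
# bbc-summary-data/ and XSum-TRAINING-DEV-TEST-SPLIT-90-5-5.json.
xsum = datasets.load_dataset(
    "docs/篇章4-使用Transformers解决NLP任务/datasets/xsum/xsum.py",
    data_files={"data": "XSUM-EMNLP18-Summary-Data-Original"},
)

# Local metrics; metrics/squad.py imports the bundled evaluate.py sitting next to it.
squad_metric = datasets.load_metric("docs/篇章4-使用Transformers解决NLP任务/datasets/metrics/squad.py")
rouge = datasets.load_metric("docs/篇章4-使用Transformers解决NLP任务/datasets/metrics/rouge.py")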