


Preface

(Figure: Pointer Generator Network structure [2])
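The figure itself is not reproduced in this excerpt. As a brief recap of the idea in [2], the pointer-generator network mixes generating tokens from the vocabulary with copying tokens from the source text through the attention distribution: at each decoding step it predicts a generation probability $p_{\text{gen}}$ and combines the two distributions as

$$P(w) = p_{\text{gen}}\, P_{\text{vocab}}(w) + (1 - p_{\text{gen}}) \sum_{i:\, w_i = w} a_i^t,$$

where $a^t$ is the attention distribution over source positions at step $t$, so rare or out-of-vocabulary words from the source can be copied directly into the summary.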
Dataset
Model Selection
Data Preprocessing

Model Training and Evaluation

Because the Transformers library already wraps up the model, loss function, and related components for us, we only need to load them and define the training loop:

import numpy as np
import torch
from tqdm.auto import tqdm
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
model_checkpoint = "csebuetnlp/mT5_multilingual_XLSum"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
model = AutoModelForSeq2SeqLM.from_pretrained(model_checkpoint)
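
The data-loading and preprocessing code is not reproduced in this excerpt, but train_loop and test_loop below expect batches that carry input_ids, attention_mask, and labels (with padding positions set to -100). The sketch below shows one way to build such batches; the file paths, the text/summary field names, batch_size, the length limits, and the use of the text_target argument (which needs a reasonably recent Transformers release) are assumptions for illustration, not values from the original post:

import json
from torch.utils.data import DataLoader

max_input_length = 512   # assumed limit for source articles
max_target_length = 64   # assumed limit for summaries (also used by test_loop below)
batch_size = 8           # assumed batch size

def load_jsonl(path):
    # Hypothetical format: one JSON object per line with 'text' and 'summary' fields.
    with open(path, encoding='utf-8') as f:
        return [json.loads(line) for line in f]

train_data = load_jsonl('data/train.jsonl')  # hypothetical file paths
dev_data = load_jsonl('data/dev.jsonl')
test_data = load_jsonl('data/test.jsonl')

def collate_fn(batch_samples):
    texts = [sample['text'] for sample in batch_samples]
    summaries = [sample['summary'] for sample in batch_samples]
    batch_data = tokenizer(texts, padding=True, truncation=True,
                           max_length=max_input_length, return_tensors='pt')
    labels = tokenizer(text_target=summaries, padding=True, truncation=True,
                       max_length=max_target_length, return_tensors='pt')['input_ids']
    # Replace padding positions with -100 so the loss ignores them;
    # test_loop converts them back to pad_token_id before decoding.
    labels[labels == tokenizer.pad_token_id] = -100
    batch_data['labels'] = labels
    return batch_data

train_dataloader = DataLoader(train_data, batch_size=batch_size, shuffle=True, collate_fn=collate_fn)
dev_dataloader = DataLoader(dev_data, batch_size=batch_size, shuffle=False, collate_fn=collate_fn)
test_dataloader = DataLoader(test_data, batch_size=batch_size, shuffle=False, collate_fn=collate_fn)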

def train_loop(dataloader, model, optimizer, lr_scheduler, epoch, total_loss):
    progress_bar = tqdm(range(len(dataloader)))
    progress_bar.set_description(f'loss: {0:>7f}')
    finish_batch_num = (epoch-1) * len(dataloader)
    
    model.train()
    for batch, batch_data in enumerate(dataloader, start=1):
        batch_data = batch_data.to(device)
        outputs = model(**batch_data)
        loss = outputs.loss
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        lr_scheduler.step()
        total_loss += loss.item()
        
        progress_bar.set_description(f'loss: {total_loss/(finish_batch_num + batch):>7f}')
        progress_bar.update(1)
    
    return total_loss

ROUGE scores are computed with the rouge package [11], which needs to be installed first:

pip install rouge
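
The rouge package scores whitespace-separated token strings, which is why test_loop below joins every character of the decoded Chinese predictions and references with spaces before scoring. A quick illustrative check (the strings here are made-up examples, not data from the post):

from rouge import Rouge

rouge = Rouge()
hyp = ' '.join('今天天气很好')   # character-level "tokens" for Chinese text
ref = ' '.join('今天天气不错')
print(rouge.get_scores(hyps=[hyp], refs=[ref], avg=True))
# prints a dict with 'rouge-1', 'rouge-2' and 'rouge-l' precision/recall/F1 scores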


from rouge import Rouge

def test_loop(dataloader, model, tokenizer):
    preds, labels = [], []
    rouge = Rouge()
    
    model.eval()
    with torch.no_grad():
        for batch_data in tqdm(dataloader):
            batch_data = batch_data.to(device)
            # Generate predictions
            generated_tokens = model.generate(batch_data["input_ids"],
                                              attention_mask=batch_data["attention_mask"],
                                              max_length=max_target_length,
                                              num_beams=beam_search_size,
                                              no_repeat_ngram_size=no_repeat_ngram_size,
                                              ).cpu().numpy()
            if isinstance(generated_tokens, tuple):
                generated_tokens = generated_tokens[0]
            decoded_preds = tokenizer.batch_decode(generated_tokens, 
                                                   skip_special_tokens=True, 
                                                   clean_up_tokenization_spaces=False)
            
            label_tokens = batch_data["labels"].cpu().numpy()
            # Replace -100 in the label sequences with the pad token ID so the tokenizer can decode them
            label_tokens = np.where(label_tokens != -100, label_tokens, tokenizer.pad_token_id)
            decoded_labels = tokenizer.batch_decode(label_tokens, 
                                                    skip_special_tokens=True, 
                                                    clean_up_tokenization_spaces=False)
            # Convert to the space-separated text format expected by the rouge library
            preds += [' '.join(pred.strip()) for pred in decoded_preds]
            labels += [' '.join(label.strip()) for label in decoded_labels]
    # Compute the ROUGE metrics with the rouge library
    scores = rouge.get_scores(hyps=preds, refs=labels, avg=True)
    result = {key: value['f'] * 100 for key, value in scores.items()}
    result['avg'] = np.mean(list(result.values()))
    return result

Model Saving

For the optimizer we use AdamW, and the learning-rate scheduler is defined through the get_scheduler() function. In each epoch we call the train_loop and test_loop defined above; the model's ROUGE scores on the validation set are used to tune hyperparameters and select the best checkpoint, and the best checkpoint is finally run on the test set to evaluate the model's final performance.
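The original post does not show where hyperparameters such as learning_rate, weight_decay, or output_dir come from. The block below is an assumed setup, added only so the training script that follows can run end to end; the concrete values are illustrative, not the ones used in the post:

import os
import logging
from torch.optim import AdamW   # the post may instead use the (deprecated) transformers AdamW
from transformers import get_scheduler

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = model.to(device)

# Assumed hyperparameter values, chosen only for illustration.
learning_rate = 2e-5
weight_decay = 0.01
adam_beta1, adam_beta2, adam_epsilon = 0.9, 0.999, 1e-8
warmup_proportion = 0.1
num_train_epochs = 3
beam_search_size = 4
no_repeat_ngram_size = 2
output_dir = './checkpoints'    # hypothetical output directory
os.makedirs(output_dir, exist_ok=True)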
total_steps = len(train_dataloader) * num_train_epochs
# Prepare optimizer and schedule (linear warmup and decay)
no_decay = ["bias", "LayerNorm.weight"]
optimizer_grouped_parameters = [
    {"params": [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)], "weight_decay": weight_decay},
    {"params": [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)], "weight_decay": 0.0}
]
warmup_steps = int(total_steps * warmup_proportion)
optimizer = AdamW(
    optimizer_grouped_parameters, 
    lr=learning_rate, 
    betas=(adam_beta1, adam_beta2), 
    eps=adam_epsilon
)
lr_scheduler = get_scheduler(
    'linear',
    optimizer, 
    num_warmup_steps=warmup_steps,
    num_training_steps=total_steps
)
# Train!
logger.info("***** Running training *****")
logger.info(f"Num examples - {len(train_data)}")
logger.info(f"Num Epochs - {num_train_epochs}")
logger.info(f"Total optimization steps - {total_steps}")
total_loss = 0.
best_avg_rouge = 0.
for epoch in range(num_train_epochs):
    print(f"Epoch {epoch+1}/{num_train_epochs}n" + 30 * "-")
    # train_loop expects a 1-based epoch index for its running-average loss
    total_loss = train_loop(train_dataloader, model, optimizer, lr_scheduler, epoch+1, total_loss)
    dev_rouges = test_loop(dev_dataloader, model, tokenizer)
    logger.info(f"Dev Rouge1: {dev_rouges['rouge-1']:>0.2f} Rouge2: {dev_rouges['rouge-2']:>0.2f} RougeL: {dev_rouges['rouge-l']:>0.2f}")
    rouge_avg = dev_rouges['avg']
    if rouge_avg > best_avg_rouge:
        best_avg_rouge = rouge_avg
        logger.info(f'saving new weights to {output_dir}...')
        save_weight = f'epoch_{epoch+1}_rouge_{rouge_avg:0.4f}_weights.bin'
        torch.save(model.state_dict(), os.path.join(output_dir, save_weight))
logger.info("Done!")


Summary

References

[1] Text Summarization with Pretrained Encoders: https://arxiv.org/abs/1908.08345
[2] Get To The Point: Summarization with Pointer-Generator Networks: http://arxiv.org/abs/1704.04368
[3] BART: Denoising sequence-to-sequence pre-training for natural language generation, translation, and comprehension: https://aclanthology.org/2020.acl-main.703.pdf
[4] BRIO: Bringing Order to Abstractive Summarization: https://arxiv.org/abs/2203.16804v1
[5] GSum: A General Framework for Guided Neural Abstractive Summarization: https://arxiv.org/abs/2010.08014
[6] SimCLS: A Simple Framework for Contrastive Learning of Abstractive Summarization: https://arxiv.org/abs/2106.01890v1
[7] Abstractive Summarization with Combination of Pre-trained Sequence-to-Sequence and Saliency Models: https://arxiv.org/abs/2003.13028
[8] XL-Sum: Large-Scale Multilingual Abstractive Summarization for 44 Languages: https://github.com/csebuetnlp/xl-sum
[9] mT5: A massively multilingual pre-trained text-to-text transformer: https://github.com/google-research/multilingual-t5
[10] LCSTS: A large scale Chinese short text summarization dataset: https://arxiv.org/pdf/1506.05865.pdf
[11] rouge: https://github.com/pltrdy/rouge
[12] summarization: https://xiaosheng.run/
[13] Deep reinforcement and transfer learning for abstractive text summarization: https://www.sciencedirect.com/science/article/abs/pii/S0885230821000796
[14] Summarization Papers: https://github.com/xcfcode/Summarization-Papers