
FATE II.4.3: Sentiment Classification with a Frozen-Parameter Bert


In this example, we use a frozen-parameter Bert to build a text classifier and train it on the IMDB sentiment classification dataset.

Dataset: IMDB sentiment

This is a binary classification dataset. You can download our processed copy here:

Place it in the examples/data folder (or any location of your own; the code below references this path).

The original data comes from:

Inspect the Dataset

import pandas as pd

df = pd.read_csv('/mnt/hgfs/examples/data/IMDB.csv')  # adjust to your own file location
df

from federatedml.nn.dataset.nlp_tokenizer import TokenizerDataset

# tokenize the raw text with the bert-base-uncased tokenizer
ds = TokenizerDataset(tokenizer_name_or_path="bert-base-uncased")
ds.load('/mnt/hgfs/examples/data/IMDB.csv')  # adjust to your own file location

from torch.utils.data import DataLoader

dl = DataLoader(ds, batch_size=16)
for i in dl:
    break
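To see what the DataLoader actually yields, you can inspect the batch captured by the loop above. A minimal sketch, assuming each batch unpacks into a (token_ids, labels) pair, which is also what the trainer below relies on:

# `i` is the first batch grabbed by the loop above
token_ids, labels = i
print(token_ids.shape)  # (16, max_seq_len): one row of Bert token ids per review
print(labels.shape)     # 16 binary sentiment labels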

Build the Bert Classifier

from pipeline.component.nn import save_to_fate

%%save_to_fate model bert_.py
import torch as t
from federatedml.nn.model_zoo.pretrained_bert import PretrainedBert

class BertClassifier(t.nn.Module):

    def __init__(self, ):
        super(BertClassifier, self).__init__()
        # load pretrained bert-base-uncased and freeze its weights;
        # only the small classification head below is trained
        self.bert = PretrainedBert(pretrained_model_name_or_path='bert-base-uncased', freeze_weight=True)
        self.classifier = t.nn.Sequential(
            t.nn.Linear(768, 128),
            t.nn.ReLU(),
            t.nn.Linear(128, 64),
            t.nn.ReLU(),
            t.nn.Linear(64, 1),
            t.nn.Sigmoid()
        )

    def parameters(self, ):
        # expose only the head's parameters, so the optimizer never touches Bert
        return self.classifier.parameters()

    def forward(self, x):
        x = self.bert(x)
        # classify on Bert's pooled [CLS] representation
        return self.classifier(x.pooler_output)

model = BertClassifier()
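Before handing the model to a trainer, a quick local forward pass confirms the shapes line up. A minimal sanity check, assuming the wrapped Bert accepts a raw batch of token ids (the vocabulary size 30522 and sequence length 128 below are illustrative assumptions, not values required by the tutorial):

dummy_ids = t.randint(0, 30522, (2, 128))  # 2 fake samples of 128 token ids each (illustrative)
with t.no_grad():
    out = model(dummy_ids)
print(out.shape)  # expected: torch.Size([2, 1]), sigmoid outputs in [0, 1]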
import torch as t
from federatedml.nn.homo.trainer.fedavg_trainer import FedAVGTrainer

trainer = FedAVGTrainer(epochs=3, batch_size=16, shuffle=True, data_loader_worker=4)
trainer.local_mode()
trainer.set_model(model)
Local Test
opt = t.optim.Adam(model.parameters(), lr=0.005)
loss = t.nn.BCELoss()
# local test
trainer.train(ds, None, opt, loss)
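For a rough local check of the trained head, you can score the model on the same data it just saw (an optimistic estimate, not a proper validation). A minimal sketch under the same (token_ids, labels) batch assumption as above:

model.eval()
correct, total = 0, 0
with t.no_grad():
    for token_ids, labels in dl:
        preds = (model(token_ids) > 0.5).float().view(-1)  # threshold sigmoid output at 0.5
        correct += (preds == labels.view(-1).float()).sum().item()
        total += labels.shape[0]
print('train accuracy:', correct / total)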

Submit the Pipeline

import torch as t
from torch import nn
from pipeline import fate_torch_hook
from pipeline.component import HomoNN
from pipeline.backend.pipeline import PipeLine
from pipeline.component import Reader, Evaluation, DataTransform
from pipeline.interface import Data, Model

fate_torch_hook(t)

import os

# fate_project_path = os.path.abspath('../../../../')
guest_0 = 10000
host_1 = 9999
pipeline = PipeLine().set_initiator(role='guest', party_id=guest_0).set_roles(guest=guest_0, host=host_1,
                                                                              arbiter=guest_0)
data_0 = {"name": "imdb", "namespace": "experiment"}
data_path = '/mnt/hgfs/examples/data/IMDB.csv'  # adjust to your own file location
pipeline.bind_table(name=data_0['name'], namespace=data_0['namespace'], path=data_path)

{'namespace': 'experiment', 'table_name': 'imdb'}

reader_0 = Reader(name="reader_0")
reader_0.get_party_instance(role='guest', party_id=guest_0).component_param(table=data_0)
reader_0.get_party_instance(role='host', party_id=host_1).component_param(table=data_0)

reader_1 = Reader(name="reader_1")
reader_1.get_party_instance(role='guest', party_id=guest_0).component_param(table=data_0)
reader_1.get_party_instance(role='host', party_id=host_1).component_param(table=data_0)
If your virtual machine has no GPU, remove cuda=True here, otherwise an error will be raised (a conditional alternative is sketched after this block).
from pipeline.component.homo_nn import DatasetParam, TrainerParam

model = t.nn.Sequential(
    t.nn.CustModel(module_name='bert_', class_name='BertClassifier')
)

# the GPU variant, kept for reference (note the extra cuda=True):
# nn_component = HomoNN(name='nn_0',
#                       model=model,
#                       loss=t.nn.BCELoss(),
#                       optimizer=t.optim.Adam(lr=0.001, weight_decay=0.001),
#                       dataset=DatasetParam(dataset_name='nlp_tokenizer', tokenizer_name_or_path="bert-base-uncased"),  # use the custom dataset
#                       trainer=TrainerParam(trainer_name='fedavg_trainer', epochs=2, batch_size=16, data_loader_worker=8, cuda=True),
#                       torch_seed=100
#                       )

nn_component = HomoNN(name='nn_0',
                      model=model,
                      loss=t.nn.BCELoss(),
                      optimizer=t.optim.Adam(lr=0.001, weight_decay=0.001),
                      dataset=DatasetParam(dataset_name='nlp_tokenizer', tokenizer_name_or_path="bert-base-uncased"),  # use the custom dataset
                      trainer=TrainerParam(trainer_name='fedavg_trainer', epochs=2, batch_size=16, data_loader_worker=8),
                      torch_seed=100
                      )
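Rather than deleting cuda=True by hand, one script can serve both setups by adding the flag only when a GPU is actually visible. A minimal sketch, assuming TrainerParam simply forwards these keyword arguments to fedavg_trainer as in the call above:

import torch

# build the trainer kwargs once; request GPU training only when one exists
trainer_kwargs = dict(trainer_name='fedavg_trainer', epochs=2, batch_size=16, data_loader_worker=8)
if torch.cuda.is_available():
    trainer_kwargs['cuda'] = True
trainer_param = TrainerParam(**trainer_kwargs)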
Make sure pipeline.add_component(reader_0) is included here, otherwise an error will be raised:
pipeline.add_component(reader_0)
pipeline.add_component(reader_1)
pipeline.add_component(nn_component, data=Data(train_data=reader_0.output.data, validate_data=reader_1.output.data))
pipeline.add_component(Evaluation(name='eval_0', eval_type='binary'), data=Data(data=nn_component.output.data))
pipeline.compile()
pipeline.fit()

Retrieve and Save the Results

df = pipeline.get_component('nn_0').get_output_data()  # get result
df

import pandas as pd

# save predictions; the Chinese filename means "sentiment classification with frozen-parameter Bert"
df.to_csv('使用冻结参数Bert进行情绪分类.csv')

pipeline.get_component('nn_0').get_summary()
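get_summary() returns a plain Python dict of training metadata (loss history and similar, depending on the trainer), so it can be persisted next to the CSV. A minimal sketch; the summary.json filename is only an illustration:

import json

summary = pipeline.get_component('nn_0').get_summary()
with open('summary.json', 'w', encoding='utf-8') as f:
    json.dump(summary, f, ensure_ascii=False, indent=2)  # keep non-ASCII keys readable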
