
nlp: roberta, get_linear_schedule_with_warmup cannot be called

get_linear_schedule_with_warmup cannot be called
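A likely cause, assuming the failure is an ImportError at the `from transformers import ...` line in the scripts below, is a version mismatch: get_linear_schedule_with_warmup only exists in transformers 2.1 and later, while older releases (and pytorch-transformers) exposed the same linear warmup-and-decay schedule as the WarmupLinearSchedule class. A minimal compatibility shim along those lines might look like this (the wrapper is our own sketch, not part of the original scripts):

try:
    from transformers import get_linear_schedule_with_warmup
except ImportError:
    # Older releases (< 2.1) ship WarmupLinearSchedule instead; wrap it so the
    # rest of the script can keep calling get_linear_schedule_with_warmup.
    from transformers import WarmupLinearSchedule

    def get_linear_schedule_with_warmup(optimizer, num_warmup_steps, num_training_steps):
        return WarmupLinearSchedule(optimizer, warmup_steps=num_warmup_steps, t_total=num_training_steps)

Upgrading the library (pip install -U transformers) is the simpler fix if the environment allows it.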

run_boolq_roberta.py

CUDA_VISIBLE_DEVICES=2 python use_boolq_bert.py --model_type bert --model_name_or_path bert-base-cased --do_eval --do_lower_case --train_file train.jsonl --predict_file val.jsonl --test_file test3.jsonl --per_gpu_eval_batch_size=8 --output_dir /boolq_bert_output/checkpoint-75000
import json
import argparse
import csv
import glob
import logging
import os
import random

import numpy as np
import torch
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler, TensorDataset
from torch.utils.data.distributed import DistributedSampler
from tqdm import tqdm, trange

from transformers import (
    WEIGHTS_NAME,
    AdamW,
    RobertaConfig,
    RobertaForSequenceClassification,
    RobertaTokenizer,
    get_linear_schedule_with_warmup,
)

try:
    from torch.utils.tensorboard import SummaryWriter
except ImportError:
    from tensorboardX import SummaryWriter

logger = logging.getLogger(__name__)

MODEL_CLASSES = {
    "roberta": (RobertaConfig, RobertaForSequenceClassification, RobertaTokenizer),
}


class SwagExample(object):
    """A single training/test example for the SWAG dataset."""

    def __init__(self, swag_id, context_sentence, start_ending, ending_0, ending_1, ending_2, ending_3, label=None):
        self.swag_id = swag_id
        self.context_sentence = context_sentence
        self.start_ending = start_ending
        self.endings = [
            ending_0,
            ending_1,
            ending_2,
            ending_3,
        ]
        self.label = label

    def __str__(self):
        return self.__repr__()

    def __repr__(self):
        attributes = [
            "swag_id: {}".format(self.swag_id),
            "context_sentence: {}".format(self.context_sentence),
            "start_ending: {}".format(self.start_ending),
            "ending_0: {}".format(self.endings[0]),
            "ending_1: {}".format(self.endings[1]),
            "ending_2: {}".format(self.endings[2]),
            "ending_3: {}".format(self.endings[3]),
        ]
        if self.label is not None:
            attributes.append("label: {}".format(self.label))
        return ", ".join(attributes)


class BoolqExample(object):
    """A single training/test example for the Boolq dataset."""

    def __init__(self, swag_id, context_sentence, start_ending, ending_0, label=None):
        self.swag_id = swag_id
        self.context_sentence = context_sentence
        self.start_ending = start_ending
        self.endings = [
            ending_0
            # ending_1,
        ]
        self.label = label

    def __str__(self):
        return self.__repr__()

    def __repr__(self):
        attributes = [
            "swag_id: {}".format(self.swag_id),
            "context_sentence: {}".format(self.context_sentence),
            "start_ending: {}".format(self.start_ending),
            "ending_0: {}".format(self.endings[0]),
            # "ending_1: {}".format(self.endings[1]),
        ]
        if self.label is not None:
            attributes.append("label: {}".format(self.label))
        return ", ".join(attributes)

class MultiRCExample(object):
    """A single training/test example for the MultiRC dataset."""
    def __init__(self, swag_id, context_sentence, start_ending, ending_0, label=None):
        self.swag_id = swag_id
        self.context_sentence = context_sentence
        self.start_ending = start_ending
        self.endings = [
            ending_0
            # ending_1,
        ]
        self.label = label

    def __str__(self):
        return self.__repr__()

    def __repr__(self):
        attributes = [
            "swag_id: {}".format(self.swag_id),
            "context_sentence: {}".format(self.context_sentence),
            "start_ending: {}".format(self.start_ending),
            "ending_0: {}".format(self.endings[0]),
            # "ending_1: {}".format(self.endings[1]),
        ]
        if self.label is not None:
            attributes.append("label: {}".format(self.label))
        return ", ".join(attributes)


'''class InputFeatures(object):
    def __init__(self, example_id, choices_features, label):
        self.example_id = example_id
        self.choices_features = [
            {"input_ids": input_ids, "input_mask": input_mask, "segment_ids": segment_ids}
            for _, input_ids, input_mask, segment_ids in choices_features
        ]
        self.label = label'''


class InputFeatures(object):
    """
    A single set of features of data.

    Args:
        input_ids: Indices of input sequence tokens in the vocabulary.
        attention_mask: Mask to avoid performing attention on padding token indices.
            Mask values selected in ``[0, 1]``:
            Usually ``1`` for tokens that are NOT MASKED, ``0`` for MASKED (padded) tokens.
        token_type_ids: Segment token indices to indicate first and second portions of the inputs.
        label: Label corresponding to the input
    """

    def __init__(self, input_ids, attention_mask=None, token_type_ids=None, label=None):
        self.input_ids = input_ids
        self.attention_mask = attention_mask
        self.token_type_ids = token_type_ids
        self.label = label


def read_swag_examples(input_file, is_training=True):
    with open(input_file, "r", encoding="utf-8") as f:
        lines = list(csv.reader(f))

    if is_training and lines[0][-1] != "label":
        raise ValueError("For training, the input file must contain a label column.")

    examples = [
        SwagExample(
            swag_id=line[2],
            context_sentence=line[4],
            start_ending=line[5],  # in the swag dataset, the
            # common beginning of each
            # choice is stored in "sent2".
            ending_0=line[7],
            ending_1=line[8],
            ending_2=line[9],
            ending_3=line[10],
            label=int(line[11]) if is_training else None,
        )
        for line in lines[1:]  # we skip the line with the column names
    ]
    return examples


def read_boolq_examples(input_file, is_training=True):
    with open(input_file, "r", encoding="utf-8") as f:
        lines = f.readlines()

    examples = []
    for line in lines:
        data_raw = json.loads(line.strip("\n"))
        if data_raw["label"]:
            label_input = 1
        else:
            label_input = 0
        examples.append(
            BoolqExample(
                swag_id=data_raw["idx"],
                context_sentence=data_raw["passage"],
                start_ending=data_raw["question"],
                ending_0="",
                label=label_input if is_training else None,
            )
        )
    return examples


def convert_examples_to_features(examples, tokenizer, max_seq_length, is_training):
    """Loads a data file into a list of `InputBatch`s."""
    # Swag is a multiple choice task. To perform this task using Bert,
    # we will use the formatting proposed in "Improving Language
    # Understanding by Generative Pre-Training" and suggested by
    # @jacobdevlin-google in this issue
    # https://github.com/google-research/bert/issues/38.
    #
    # Each choice will correspond to a sample on which we run the
    # inference. For a given Swag example, we will create the 4
    # following inputs:
    # - [CLS] context [SEP] choice_1 [SEP]
    # - [CLS] context [SEP] choice_2 [SEP]
    # - [CLS] context [SEP] choice_3 [SEP]
    # - [CLS] context [SEP] choice_4 [SEP]
    # The model will output a single value for each input. To get the
    # final decision of the model, we will run a softmax over these 4
    # outputs.
    features = []
    for example_index, example in tqdm(enumerate(examples)):
        context_tokens = tokenizer.tokenize(example.context_sentence)
        start_ending_tokens = tokenizer.tokenize(example.start_ending)

        context_tokens_choice = context_tokens[:]
        ending_tokens = start_ending_tokens
        _truncate_seq_pair(context_tokens_choice, ending_tokens, max_seq_length - 3)

        tokens = ["[CLS]"] + context_tokens_choice + ["[SEP]"] + ending_tokens + ["[SEP]"]
        segment_ids = [0] * (len(context_tokens_choice) + 2) + [1] * (len(ending_tokens) + 1)

        input_ids = tokenizer.convert_tokens_to_ids(tokens)
        input_mask = [1] * len(input_ids)

        # Zero-pad up to the sequence length.
        padding = [0] * (max_seq_length - len(input_ids))
        input_ids += padding
        input_mask += padding
        segment_ids += padding

        assert len(input_ids) == max_seq_length
        assert len(input_mask) == max_seq_length
        assert len(segment_ids) == max_seq_length

        label = example.label

        if example_index < 5:
            logger.info("*** Example ***")
            logger.info("swag_id: {}".format(example.swag_id))
            logger.info("tokens: {}".format(" ".join(tokens)))
            logger.info("input_ids: {}".format(" ".join(map(str, input_ids))))
            logger.info("attention_mask: {}".format(" ".join(map(str, input_mask))))
            logger.info("token_type_ids: {}".format(" ".join(map(str, segment_ids))))
            if is_training:
                logger.info("label: {}".format(label))

        features.append(InputFeatures(input_ids=input_ids, attention_mask=input_mask, token_type_ids=segment_ids, label=label))
    return features


def _truncate_seq_pair(tokens_a, tokens_b, max_length):
    """Truncates a sequence pair in place to the maximum length."""
    # This is a simple heuristic which will always truncate the longer sequence
    # one token at a time. This makes more sense than truncating an equal percent
    # of tokens from each, since if one sequence is very short then each token
    # that's truncated likely contains more information than a longer sequence.
    while True:
        total_length = len(tokens_a) + len(tokens_b)
        if total_length <= max_length:
            break
        if len(tokens_a) > len(tokens_b):
            tokens_a.pop()
        else:
            tokens_b.pop()


'''def accuracy(out, labels):
    outputs = np.argmax(out, axis=1)
    return np.sum(outputs == labels)'''


'''def accuracy(out, labels):
    outputs = (out >= 0.5).astype(np.int)
    return np.sum(outputs == labels)'''

def accuracy(out, labels):
    # np.int was removed in NumPy 1.20+; the builtin int preserves the original behaviour.
    outputs = (out.squeeze(1) >= 0.5).astype(int)
    return np.sum(outputs == labels)

def select_field(features, field):
    return [[choice[field] for choice in feature.choices_features] for feature in features]


def set_seed(args):
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if args.n_gpu > 0:
        torch.cuda.manual_seed_all(args.seed)


def load_and_cache_examples(args, tokenizer, train=False, evaluate=False, test=False, output_examples=False):
    if args.local_rank not in [-1, 0]:
        torch.distributed.barrier()  # Make sure only the first process in distributed training processes the dataset; the others will use the cache

    # Load data features from cache or dataset file
    # input_file = args.predict_file if evaluate else args.train_file
    if train:
        input_file = args.train_file
    if evaluate:
        input_file = args.predict_file
    if test:
        input_file = args.test_file
    cached_features_file = os.path.join(
        os.path.dirname(input_file),
        "cached_{}_{}_{}".format(
            "dev" if evaluate else "train",
            list(filter(None, args.model_name_or_path.split("/"))).pop(),
            str(args.max_seq_length),
        ),
    )
    if os.path.exists(cached_features_file) and not args.overwrite_cache and not output_examples:
        logger.info("Loading features from cached file %s", cached_features_file)
        features = torch.load(cached_features_file)
    else:
        logger.info("Creating features from dataset file at %s", input_file)
        examples = read_boolq_examples(input_file)
        features = convert_examples_to_features(examples, tokenizer, args.max_seq_length, not evaluate)
        if args.local_rank in [-1, 0]:
            logger.info("Saving features into cached file %s", cached_features_file)
            torch.save(features, cached_features_file)

    if args.local_rank == 0:
        torch.distributed.barrier()  # Make sure only the first process in distributed training processes the dataset; the others will use the cache

    # Convert to Tensors and build dataset
    # all_input_ids = torch.tensor(select_field(features, "input_ids"), dtype=torch.long)
    # all_input_mask = torch.tensor(select_field(features, "input_mask"), dtype=torch.long)
    # all_segment_ids = torch.tensor(select_field(features, "segment_ids"), dtype=torch.long)
    # all_label = torch.tensor([f.label for f in features], dtype=torch.long)
    all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long)
    all_input_mask = torch.tensor([f.attention_mask for f in features], dtype=torch.long)
    all_segment_ids = torch.tensor([f.token_type_ids for f in features], dtype=torch.long)
    all_label = torch.tensor([f.label for f in features], dtype=torch.long)

    if evaluate:
        dataset = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, all_label)
    else:
        dataset = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, all_label)

    if output_examples:
        return dataset, examples, features
    return dataset


def train(args, train_dataset, model, tokenizer):
    """ Train the model """
    '''if args.local_rank in [-1, 0]:
        tb_writer = SummaryWriter()'''
    if args.local_rank in [-1, 0]:
        tb_writer = SummaryWriter(args.tbname)

    args.train_batch_size = args.per_gpu_train_batch_size * max(1, args.n_gpu)
    train_sampler = RandomSampler(train_dataset) if args.local_rank == -1 else DistributedSampler(train_dataset)
    train_dataloader = DataLoader(train_dataset, sampler=train_sampler, batch_size=args.train_batch_size)
    print("len(train_dataset): ", len(train_dataset))

    if args.max_steps > 0:
        t_total = args.max_steps
        args.num_train_epochs = args.max_steps // (len(train_dataloader) // args.gradient_accumulation_steps) + 1
    else:
        t_total = len(train_dataloader) // args.gradient_accumulation_steps * args.num_train_epochs

    # Prepare optimizer and schedule (linear warmup and decay)
    no_decay = ["bias", "LayerNorm.weight"]
    optimizer_grouped_parameters = [
        {
            "params": [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)],
            "weight_decay": args.weight_decay,
        },
        {"params": [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)], "weight_decay": 0.0},
    ]
    optimizer = AdamW(optimizer_grouped_parameters, lr=args.learning_rate, eps=args.adam_epsilon)
    scheduler = get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps=args.warmup_steps, num_training_steps=t_total
    )
    if args.fp16:
        try:
            from apex import amp
        except ImportError:
            raise ImportError("Please install apex from https://www.github.com/nvidia/apex to use fp16 training.")
        model, optimizer = amp.initialize(model, optimizer, opt_level=args.fp16_opt_level)

    # multi-gpu training (should be after apex fp16 initialization)
    if args.n_gpu > 1:
        model = torch.nn.DataParallel(model)

    # Distributed training (should be after apex fp16 initialization)
    if args.local_rank != -1:
        model = torch.nn.parallel.DistributedDataParallel(
            model, device_ids=[args.local_rank], output_device=args.local_rank, find_unused_parameters=True
        )

    # Train!
    logger.info("***** Running training *****")
    logger.info("  Num examples = %d", len(train_dataset))
    logger.info("  Num Epochs = %d", args.num_train_epochs)
    logger.info("  Instantaneous batch size per GPU = %d", args.per_gpu_train_batch_size)
    logger.info(
        "  Total train batch size (w. parallel, distributed & accumulation) = %d",
        args.train_batch_size
        * args.gradient_accumulation_steps
        * (torch.distributed.get_world_size() if args.local_rank != -1 else 1),
    )
    logger.info("  Gradient Accumulation steps = %d", args.gradient_accumulation_steps)
    logger.info("  Total optimization steps = %d", t_total)

    global_step = 0
    tr_loss, logging_loss = 0.0, 0.0
    model.zero_grad()
    train_iterator = trange(int(args.num_train_epochs), desc="Epoch", disable=args.local_rank not in [-1, 0])
    set_seed(args)  # Added here for reproducibility
    for _ in train_iterator:
        epoch_iterator = tqdm(train_dataloader, desc="Iteration", disable=args.local_rank not in [-1, 0])
        print("len(train_dataloader): ", len(train_dataloader))
        for step, batch in enumerate(epoch_iterator):
            model.train()
            batch = tuple(t.to(args.device) for t in batch)
            inputs = {
                "input_ids": batch[0],
                "attention_mask": batch[1],
                # 'token_type_ids': None if args.model_type == 'xlm' else batch[2],
                # "token_type_ids": batch[2],
                "labels": batch[3].float(),
            }
            # if args.model_type in ['xlnet', 'xlm']:
            #     inputs.update({'cls_index': batch[5],
            #                    'p_mask': batch[6]})
            outputs = model(**inputs)
            '''logit = outputs[1]
            print(logit)
            print(logit.shape)
            print(batch[3])
            print(batch[3].shape)'''
            '''logit = outputs[1]
            print("logit", logit)
            print("logit.shape", logit.shape)
            print("batch[3]", batch[3])
            print("batch[3].shape", batch[3].shape)
            print("batch[3].type", type(batch[3]))'''
            loss = outputs[0]  # model outputs are always tuple in transformers (see doc)

            if args.n_gpu > 1:
                loss = loss.mean()  # mean() to average on multi-gpu parallel (not distributed) training
            if args.gradient_accumulation_steps > 1:
                loss = loss / args.gradient_accumulation_steps

            if args.fp16:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
                torch.nn.utils.clip_grad_norm_(amp.master_params(optimizer), args.max_grad_norm)
            else:
                loss.backward()
                torch.nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm)

            tr_loss += loss.item()
            if (step + 1) % args.gradient_accumulation_steps == 0:
                optimizer.step()
                scheduler.step()  # Update learning rate schedule
                model.zero_grad()
                global_step += 1

                if args.local_rank in [-1, 0] and args.logging_steps > 0 and global_step % args.logging_steps == 0:
                    # Log metrics
                    # if (args.local_rank == -1 and args.evaluate_during_training):
                    if args.evaluate_during_training:
                        eval = True
                        test = False
                        results = evaluate(eval, test, args, model, tokenizer)
                        for key, value in results.items():
                            tb_writer.add_scalar("eval_{}".format(key), value, global_step)
                        # Save model checkpoint
                        output_dir = os.path.join(args.output_dir, "checkpoint-{}".format(global_step))
                        if not os.path.exists(output_dir):
                            os.makedirs(output_dir)
                        model_to_save = (
                            model.module if hasattr(model, "module") else model
                        )  # Take care of distributed/parallel training
                        model_to_save.save_pretrained(output_dir)
                        tokenizer.save_vocabulary(output_dir)
                        torch.save(args, os.path.join(output_dir, "training_args.bin"))
                        logger.info("Saving model checkpoint to %s", output_dir)
                    tb_writer.add_scalar("lr", scheduler.get_lr()[0], global_step)
                    tb_writer.add_scalar("loss", (tr_loss - logging_loss) / args.logging_steps, global_step)
                    logging_loss = tr_loss

                '''if args.local_rank in [-1, 0] and args.save_steps > 0 and global_step % args.save_steps == 0:
                    # Save model checkpoint
                    output_dir = os.path.join(args.output_dir, "checkpoint-{}".format(global_step))
                    if not os.path.exists(output_dir):
                        os.makedirs(output_dir)
                    model_to_save = (
                        model.module if hasattr(model, "module") else model
                    )  # Take care of distributed/parallel training
                    model_to_save.save_pretrained(output_dir)
                    tokenizer.save_vocabulary(output_dir)
                    torch.save(args, os.path.join(output_dir, "training_args.bin"))
                    logger.info("Saving model checkpoint to %s", output_dir)'''

            if args.max_steps > 0 and global_step > args.max_steps:
                epoch_iterator.close()
                break
        if args.max_steps > 0 and global_step > args.max_steps:
            train_iterator.close()
            break

    if args.local_rank in [-1, 0]:
        tb_writer.close()

    return global_step, tr_loss / global_step


# train = False, evaluate=False, test = False,
def evaluate(eval, test, args, model, tokenizer, prefix=""):
    if eval:
        dataset, examples, features = load_and_cache_examples(args, tokenizer, train=False, evaluate=True, test=False,
                                                              output_examples=True)
    if test:
        dataset, examples, features = load_and_cache_examples(args, tokenizer, train=False, evaluate=False, test=True,
                                                              output_examples=True)
    # train = False, evaluate=False, test = False,

    if not os.path.exists(args.output_dir) and args.local_rank in [-1, 0]:
        os.makedirs(args.output_dir)

    args.eval_batch_size = args.per_gpu_eval_batch_size * max(1, args.n_gpu)
    # Note that DistributedSampler samples randomly
    eval_sampler = SequentialSampler(dataset) if args.local_rank == -1 else DistributedSampler(dataset)
    eval_dataloader = DataLoader(dataset, sampler=eval_sampler, batch_size=args.eval_batch_size)

    # Eval!
    logger.info("***** Running evaluation {} *****".format(prefix))
    logger.info("  Num examples = %d", len(dataset))
    logger.info("  Batch size = %d", args.eval_batch_size)

    eval_loss, eval_accuracy = 0, 0
    nb_eval_steps, nb_eval_examples = 0, 0
    for batch in tqdm(eval_dataloader, desc="Evaluating"):
        model.eval()
        batch = tuple(t.to(args.device) for t in batch)
        with torch.no_grad():
            inputs = {
                "input_ids": batch[0],
                "attention_mask": batch[1],
                # 'token_type_ids': None if args.model_type == 'xlm' else batch[2]  # XLM don't use segment_ids
                # "token_type_ids": batch[2],
                "labels": batch[3].float(),
            }
            # if args.model_type in ['xlnet', 'xlm']:
            #     inputs.update({'cls_index': batch[4],
            #                    'p_mask': batch[5]})
            outputs = model(**inputs)
            tmp_eval_loss, logits = outputs[:2]
            eval_loss += tmp_eval_loss.mean().item()

        logits = logits.detach().cpu().numpy()
        label_ids = inputs["labels"].to("cpu").numpy()
        tmp_eval_accuracy = accuracy(logits, label_ids)
        eval_accuracy += tmp_eval_accuracy

        nb_eval_steps += 1
        nb_eval_examples += inputs["input_ids"].size(0)
        '''print("inputs[\"input_ids\"]: ", inputs["input_ids"])
        print("inputs[\"input_ids\"].shape: ", inputs["input_ids"].shape)'''

    eval_loss = eval_loss / nb_eval_steps
    eval_accuracy = eval_accuracy / nb_eval_examples
    result = {"eval_loss": eval_loss, "eval_accuracy": eval_accuracy}

    output_eval_file = os.path.join(args.output_dir, "eval_results.txt")
    with open(output_eval_file, "w") as writer:
        logger.info("***** Eval results *****")
        for key in sorted(result.keys()):
            logger.info("%s = %s", key, str(result[key]))
            writer.write("%s = %s\n" % (key, str(result[key])))

    return result


def main():
    parser = argparse.ArgumentParser()

    # Required parameters
    parser.add_argument(
        "--train_file",
        default=None,
        type=str,
        required=True,
        help="SWAG csv for training. E.g., train.csv"
    )
    parser.add_argument(
        "--predict_file",
        default=None,
        type=str,
        required=True,
        help="SWAG csv for predictions. E.g., val.csv",
    )
    parser.add_argument(
        "--test_file",
        default=None,
        type=str,
        required=True,
        help="SWAG csv for test. E.g., test.csv",
    )
    parser.add_argument(
        "--tbname",
        default=None,
        type=str,
        required=True,
        help="tbname"
    )
    parser.add_argument(
        "--model_type",
        default=None,
        type=str,
        required=True,
        help="Model type selected in the list: " + ", ".join(MODEL_CLASSES.keys()),
    )
    parser.add_argument(
        "--model_name_or_path",
        default=None,
        type=str,
        required=True,
        help="Path to pre-trained model or shortcut name selected in the list: " + ", ",
    )
    parser.add_argument(
        "--output_dir",
        default=None,
        type=str,
        required=True,
        help="The output directory where the model checkpoints and predictions will be written.",
    )

    # Other parameters
    parser.add_argument(
        "--config_name", default="", type=str, help="Pretrained config name or path if not the same as model_name"
    )
    parser.add_argument(
        "--tokenizer_name",
        default="",
        type=str,
        help="Pretrained tokenizer name or path if not the same as model_name",
    )
    parser.add_argument(
        "--max_seq_length",
        default=384,
        type=int,
        help="The maximum total input sequence length after tokenization. Sequences "
        "longer than this will be truncated, and sequences shorter than this will be padded.",
    )
    parser.add_argument("--do_train", action="store_true", help="Whether to run training.")
    parser.add_argument("--do_eval", action="store_true", help="Whether to run eval on the dev set.")
    parser.add_argument(
        "--evaluate_during_training", action="store_true", help="Run evaluation during training at each logging step."
    )
    parser.add_argument(
        "--do_lower_case", action="store_true", help="Set this flag if you are using an uncased model."
    )
    parser.add_argument("--per_gpu_train_batch_size", default=8, type=int, help="Batch size per GPU/CPU for training.")
    parser.add_argument(
        "--per_gpu_eval_batch_size", default=8, type=int, help="Batch size per GPU/CPU for evaluation."
    )
    parser.add_argument("--learning_rate", default=1e-5, type=float, help="The initial learning rate for Adam.")
    parser.add_argument(
        "--gradient_accumulation_steps",
        type=int,
        default=1,
        help="Number of updates steps to accumulate before performing a backward/update pass.",
    )
    parser.add_argument("--weight_decay", default=0.1, type=float, help="Weight decay if we apply some.")
    parser.add_argument("--adam_epsilon", default=1e-6, type=float, help="Epsilon for Adam optimizer.")
    parser.add_argument("--max_grad_norm", default=5.0, type=float, help="Max gradient norm.")
    parser.add_argument(
        "--num_train_epochs", default=3.0, type=float, help="Total number of training epochs to perform."
    )
    parser.add_argument(
        "--max_steps",
        default=-1,
        type=int,
        help="If > 0: set total number of training steps to perform. Override num_train_epochs.",
    )
    parser.add_argument("--warmup_steps", default=0, type=int, help="Linear warmup over warmup_steps.")
    parser.add_argument("--logging_steps", type=int, default=50, help="Log every X updates steps.")
    parser.add_argument("--save_steps", type=int, default=50, help="Save checkpoint every X updates steps.")
    parser.add_argument(
        "--eval_all_checkpoints",
        action="store_true",
        help="Evaluate all checkpoints starting with the same prefix as model_name and ending with step number",
    )
    parser.add_argument("--no_cuda", action="store_true", help="Whether not to use CUDA when available")
    parser.add_argument(
        "--overwrite_output_dir", action="store_true", help="Overwrite the content of the output directory"
    )
    parser.add_argument(
        "--overwrite_cache", action="store_true", help="Overwrite the cached training and evaluation sets"
    )
    parser.add_argument("--seed", type=int, default=42, help="random seed for initialization")
    parser.add_argument("--local_rank", type=int, default=-1, help="local_rank for distributed training on gpus")
    parser.add_argument(
        "--fp16",
        action="store_true",
        help="Whether to use 16-bit (mixed) precision (through NVIDIA apex) instead of 32-bit",
    )
    parser.add_argument(
        "--fp16_opt_level",
        type=str,
        default="O1",
        help="For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']."
        "See details at https://nvidia.github.io/apex/amp.html",
    )
    parser.add_argument("--server_ip", type=str, default="", help="Can be used for distant debugging.")
    parser.add_argument("--server_port", type=str, default="", help="Can be used for distant debugging.")
    args = parser.parse_args()

    if (
        os.path.exists(args.output_dir)
        and os.listdir(args.output_dir)
        and args.do_train
        and not args.overwrite_output_dir
    ):
        raise ValueError(
            "Output directory ({}) already exists and is not empty. Use --overwrite_output_dir to overcome.".format(
                args.output_dir
            )
        )

    # Setup distant debugging if needed
    if args.server_ip and args.server_port:
        # Distant debugging - see https://code.visualstudio.com/docs/python/debugging#_attach-to-a-local-script
        import ptvsd

        print("Waiting for debugger attach")
        ptvsd.enable_attach(address=(args.server_ip, args.server_port), redirect_output=True)
        ptvsd.wait_for_attach()

    # Setup CUDA, GPU & distributed training
    if args.local_rank == -1 or args.no_cuda:
        device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
        args.n_gpu = 0 if args.no_cuda else torch.cuda.device_count()
    else:  # Initializes the distributed backend which will take care of synchronizing nodes/GPUs
        torch.cuda.set_device(args.local_rank)
        device = torch.device("cuda", args.local_rank)
        torch.distributed.init_process_group(backend="nccl")
        args.n_gpu = 1
    args.device = device

    # Setup logging
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=logging.INFO if args.local_rank in [-1, 0] else logging.WARN,
    )
    logger.warning(
        "Process rank: %s, device: %s, n_gpu: %s, distributed training: %s, 16-bits training: %s",
        args.local_rank,
        device,
        args.n_gpu,
        bool(args.local_rank != -1),
        args.fp16,
    )

    # Set seed
    set_seed(args)

    # Load pretrained model and tokenizer
    if args.local_rank not in [-1, 0]:
        torch.distributed.barrier()  # Make sure only the first process in distributed training will download model & vocab

    args.model_type = args.model_type.lower()
    config_class, model_class, tokenizer_class = MODEL_CLASSES[args.model_type]
    config = config_class.from_pretrained(args.config_name if args.config_name else args.model_name_or_path)
    tokenizer = tokenizer_class.from_pretrained(
        args.tokenizer_name if args.tokenizer_name else args.model_name_or_path, do_lower_case=args.do_lower_case
    )
    config.num_labels = 1
    model = model_class.from_pretrained(
        args.model_name_or_path, from_tf=bool(".ckpt" in args.model_name_or_path), config=config
    )

    if args.local_rank == 0:
        torch.distributed.barrier()  # Make sure only the first process in distributed training will download model & vocab

    model.to(args.device)

    logger.info("Training/evaluation parameters %s", args)

    # Training
    if args.do_train:
        train_dataset = load_and_cache_examples(args, tokenizer, train=True, evaluate=False, test=False, output_examples=False)
        # train = False, evaluate=False, test = False,
        global_step, tr_loss = train(args, train_dataset, model, tokenizer)
        logger.info(" global_step = %s, average loss = %s", global_step, tr_loss)

    # Save the trained model and the tokenizer
    if args.local_rank == -1 or torch.distributed.get_rank() == 0:
        # Create output directory if needed
        if not os.path.exists(args.output_dir) and args.local_rank in [-1, 0]:
            os.makedirs(args.output_dir)

        logger.info("Saving model checkpoint to %s", args.output_dir)
        # Save a trained model, configuration and tokenizer using `save_pretrained()`.
        # They can then be reloaded using `from_pretrained()`
        model_to_save = (
            model.module if hasattr(model, "module") else model
        )  # Take care of distributed/parallel training
        model_to_save.save_pretrained(args.output_dir)
        tokenizer.save_pretrained(args.output_dir)

        # Good practice: save your training arguments together with the trained model
        torch.save(args, os.path.join(args.output_dir, "training_args.bin"))

        # Load a trained model and vocabulary that you have fine-tuned
        model = model_class.from_pretrained(args.output_dir)
        tokenizer = tokenizer_class.from_pretrained(args.output_dir)
        model.to(args.device)

    # Evaluation - we can ask to evaluate all the checkpoints (sub-directories) in a directory
    results = {}
    if args.do_eval and args.local_rank in [-1, 0]:
        if args.do_train:
            checkpoints = [args.output_dir]
        else:
            # if do_train is False and do_eval is true, load model directly from pretrained.
            checkpoints = [args.model_name_or_path]
        if args.eval_all_checkpoints:
            checkpoints = list(
                os.path.dirname(c) for c in sorted(glob.glob(args.output_dir + "/**/" + WEIGHTS_NAME, recursive=True))
            )
            logging.getLogger("transformers.modeling_utils").setLevel(logging.WARN)  # Reduce model loading logs
        logger.info("Evaluate the following checkpoints: %s", checkpoints)

        for checkpoint in checkpoints:
            # Reload the model
            global_step = checkpoint.split("-")[-1] if len(checkpoints) > 1 else ""
            model = model_class.from_pretrained(checkpoint)
            tokenizer = tokenizer_class.from_pretrained(checkpoint)
            model.to(args.device)

            # Evaluate
            eval = False
            test = True
            result = evaluate(eval, test, args, model, tokenizer, prefix=global_step)
            # train = False, evaluate=False, test = False,
            result = dict((k + ("_{}".format(global_step) if global_step else ""), v) for k, v in result.items())
            results.update(result)

    logger.info("Results: {}".format(results))

    return results


if __name__ == "__main__":
    main()
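Once the import resolves, a quick standalone check confirms the schedule behaves as expected: the learning rate climbs linearly for num_warmup_steps updates and then decays linearly to zero at num_training_steps. This is a toy example of ours, not part of the script:

import torch
from transformers import AdamW, get_linear_schedule_with_warmup

# One dummy parameter is enough to drive the optimizer/scheduler pair.
param = torch.nn.Parameter(torch.zeros(1))
optimizer = AdamW([param], lr=1e-5)
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=10, num_training_steps=100)

for step in range(100):
    optimizer.step()
    scheduler.step()
    if step in (0, 9, 49, 99):
        # Peaks at 1e-5 on step 9, then decays linearly to 0 by step 99.
        print(step, optimizer.param_groups[0]["lr"])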

use_boolq_roberta.py

CUDA_VISIBLE_DEVICES=2 python use_boolq_roberta.py --model_type roberta --model_name_or_path roberta-large --do_eval --do_lower_case --train_file train.jsonl --predict_file val.jsonl --test_file test3.jsonl --per_gpu_eval_batch_size=8 --output_dir boolq_roberta_output/checkpoint-3500 
import json
import argparse
import csv
import glob
import logging
import os
import random

import numpy as np
import torch
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler, TensorDataset
from torch.utils.data.distributed import DistributedSampler
from tqdm import tqdm, trange

from transformers import (
    WEIGHTS_NAME,
    AdamW,
    RobertaConfig,
    RobertaForSequenceClassification,
    RobertaTokenizer,
    get_linear_schedule_with_warmup,
)

try:
    from torch.utils.tensorboard import SummaryWriter
except ImportError:
    from tensorboardX import SummaryWriter

logger = logging.getLogger(__name__)

MODEL_CLASSES = {
    "roberta": (RobertaConfig, RobertaForSequenceClassification, RobertaTokenizer),
}

change = [1301, 1313, 1319, 1321, 1326, 1338, 1350, 1355, 1373, 1376, 1382, 1387, 1389, 1390, 1404, 1415, 1418, 1420, 1421, 1422, 1424, 1425, 1428, 1430, 1431, 1438, 1440, 1450, 1453, 1465, 1466, 1489, 1491, 1492, 1514, 1531, 1533, 1535, 1550, 1554, 1582, 1593, 1597, 1607, 1611, 1615, 1622, 1626, 1631, 1637, 1638, 1641, 1666, 1685, 1687, 1696, 1698, 1702, 1713, 1717, 1732, 1733, 1739, 1743, 1749, 1757, 1762, 1767, 1770, 1778, 1780, 1789, 1794, 1809, 1811, 1815, 1816, 1826, 1831, 1833, 1835, 1838, 1849, 1855, 1858, 1862, 1879, 1883, 1884, 1897, 1926, 1941, 1953, 1954, 1956, 1961, 1990, 1991, 1997, 1999, 2007, 2008, 2012, 2035, 2038, 2045, 2051, 2052, 2067, 2074, 2079, 2083, 2086, 2109, 2116, 2119, 2141, 2142, 2158, 2162, 2168, 2194, 2199, 2202, 2204, 2206, 2212, 2215, 2219, 2226, 2227, 2231, 2237, 2239, 2245, 2249, 2271, 2272, 2275, 2295, 2300, 2307, 2317, 2321, 2336, 2342, 2343, 2350, 2354, 2367, 2369, 2371, 2379, 2382, 2391, 2396, 2397, 2406, 2407, 2417, 2421, 2431, 2432, 2436, 2443, 2447, 2449, 2452, 2454, 2471, 2474, 2480, 2483, 2499, 2501, 2510, 2519, 2530, 2533, 2547, 2555, 2565]

class BoolqExample(object):
    """A single training/test example for the BoolQ dataset."""
    def __init__(self, swag_id, context_sentence, start_ending, ending_0, label=None):
        self.swag_id = swag_id
        self.context_sentence = context_sentence
        self.start_ending = start_ending
        self.endings = [
            ending_0
            # ending_1,
        ]
        self.label = label

    def __str__(self):
        return self.__repr__()

    def __repr__(self):
        attributes = [
            "swag_id: {}".format(self.swag_id),
            "context_sentence: {}".format(self.context_sentence),
            "start_ending: {}".format(self.start_ending),
            "ending_0: {}".format(self.endings[0]),
            # "ending_1: {}".format(self.endings[1]),
        ]
        if self.label is not None:
            attributes.append("label: {}".format(self.label))
        return ", ".join(attributes)


'''class InputFeatures(object):
    def __init__(self, example_id, choices_features, label):
        self.example_id = example_id
        self.choices_features = [
            {"input_ids": input_ids, "input_mask": input_mask, "segment_ids": segment_ids}
            for _, input_ids, input_mask, segment_ids in choices_features
        ]
        self.label = label'''


class InputFeatures(object):
    """
    A single set of features of data.

    Args:
        input_ids: Indices of input sequence tokens in the vocabulary.
        attention_mask: Mask to avoid performing attention on padding token indices.
            Mask values selected in ``[0, 1]``:
            Usually ``1`` for tokens that are NOT MASKED, ``0`` for MASKED (padded) tokens.
        token_type_ids: Segment token indices to indicate first and second portions of the inputs.
        label: Label corresponding to the input
    """

    def __init__(self, input_ids, attention_mask=None, token_type_ids=None, label=None):
        self.input_ids = input_ids
        self.attention_mask = attention_mask
        self.token_type_ids = token_type_ids
        self.label = label


def read_boolq_examples(input_file, is_training=True):
    with open(input_file, "r", encoding="utf-8") as f:
        lines = f.readlines()

    examples = []
    for line in lines:
        data_raw = json.loads(line.strip("\n"))
        if data_raw["label"]:
            label_input = 1
        else:
            label_input = 0
        examples.append(
            BoolqExample(
                swag_id=data_raw["idx"],
                context_sentence=data_raw["passage"],
                start_ending=data_raw["question"],
                ending_0="",
                # ending_1="",
                label=label_input if is_training else None,
            )
        )
    return examples


def convert_examples_to_features(examples, tokenizer, max_seq_length, is_training):
    """Loads a data file into a list of `InputBatch`s."""
    # Swag is a multiple choice task. To perform this task using Bert,
    # we will use the formatting proposed in "Improving Language
    # Understanding by Generative Pre-Training" and suggested by
    # @jacobdevlin-google in this issue
    # https://github.com/google-research/bert/issues/38.
    #
    # Each choice will correspond to a sample on which we run the
    # inference. For a given Swag example, we will create the 4
    # following inputs:
    # - [CLS] context [SEP] choice_1 [SEP]
    # - [CLS] context [SEP] choice_2 [SEP]
    # - [CLS] context [SEP] choice_3 [SEP]
    # - [CLS] context [SEP] choice_4 [SEP]
    # The model will output a single value for each input. To get the
    # final decision of the model, we will run a softmax over these 4
    # outputs.
    features = []
    for example_index, example in tqdm(enumerate(examples)):
        context_tokens = tokenizer.tokenize(example.context_sentence)
        start_ending_tokens = tokenizer.tokenize(example.start_ending)

        context_tokens_choice = context_tokens[:]
        ending_tokens = start_ending_tokens
        _truncate_seq_pair(context_tokens_choice, ending_tokens, max_seq_length - 3)

        tokens = ["[CLS]"] + context_tokens_choice + ["[SEP]"] + ending_tokens + ["[SEP]"]
        segment_ids = [0] * (len(context_tokens_choice) + 2) + [1] * (len(ending_tokens) + 1)

        input_ids = tokenizer.convert_tokens_to_ids(tokens)
        input_mask = [1] * len(input_ids)

        # Zero-pad up to the sequence length.
        padding = [0] * (max_seq_length - len(input_ids))
        input_ids += padding
        input_mask += padding
        segment_ids += padding

        assert len(input_ids) == max_seq_length
        assert len(input_mask) == max_seq_length
        assert len(segment_ids) == max_seq_length

        label = example.label

        if example_index < 5:
            logger.info("*** Example ***")
            logger.info("swag_id: {}".format(example.swag_id))
            logger.info("tokens: {}".format(" ".join(tokens)))
            logger.info("input_ids: {}".format(" ".join(map(str, input_ids))))
            logger.info("attention_mask: {}".format(" ".join(map(str, input_mask))))
            logger.info("token_type_ids: {}".format(" ".join(map(str, segment_ids))))
            if is_training:
                logger.info("label: {}".format(label))

        features.append(InputFeatures(input_ids=input_ids, attention_mask=input_mask, token_type_ids=segment_ids, label=label))
    return features


def _truncate_seq_pair(tokens_a, tokens_b, max_length):
    """Truncates a sequence pair in place to the maximum length."""
    # This is a simple heuristic which will always truncate the longer sequence
    # one token at a time. This makes more sense than truncating an equal percent
    # of tokens from each, since if one sequence is very short then each token
    # that's truncated likely contains more information than a longer sequence.
    while True:
        total_length = len(tokens_a) + len(tokens_b)
        if total_length <= max_length:
            break
        if len(tokens_a) > len(tokens_b):
            tokens_a.pop()
        else:
            tokens_b.pop()


'''def accuracy(out, labels):
    outputs = np.argmax(out, axis=1)
    return np.sum(outputs == labels)'''

def accuracy(out, labels, n, data):
    # np.int was removed in NumPy 1.20+; the builtin int preserves the original behaviour.
    outputs = (out.squeeze(1) >= 0.5).astype(int)
    for i in outputs:
        n[0] = n[0] + 1
        print(n[0], ": ", i)
        # data = open("out2.txt", 'a', encoding='utf-8')
        print("{\"idx\":", n[0], ", \"label\":", i, "}", file=data)
        # data.close()
    return np.sum(outputs == labels)

def select_field(features, field):
    return [[choice[field] for choice in feature.choices_features] for feature in features]


def set_seed(args):
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if args.n_gpu > 0:
        torch.cuda.manual_seed_all(args.seed)


def load_and_cache_examples(args, tokenizer, train=False, evaluate=False, test=False, output_examples=False):
    if args.local_rank not in [-1, 0]:
        torch.distributed.barrier()  # Make sure only the first process in distributed training processes the dataset; the others will use the cache

    # Load data features from cache or dataset file
    # input_file = args.predict_file if evaluate else args.train_file
    if train:
        input_file = args.train_file
    if evaluate:
        input_file = args.predict_file
    if test:
        input_file = args.test_file
    cached_features_file = os.path.join(
        os.path.dirname(input_file),
        "cached_{}_{}_{}".format(
            "dev" if evaluate else "train",
            list(filter(None, args.model_name_or_path.split("/"))).pop(),
            str(args.max_seq_length),
        ),
    )
    if os.path.exists(cached_features_file) and not args.overwrite_cache and not output_examples:
        logger.info("Loading features from cached file %s", cached_features_file)
        features = torch.load(cached_features_file)
    else:
        logger.info("Creating features from dataset file at %s", input_file)
        examples = read_boolq_examples(input_file)
        features = convert_examples_to_features(examples, tokenizer, args.max_seq_length, not evaluate)
        if args.local_rank in [-1, 0]:
            logger.info("Saving features into cached file %s", cached_features_file)
            torch.save(features, cached_features_file)

    if args.local_rank == 0:
        torch.distributed.barrier()  # Make sure only the first process in distributed training processes the dataset; the others will use the cache

    # Convert to Tensors and build dataset
    # all_input_ids = torch.tensor(select_field(features, "input_ids"), dtype=torch.long)
    # all_input_mask = torch.tensor(select_field(features, "input_mask"), dtype=torch.long)
    # all_segment_ids = torch.tensor(select_field(features, "segment_ids"), dtype=torch.long)
    # all_label = torch.tensor([f.label for f in features], dtype=torch.long)
    all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long)
    all_input_mask = torch.tensor([f.attention_mask for f in features], dtype=torch.long)
    all_segment_ids = torch.tensor([f.token_type_ids for f in features], dtype=torch.long)
    all_label = torch.tensor([f.label for f in features], dtype=torch.long)

    if evaluate:
        dataset = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, all_label)
    else:
        dataset = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, all_label)

    if output_examples:
        return dataset, examples, features
    return dataset


# train = False, evaluate=False, test = False,
def evaluate(data, eval, test, args, model, tokenizer, prefix=""):
    if eval:
        dataset, examples, features = load_and_cache_examples(args, tokenizer, train=False, evaluate=True, test=False,
                                                              output_examples=True)
    if test:
        dataset, examples, features = load_and_cache_examples(args, tokenizer, train=False, evaluate=False, test=True,
                                                              output_examples=True)
    # train = False, evaluate=False, test = False,

    if not os.path.exists(args.output_dir) and args.local_rank in [-1, 0]:
        os.makedirs(args.output_dir)

    args.eval_batch_size = args.per_gpu_eval_batch_size * max(1, args.n_gpu)
    # Note that DistributedSampler samples randomly
    eval_sampler = SequentialSampler(dataset) if args.local_rank == -1 else DistributedSampler(dataset)
    eval_dataloader = DataLoader(dataset, sampler=eval_sampler, batch_size=args.eval_batch_size)

    # Eval!
    logger.info("***** Running evaluation {} *****".format(prefix))
    logger.info("  Num examples = %d", len(dataset))
    logger.info("  Batch size = %d", args.eval_batch_size)

    eval_loss, eval_accuracy = 0, 0
    nb_eval_steps, nb_eval_examples = 0, 0
    n = [1299]
    for batch in tqdm(eval_dataloader, desc="Evaluating"):
        model.eval()
        batch = tuple(t.to(args.device) for t in batch)
        with torch.no_grad():
            inputs = {
                "input_ids": batch[0],
                "attention_mask": batch[1],
                # 'token_type_ids': None if args.model_type == 'xlm' else batch[2]  # XLM don't use segment_ids
                # "token_type_ids": batch[2],
                "labels": batch[3].float(),
            }
            # if args.model_type in ['xlnet', 'xlm']:
            #     inputs.update({'cls_index': batch[4],
            #                    'p_mask': batch[5]})
            outputs = model(**inputs)
            tmp_eval_loss, logits = outputs[:2]
            eval_loss += tmp_eval_loss.mean().item()

        logits = logits.detach().cpu().numpy()
        label_ids = inputs["labels"].to("cpu").numpy()
        # print(logits, label_ids)
        tmp_eval_accuracy = accuracy(logits, label_ids, n, data)
        eval_accuracy += tmp_eval_accuracy

        nb_eval_steps += 1
        nb_eval_examples += inputs["input_ids"].size(0)

    eval_loss = eval_loss / nb_eval_steps
    eval_accuracy = eval_accuracy / nb_eval_examples
    result = {"eval_loss": eval_loss, "eval_accuracy": eval_accuracy}

    output_eval_file = os.path.join(args.output_dir, "eval_results.txt")
    with open(output_eval_file, "w") as writer:
        logger.info("***** Eval results *****")
        for key in sorted(result.keys()):
            logger.info("%s = %s", key, str(result[key]))
            writer.write("%s = %s\n" % (key, str(result[key])))

    return result


def main():
    parser = argparse.ArgumentParser()
    data = open("out2.txt", 'w', encoding='utf-8')

    # Required parameters
    parser.add_argument(
        "--train_file",
        default=None,
        type=str,
        required=True,
        help="SWAG csv for training. E.g., train.csv"
    )
    parser.add_argument(
        "--predict_file",
        default=None,
        type=str,
        required=True,
        help="SWAG csv for predictions. E.g., val.csv",
    )
    parser.add_argument(
        "--test_file",
        default=None,
        type=str,
        required=True,
        help="SWAG csv for test. E.g., test.csv",
    )
    parser.add_argument(
        "--model_type",
        default=None,
        type=str,
        required=True,
        help="Model type selected in the list: " + ", ".join(MODEL_CLASSES.keys()),
    )
    parser.add_argument(
        "--model_name_or_path",
        default=None,
        type=str,
        required=True,
        help="Path to pre-trained model or shortcut name selected in the list: " + ", ",
    )
    parser.add_argument(
        "--output_dir",
        default=None,
        type=str,
        required=True,
        help="The output directory where the model checkpoints and predictions will be written.",
    )

    # Other parameters
    parser.add_argument(
        "--config_name", default="", type=str, help="Pretrained config name or path if not the same as model_name"
    )
    parser.add_argument(
        "--tokenizer_name",
        default="",
        type=str,
        help="Pretrained tokenizer name or path if not the same as model_name",
    )
    parser.add_argument(
        "--max_seq_length",
        default=384,
        type=int,
        help="The maximum total input sequence length after tokenization. Sequences "
        "longer than this will be truncated, and sequences shorter than this will be padded.",
    )
    parser.add_argument("--do_train", action="store_true", help="Whether to run training.")
    parser.add_argument("--do_eval", action="store_true", help="Whether to run eval on the dev set.")
    parser.add_argument(
        "--evaluate_during_training", action="store_true", help="Run evaluation during training at each logging step."
    )
    parser.add_argument(
        "--do_lower_case", action="store_true", help="Set this flag if you are using an uncased model."
    )
    parser.add_argument("--per_gpu_train_batch_size", default=8, type=int, help="Batch size per GPU/CPU for training.")
    parser.add_argument(
        "--per_gpu_eval_batch_size", default=8, type=int, help="Batch size per GPU/CPU for evaluation."
    )
    parser.add_argument("--learning_rate", default=5e-5, type=float, help="The initial learning rate for Adam.")
    parser.add_argument(
        "--gradient_accumulation_steps",
        type=int,
        default=1,
        help="Number of updates steps to accumulate before performing a backward/update pass.",
    )
    parser.add_argument("--weight_decay", default=0.0, type=float, help="Weight decay if we apply some.")
    parser.add_argument("--adam_epsilon", default=1e-8, type=float, help="Epsilon for Adam optimizer.")
    parser.add_argument("--max_grad_norm", default=1.0, type=float, help="Max gradient norm.")
    parser.add_argument(
        "--num_train_epochs", default=3.0, type=float, help="Total number of training epochs to perform."
    )
    parser.add_argument(
        "--max_steps",
        default=-1,
        type=int,
        help="If > 0: set total number of training steps to perform. Override num_train_epochs.",
    )
    parser.add_argument("--warmup_steps", default=0, type=int, help="Linear warmup over warmup_steps.")
    parser.add_argument("--logging_steps", type=int, default=50, help="Log every X updates steps.")
    parser.add_argument("--save_steps", type=int, default=50, help="Save checkpoint every X updates steps.")
    parser.add_argument(
        "--eval_all_checkpoints",
        action="store_true",
        help="Evaluate all checkpoints starting with the same prefix as model_name and ending with step number",
    )
    parser.add_argument("--no_cuda", action="store_true", help="Whether not to use CUDA when available")
    parser.add_argument(
        "--overwrite_output_dir", action="store_true", help="Overwrite the content of the output directory"
    )
    parser.add_argument(
        "--overwrite_cache", action="store_true", help="Overwrite the cached training and evaluation sets"
    )
    parser.add_argument("--seed", type=int, default=42, help="random seed for initialization")
    parser.add_argument("--local_rank", type=int, default=-1, help="local_rank for distributed training on gpus")
    parser.add_argument(
        "--fp16",
        action="store_true",
        help="Whether to use 16-bit (mixed) precision (through NVIDIA apex) instead of 32-bit",
    )
    parser.add_argument(
        "--fp16_opt_level",
        type=str,
        default="O1",
        help="For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']."
        "See details at https://nvidia.github.io/apex/amp.html",
    )
    parser.add_argument("--server_ip", type=str, default="", help="Can be used for distant debugging.")
    parser.add_argument("--server_port", type=str, default="", help="Can be used for distant debugging.")
    args = parser.parse_args()

    if (
        os.path.exists(args.output_dir)
        and os.listdir(args.output_dir)
        and args.do_train
        and not args.overwrite_output_dir
    ):
        raise ValueError(
            "Output directory ({}) already exists and is not empty. Use --overwrite_output_dir to overcome.".format(
                args.output_dir
            )
        )

    # Setup distant debugging if needed
    if args.server_ip and args.server_port:
        # Distant debugging - see https://code.visualstudio.com/docs/python/debugging#_attach-to-a-local-script
        import ptvsd

        print("Waiting for debugger attach")
        ptvsd.enable_attach(address=(args.server_ip, args.server_port), redirect_output=True)
        ptvsd.wait_for_attach()

    # Setup CUDA, GPU & distributed training
    if args.local_rank == -1 or args.no_cuda:
        device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
        args.n_gpu = 0 if args.no_cuda else torch.cuda.device_count()
    else:  # Initializes the distributed backend which will take care of synchronizing nodes/GPUs
        torch.cuda.set_device(args.local_rank)
        device = torch.device("cuda", args.local_rank)
        torch.distributed.init_process_group(backend="nccl")
        args.n_gpu = 1
    args.device = device

    # Setup logging
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=logging.INFO if args.local_rank in [-1, 0] else logging.WARN,
    )
    logger.warning(
        "Process rank: %s, device: %s, n_gpu: %s, distributed training: %s, 16-bits training: %s",
        args.local_rank,
        device,
        args.n_gpu,
        bool(args.local_rank != -1),
        args.fp16,
    )

    # Set seed
    set_seed(args)

    # Load pretrained model and tokenizer
    if args.local_rank not in [-1, 0]:
        torch.distributed.barrier()  # Make sure only the first process in distributed training will download model & vocab

    args.model_type = args.model_type.lower()
    config_class, model_class, tokenizer_class = MODEL_CLASSES[args.model_type]
    config = config_class.from_pretrained(args.config_name if args.config_name else args.model_name_or_path)
    tokenizer = tokenizer_class.from_pretrained(
        args.tokenizer_name if args.tokenizer_name else args.model_name_or_path, do_lower_case=args.do_lower_case
    )
    config.num_labels = 1
    model = model_class.from_pretrained(
        args.model_name_or_path, from_tf=bool(".ckpt" in args.model_name_or_path), config=config
    )

    if args.local_rank == 0:
        torch.distributed.barrier()  # Make sure only the first process in distributed training will download model & vocab

    model.to(args.device)

    logger.info("Training/evaluation parameters %s", args)

    # Evaluation - we can ask to evaluate all the checkpoints (sub-directories) in a directory
    results = {}
    if args.do_eval and args.local_rank in [-1, 0]:
        checkpoints = [args.output_dir]
        logger.info("Evaluate the following checkpoints: %s", checkpoints)
        for checkpoint in checkpoints:
            # Reload the model
            global_step = checkpoint.split("-")[-1] if len(checkpoints) > 1 else ""
            model = model_class.from_pretrained(checkpoint)
            tokenizer = tokenizer_class.from_pretrained(checkpoint)
            model.to(args.device)

            # Evaluate
            eval = False
            test = True
            result = evaluate(data, eval, test, args, model, tokenizer, prefix=global_step)
            # train = False, evaluate=False, test = False,
            result = dict((k + ("_{}".format(global_step) if global_step else ""), v) for k, v in result.items())
            results.update(result)

    logger.info("Results: {}".format(results))
    data.close()

    return results


if __name__ == "__main__":
    main()
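Both scripts set config.num_labels = 1 and pass the BoolQ labels as floats, so RobertaForSequenceClassification takes the library's regression branch for a single label (MSE loss on the one logit, as in transformers 2.x) rather than cross-entropy; that is why accuracy() thresholds the raw logit at 0.5 instead of taking an argmax. A small NumPy illustration of that decision rule, with example values of our own:

import numpy as np

logits = np.array([[0.12], [0.87], [0.49], [0.51]])  # shape (batch, 1), as returned in evaluate()
labels = np.array([0.0, 1.0, 0.0, 1.0])

preds = (logits.squeeze(1) >= 0.5).astype(int)       # same rule as accuracy()
print(preds)                                         # [0 1 0 1]
print(np.sum(preds == labels) / len(labels))         # 1.0
print(np.mean((logits.squeeze(1) - labels) ** 2))    # the MSE the model is trained against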

 
