赞
踩
- # -*- coding: utf-8 -*-
- # @Time : 2020/2/25 11:18
- # @Author : liusen
- from torchtext import data
- from tqdm import tqdm
- import pandas as pd
- import numpy as np
- import random
- import torch
- from torchtext.vocab import Vectors
- import os
- import codecs
- import dill
-
def split_data_train_dev():
    """Placeholder for splitting the data into train and dev sets.

    Not implemented: the function takes no arguments, does nothing, and
    returns ``None``.
    """
    pass
-
-
def x_tokenize(x):
    """Split a string into a list of single characters.

    Leading and trailing whitespace is stripped first; every remaining
    character (including inner whitespace) becomes one token.

    Note: if the loaded text has already been converted to ids, the strings
    must be converted to integers here instead; otherwise the field using
    this tokenizer has to be created with ``use_vocab=True``.

    Args:
        x: The raw text string.

    Returns:
        A list of one-character strings; empty when ``x`` is blank.
    """
    return list(x.strip())
-
-
- # print(x_tokenize("我是中国人"))
-
# Locations of the CSV files, relative to the working directory.
train_path = '../data/train.csv'
test_path = '../data/test_new.csv'

# Text field: sequential, tokenized per character by x_tokenize, mapped
# through a vocabulary, with no lowercasing.
TEXT = data.Field(sequential=True, tokenize=x_tokenize, use_vocab=True, lower=False)
# Label field: a single non-sequential value used without a vocabulary.
LABEL = data.Field(sequential=False, use_vocab=False)
-
-
def get_one_hot(label, N):
    """Return the one-hot row for ``label`` among ``N`` classes.

    Args:
        label: Class index; any value that ``int()`` accepts.
        N: Number of classes, i.e. the width of the one-hot row.

    Returns:
        A tensor of shape ``(1, N)`` in torch's default dtype, holding 1 at
        column ``int(label)`` and 0 elsewhere.
    """
    # Row i of the N x N identity matrix is the one-hot vector for class i.
    identity = torch.eye(N)
    # index_select expects a 1-D index tensor; a one-element index keeps the
    # leading dimension, so the result is (1, N). An index outside [0, N)
    # is rejected by index_select.
    row_index = torch.tensor([int(label)])
    return identity.index_select(0, row_index)
-
-
-
- class MyDataset(data.Dataset):
-
- def __init__(self, path, text_field, label_field, test=False, aug=Fal
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。