赞
踩
数据读取方法总结有3种:
(1)预加载数据:预定义常量或变量来保存数据
(2)供给数据:在会话中运行run()函数的时候,通过赋值给feed_dict参数的方式将数据注入placeholder中。
(3)从文件中读取数据:在TensorFlow图的起始,让一个输入管线从文件中读取数据。
12.1 TFRecord格式
// A single record: wraps exactly one Features message.
message Example{
Features features=1;
};
// A collection of features, each addressed by a string name.
message Features{
map<string,Feature> feature=1;
};
// One feature value: exactly one of the three list kinds below is set.
message Feature{
oneof kind{
BytesList bytes_list=1;
FloatList float_list=2;
Int64List int64_list=3;
}
};
下面通过一段代码将Fashion_MNIST数据集转化为TFRecord文件。大致过程就是:
import tensorflow as tf
import numpy as np

# Convert the Fashion-MNIST training split into a single TFRecord file.
(train_images, train_labels), (test_images, test_labels) = \
    tf.keras.datasets.fashion_mnist.load_data()


def Int64_feature(value):
    """Wrap one integer in a tf.train.Feature backed by an Int64List."""
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))


def Bytes_feature(value):
    """Wrap one bytes object in a tf.train.Feature backed by a BytesList."""
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))


pixels = train_images.shape[1]
num_examples = train_images.shape[0]

filename = "/home/xxy/TFRecord/fashion_MNIST.tfrecords"
# The context manager closes the writer even if serialization fails midway.
with tf.io.TFRecordWriter(filename) as writer:
    for i in range(num_examples):
        # tobytes() is the supported replacement for the deprecated tostring().
        image_to_string = train_images[i].tobytes()
        feature = {
            "pixels": Int64_feature(pixels),
            # Convert the NumPy scalar to a plain int before storing it.
            "label": Int64_feature(int(train_labels[i])),
            "image_raw": Bytes_feature(image_to_string),
        }
        features = tf.train.Features(feature=feature)
        example = tf.train.Example(features=features)
        # The protobuf method is SerializeToString (the original misspelled it).
        writer.write(example.SerializeToString())
print("writed ok")
使用data模块中的data.experimental模块中的TFRecordWriter类将Fashion_MNIST数据集写入到一个TFRecord文件中:
import tensorflow as tf

# Write the Fashion-MNIST training split to a TFRecord file through
# tf.data.experimental.TFRecordWriter, which consumes a dataset of
# serialized strings instead of individual records.
(train_images, train_labels), (test_images, test_labels) = \
    tf.keras.datasets.fashion_mnist.load_data()


def Int64_feature(value):
    """Wrap one integer in a tf.train.Feature backed by an Int64List."""
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))


def Bytes_feature(value):
    """Wrap one bytes object in a tf.train.Feature backed by a BytesList."""
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))


pixels = train_images.shape[1]
num_examples = train_images.shape[0]

filename = "/home/xxy/TFRecord/fashion_MNIST2.tfrecords"
writer = tf.data.experimental.TFRecordWriter(filename)


def serialize_example(i):
    """Return the serialized tf.train.Example for training sample ``i``."""
    image_to_string = train_images[i].tobytes()
    feature = {
        "pixels": Int64_feature(pixels),
        "label": Int64_feature(int(train_labels[i])),
        "image_raw": Bytes_feature(image_to_string),
    }
    # The feature dict must be wrapped in Features (plural), not Feature.
    features = tf.train.Features(feature=feature)
    example_proto = tf.train.Example(features=features)
    return example_proto.SerializeToString()


def generator():
    """Yield the serialized form of every training sample in order."""
    for i in range(num_examples):
        yield serialize_example(i)


# Each element is a scalar string, hence output_shapes=().
serialized_features_dataset = tf.data.Dataset.from_generator(
    generator, output_types=tf.string, output_shapes=())
# This writer exposes only write(); it has no close() method to call.
writer.write(serialized_features_dataset)
print("writed ok")
从TFRecord文件中解析数据:
import tensorflow as tf

# Must match the file name used when the TFRecord file was written.
filename = "/home/xxy/TFRecord/fashion_MNIST.tfrecords"
tfrecord_dataset = tf.data.TFRecordDataset(filename)

# Optional check: inspect the first few raw serialized records.
# for sample in tfrecord_dataset.take(5):
#     print(repr(sample))


def parse_function(example_proto):
    """Parse one serialized Example into a dict of scalar tensors.

    Args:
        example_proto: a scalar string tensor holding a serialized Example.

    Returns:
        A dict with the keys "pixels", "label" and "image_raw".
    """
    feature = tf.io.parse_single_example(
        example_proto,
        features={
            "pixels": tf.io.FixedLenFeature([], tf.int64),
            "label": tf.io.FixedLenFeature([], tf.int64),
            # Key must be "image_raw": it is what the loop below reads.
            "image_raw": tf.io.FixedLenFeature([], tf.string),
        })
    return feature


parsed_dataset = tfrecord_dataset.map(parse_function)

# Optional check: show the structure of the parsed dataset.
# print(parsed_dataset)

for parsed_record in parsed_dataset.take(1):
    # Decode the raw byte string back into a flat uint8 tensor.
    images = tf.io.decode_raw(parsed_record["image_raw"], tf.uint8)
    labels = tf.cast(parsed_record["label"], tf.int32)
    pixels = tf.cast(parsed_record["pixels"], tf.int32)
    print(images)
    print(labels)
    print(pixels)
12.2 CSV格式
CSV(Comma-Separated Values,字符分隔值)以纯文本形式存储表格数据(数字和文本),这意味着该文件是一个字符序列,读取该文件不需要经过像二进制数据那样反序列化的过程。
import tensorflow as tf

# Path of the CSV file and the names given to its four columns.
file_name = "/home/xxy/CSV/data.csv"
CSV_CLOUMNS = ['col1', 'col2', 'col3', 'col4']

# Build a dataset that yields batches of 10 rows without shuffling.
csv_dataset = tf.data.experimental.make_csv_dataset(
    file_name,
    batch_size=10,
    shuffle=False,
    column_names=CSV_CLOUMNS,
    ignore_errors=True,
)

# Fetch only the first batch and display it.
batch_iterator = iter(csv_dataset)
examples = next(batch_iterator)
print(examples)
12.3 队列
import tensorflow as tf

# First-in-first-out queue with capacity 2 holding int32 elements.
Queue = tf.queue.FIFOQueue(2, "int32")
# Seed the queue with the two values 10 and 100.
queue_init = Queue.enqueue_many(([10, 100],))

# Each round takes the head element, adds 10 and appends the result to the
# tail. The loop body must be indented for the script to be valid Python.
for i in range(5):
    a = Queue.dequeue()
    b = a + 10
    Queue_en = Queue.enqueue([b])
    print(a)
文件队列
import tensorflow as tf
import numpy as np

# Write the Fashion-MNIST test split into several TFRecord files so they
# can later be consumed through a filename queue.
(train_images, train_labels), (test_images, test_labels) = \
    tf.keras.datasets.fashion_mnist.load_data()


def _int64_feature(value):
    """Wrap one integer in a tf.train.Feature backed by an Int64List."""
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))


def _bytes_feature(value):
    """Wrap one bytes object in a tf.train.Feature backed by a BytesList."""
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))


pixels = test_images.shape[1]
num_examples = test_images.shape[0]
num_files = 2

for i in range(num_files):
    # The "data_tfrecords-" prefix must match the "data_tfrecords-*" glob
    # used by the code that reads these files back.
    filename = ("/home/xxy/TFRecord/data_tfrecords-%.1d-of-%.1d" % (i, num_files))
    # The context manager closes each file even if a write fails.
    with tf.io.TFRecordWriter(filename) as writer:
        for index in range(num_examples):
            image_string = test_images[index].tobytes()
            example = tf.train.Example(features=tf.train.Features(feature={
                "pixels": _int64_feature(pixels),
                # The labels are already integer class ids; np.argmax on a
                # scalar would always return 0 and destroy the label.
                "label": _int64_feature(int(test_labels[index])),
                "image_raw": _bytes_feature(image_string),
            }))
            writer.write(example.SerializeToString())
    print("writed ok")
import tensorflow as tf

# NOTE: this snippet uses the TensorFlow 1.x graph/session and
# queue-runner API (tf.Session, tf.TFRecordReader, tf.train.*).

# Collect every file matching the pattern and feed the names through a queue.
files = tf.train.match_filenames_once("/home/xxy/TFRecord/data_tfrecords-*")
filename_queue = tf.train.string_input_producer(files, shuffle=False)

# Read one serialized Example at a time from the queued files.
reader = tf.TFRecordReader()
_, serialized_example = reader.read(filename_queue)

features = tf.parse_single_example(
    serialized_example,
    features={
        "image_raw": tf.FixedLenFeature([], tf.string),
        "pixels": tf.FixedLenFeature([], tf.int64),
        "label": tf.FixedLenFeature([], tf.int64),
    })

images = tf.decode_raw(features["image_raw"], tf.uint8)
# The parsed dict exposes the key "label", matching the spec above.
labels = tf.cast(features["label"], tf.int32)
pixels = tf.cast(features["pixels"], tf.int32)

with tf.Session() as sess:
    tf.global_variables_initializer().run()
    # match_filenames_once keeps its result in a local variable, which the
    # global initializer does not cover.
    tf.local_variables_initializer().run()
    print(sess.run(files))

    coordinator = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coordinator)
    for i in range(6):
        print(sess.run([images, labels]))
    coordinator.request_stop()
    coordinator.join(threads)
12.4 多线程处理输入的数据
1.使用coordinator类管理线程
import tensorflow as tf
import numpy as np
import threading
import time


def Thread_op(coordinator, thread_id):
    """Worker loop that runs until the coordinator signals a stop.

    On every pass the worker asks all threads to stop with probability 0.1;
    otherwise it reports that it is still working.

    Args:
        coordinator: the tf.train.Coordinator shared by all workers.
        thread_id: integer used to identify this worker in the output.
    """
    while not coordinator.should_stop():
        if np.random.rand() < 0.1:
            print("Stoping from thread_id : %d " % thread_id)
            # Ask every thread sharing this coordinator to stop.
            coordinator.request_stop()
        else:
            print("Working on thread_id ; %d" % thread_id)
        time.sleep(10)


coordinator = tf.train.Coordinator()
threads = [threading.Thread(target=Thread_op, args=(coordinator, i))
           for i in range(5)]
for j in threads:
    j.start()
# Block until every worker has finished.
coordinator.join(threads)
2.在Tensorflow1.x中使用QueueRunner创建线程
import tensorflow as tf
import numpy as np
import threading
import time

# NOTE: this snippet uses the TensorFlow 1.x queue-runner API.

# Queue of up to 100 float elements; each enqueue pushes 10 random normals.
queue = tf.FIFOQueue(100, "float")
enqueue = queue.enqueue([tf.random_normal([10])])

# Register a runner that drives the enqueue op from 10 threads.
qr = tf.train.QueueRunner(queue, [enqueue] * 10)
tf.train.add_queue_runner(qr)

out_tensor = queue.dequeue()

with tf.Session() as sess:
    coordinator = tf.train.Coordinator()
    # For reference, the call signature is:
    #   start_queue_runners(sess, coord, daemon, start, collection)
    threads = tf.train.start_queue_runners(sess=sess, coord=coordinator)
    for i in range(10):
        print(sess.run(out_tensor))
    coordinator.request_stop()
    coordinator.join(threads)
12.5 组织数据batch
import tensorflow as tf

# NOTE: this snippet uses the TensorFlow 1.x graph/session and
# queue-runner API.

files = tf.train.match_filenames_once("/home/xxy/TFRecord/data_tfrecords-*")
filename_queue = tf.train.string_input_producer(files, shuffle=True)

reader = tf.TFRecordReader()
_, serialized_example = reader.read(filename_queue)

features = tf.parse_single_example(
    serialized_example,
    features={
        "image_raw": tf.FixedLenFeature([], tf.string),
        "pixels": tf.FixedLenFeature([], tf.int64),
        "label": tf.FixedLenFeature([], tf.int64),
    })

images = tf.decode_raw(features["image_raw"], tf.uint8)
labels = tf.cast(features["label"], tf.int32)
pixels = tf.cast(features["pixels"], tf.int32)

# tf.train.batch needs a fully defined static shape, which decode_raw does
# not provide; each Fashion-MNIST image is 28 * 28 = 784 bytes.
images.set_shape([784])

batch_size = 10
capacity = 5000 + 3 * batch_size

# Keep the batched tensors so the session below can fetch them.
image_batch, label_batch = tf.train.batch(
    [images, labels], batch_size=batch_size, capacity=capacity)

with tf.Session() as sess:
    tf.global_variables_initializer().run()
    # match_filenames_once keeps its result in a local variable, which the
    # global initializer does not cover.
    tf.local_variables_initializer().run()

    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    for i in range(3):
        xs, ys = sess.run([image_batch, label_batch])
        print(xs, ys)
    coord.request_stop()
    coord.join(threads)
import tensorflow as tf

(train_images, train_labels), (test_images, test_labels) = \
    tf.keras.datasets.fashion_mnist.load_data()

# Flatten every 28x28 image into a 784-vector and scale pixels to [0, 1].
train_images = train_images.reshape(60000, 784).astype('float32') / 255
# The test set must be derived from test_images (10000 samples); reshaping
# train_images to (1000, 784) raises a ValueError.
test_images = test_images.reshape(10000, 784).astype('float32') / 255

# Training data: shuffle with a 1024-element buffer, then batch by 100.
train_datasets = tf.data.Dataset.from_tensor_slices((train_images, train_labels))
train_datasets = train_datasets.shuffle(buffer_size=1024).batch(100)

# Test data: batch by 100, keeping the original order.
test_dataset = tf.data.Dataset.from_tensor_slices((test_images, test_labels))
test_dataset = test_dataset.batch(100)

# Separate loop names avoid overwriting the full arrays defined above.
for image_batch, label_batch in train_datasets:
    print(image_batch, label_batch)
import tensorflow as tf
import numpy as np

# Number of synthetic observations to generate.
n_observations = int(1e4)

# Boolean feature.
feature0 = np.random.choice([False, True], n_observations)
# Integer feature in the range [0, 5).
feature1 = np.random.randint(0, 5, n_observations)
# Byte-string feature, selected by using feature1 as an index.
strings = np.array([b'cat', b'dog', b'chicken', b'horse', b'goat'])
feature2 = strings[feature1]
# Float feature drawn from a standard normal distribution.
feature3 = np.random.randn(n_observations)

features_dataset = tf.data.Dataset.from_tensor_slices(
    (feature0, feature1, feature2, feature3))

# The first loop variable must be f0 (the original "fo" left f0 undefined).
for f0, f1, f2, f3 in features_dataset.take(1):
    print(f0)
    print(f1)
    print(f2)
    print(f3)
import tensorflow as tf
import numpy as np

(train_images, train_labels), (test_images, test_labels) = \
    tf.keras.datasets.fashion_mnist.load_data()

# Flatten every 28x28 image into a 784-vector and scale pixels to [0, 1].
train_images = train_images.reshape(60000, 784).astype('float32') / 255
# The test set must be derived from test_images (10000 samples); reshaping
# train_images to (1000, 784) raises a ValueError.
test_images = test_images.reshape(10000, 784).astype('float32') / 255

train_datasets = tf.data.Dataset.from_tensor_slices((train_images, train_labels))
train_datasets = train_datasets.shuffle(buffer_size=1024).batch(100)

# Each element is an (images, labels) batch; step counts the batches.
for step, x_batch_train in enumerate(train_datasets):
    print(step, x_batch_train)
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。