博客
关于我
强烈建议你试试无所不能的chatGPT,快点击我
Keras(二十三)cifar10数据集使用keras generator读取、模型训练、预测
阅读量:4203 次
发布时间:2019-05-26

本文共 13544 字,大约阅读时间需要 45 分钟。

本文将介绍:

  • 使用keras ImageDataGenerator读取cifar10数据并做数据增强
  • 模型训练、预测,并生成kaggle提交文件

一,下载kaggle-cifar10数据

在kaggle的CIFAR-10竞赛数据页面(https://www.kaggle.com/c/cifar-10/data),下载dataset到本地目录cifar10

二,实现tensorflow动态按需分配GPU

import matplotlib as mplimport matplotlib.pyplot as pltimport numpy as npimport osimport pandas as pdimport sklearnimport sysimport tensorflow as tfimport timefrom tensorflow import kerasprint(tf.__version__)print(sys.version_info)for module in mpl, np, pd, sklearn, tf, keras:    print(module.__name__, module.__version__)    # 一,实现tensorflow动态按需分配GPUfrom tensorflow.compat.v1 import ConfigProtofrom tensorflow.compat.v1 import InteractiveSessionconfig = ConfigProto()config.gpu_options.allow_growth = Truesession = InteractiveSession(config=config)

三,读取训练集、测试集的csv文件数据和训练集、测试集数据对应关系

# 二,读取训练集、测试集的csv文件数据和训练集、测试集数据对应关系class_names = [    'airplane',    'automobile',    'bird',    'cat',    'deer',    'dog',    'frog',    'horse',    'ship',    'truck',]train_lables_file = './cifar10/trainLabels.csv'test_csv_file = './cifar10/sampleSubmission.csv'train_folder = './cifar10/train/'test_folder = './cifar10/test'def parse_csv_file(filepath, folder):    """Parses csv files into (filename(path), label) format"""    results = []    with open(filepath, 'r') as f:        lines = f.readlines()[1:]    for line in lines:        image_id, label_str = line.strip('\n').split(',')        image_full_path = os.path.join(folder, image_id + '.png')        results.append((image_full_path, label_str))    return resultstrain_labels_info = parse_csv_file(train_lables_file, train_folder)test_csv_info = parse_csv_file(test_csv_file, test_folder)import pprintpprint.pprint(train_labels_info[0:5])pprint.pprint(test_csv_info[0:5])print(len(train_labels_info), len(test_csv_info))

四,将对应关系转换为dataframe类型

# 四,将对应关系转换为dataframe类型# train_df = pd.DataFrame(train_labels_info)train_df = pd.DataFrame(train_labels_info[0:45000])valid_df = pd.DataFrame(train_labels_info[45000:])test_df = pd.DataFrame(test_csv_info)train_df.columns = ['filepath', 'class']valid_df.columns = ['filepath', 'class']test_df.columns = ['filepath', 'class']print(train_df.head())print(valid_df.head())print(test_df.head())

五,使用ImageDataGenerator加载数据并做数据增强

# 五,使用ImageDataGenerator加载数据并做数据增强height = 32width = 32channels = 3batch_size = 32num_classes = 10train_datagen = keras.preprocessing.image.ImageDataGenerator(    rescale = 1./255,    rotation_range = 40,    width_shift_range = 0.2,    height_shift_range = 0.2,    shear_range = 0.2,    zoom_range = 0.2,    horizontal_flip = True,    fill_mode = 'nearest',)train_generator = train_datagen.flow_from_dataframe(    train_df,    directory = './',    x_col = 'filepath',    y_col = 'class',    classes = class_names,    target_size = (height, width),    batch_size = batch_size,    seed = 7,    shuffle = True,    class_mode = 'sparse',)valid_datagen = keras.preprocessing.image.ImageDataGenerator(    rescale = 1./255)valid_generator = valid_datagen.flow_from_dataframe(    valid_df,    directory = './',    x_col = 'filepath',    y_col = 'class',    classes = class_names,    target_size = (height, width),    batch_size = batch_size,    seed = 7,    shuffle = False,    class_mode = "sparse")train_num = train_generator.samplesvalid_num = valid_generator.samplesprint(train_num, valid_num)

六,查看generator数据

for i in range(2):    x, y = train_generator.next()    print(x.shape, y.shape)    print(y)

七,构建模型

# 七,构建模型model = keras.models.Sequential([    keras.layers.Conv2D(filters=128, kernel_size=3, padding='same',                        activation='relu',                         input_shape=[width, height, channels]),    keras.layers.BatchNormalization(),    keras.layers.Conv2D(filters=128, kernel_size=3, padding='same',                        activation='relu'),    keras.layers.BatchNormalization(),    keras.layers.MaxPool2D(pool_size=2),        keras.layers.Conv2D(filters=256, kernel_size=3, padding='same',                        activation='relu'),    keras.layers.BatchNormalization(),    keras.layers.Conv2D(filters=256, kernel_size=3, padding='same',                        activation='relu'),    keras.layers.BatchNormalization(),    keras.layers.MaxPool2D(pool_size=2),    keras.layers.Conv2D(filters=512, kernel_size=3, padding='same',                        activation='relu'),    keras.layers.BatchNormalization(),    keras.layers.Conv2D(filters=512, kernel_size=3, padding='same',                        activation='relu'),    keras.layers.BatchNormalization(),    keras.layers.MaxPool2D(pool_size=2),    keras.layers.Flatten(),    keras.layers.Dense(512, activation='relu'),    keras.layers.Dense(num_classes, activation='softmax'),])model.compile(loss="sparse_categorical_crossentropy",              optimizer="adam", metrics=['accuracy'])model.summary()

八,训练模型

# 八,训练模型epochs = 20history = model.fit_generator(train_generator,                              steps_per_epoch = train_num // batch_size,                              epochs = epochs,                              validation_data = valid_generator,                              validation_steps = valid_num // batch_size)

九,打印模型训练曲线

# 9. Plot training curves.
def plot_learning_curves(history, label, epochs, min_value, max_value):
    """Plot the train/validation curve for one metric.

    Bug fix: the parameter was misspelled 'epcohs', so the body silently
    read the global 'epochs' and ignored the argument.

    history: keras History object; label: metric name (e.g. 'loss');
    epochs: x-axis upper bound; min_value/max_value: y-axis bounds.
    """
    data = {
        label: history.history[label],
        'val_' + label: history.history['val_' + label],
    }
    pd.DataFrame(data).plot(figsize=(8, 5))
    plt.grid(True)
    plt.axis([0, epochs, min_value, max_value])
    plt.show()


plot_learning_curves(history, 'accuracy', epochs, 0, 1)
plot_learning_curves(history, 'loss', epochs, 0, 2)

十,使用keras.ImageDataGenerator加载测试集数据

# 十,使用keras.ImageDataGenerator加载测试集数据test_datagen = keras.preprocessing.image.ImageDataGenerator(    rescale = 1./255)test_generator = valid_datagen.flow_from_dataframe(    test_df,    directory = './',    x_col = 'filepath',    y_col = 'class',    classes = class_names,    target_size = (height, width),    batch_size = batch_size,    seed = 7,    shuffle = False,    class_mode = "sparse")test_num = test_generator.samplesprint(test_num)

十一,使用测试集预测模型结果

# 11. Predict on the test set.  In TF2 model.predict accepts generators
#     directly, replacing the deprecated predict_generator.
test_predict = model.predict(test_generator,
                             workers=10,
                             use_multiprocessing=True)

# 1) shape of the prediction matrix: (num_samples, num_classes)
print(test_predict.shape)
# 2) first 5 rows of class probabilities
print(test_predict[0:5])
# 3) the predicted class is the arg-max over the class axis
test_predict_class_indices = np.argmax(test_predict, axis=1)
# 4) first 5 predicted class indices
print(test_predict_class_indices[0:5])
# 5) map indices back to class names
test_predict_class = [class_names[index]
                      for index in test_predict_class_indices]

查看前五条结果

print(test_predict_class[0:5])

十二,将预测结果写入到submission.csv文件中,并在kaggle上提交

# 十二,将预测结果写入到submission.csv文件中,并在kaggle上提交def generate_submissions(filename, predict_class):    with open(filename, 'w') as f:        f.write('id,label\n')        for i in range(len(predict_class)):            f.write('%d,%s\n' % (i+1, predict_class[i]))output_file = "./cifar10/submission.csv"generate_submissions(output_file, test_predict_class)

十三,总结代码

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""CIFAR-10: read kaggle data with keras ImageDataGenerator, train a CNN,
predict on the test set and write a kaggle submission file."""
import csv
import os
import pprint
import sys
import time

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn
import tensorflow as tf
from tensorflow import keras

print(tf.__version__)
print(sys.version_info)
for module in mpl, np, pd, sklearn, tf, keras:
    print(module.__name__, module.__version__)

# 1. Dynamic, grow-as-needed GPU memory allocation.
from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession
config = ConfigProto()
config.gpu_options.allow_growth = True
session = InteractiveSession(config=config)

# 2. Read the train/test csv files that map each image file to its label.
class_names = [
    'airplane',
    'automobile',
    'bird',
    'cat',
    'deer',
    'dog',
    'frog',
    'horse',
    'ship',
    'truck',
]

train_labels_file = './cifar10/trainLabels.csv'
test_csv_file = './cifar10/sampleSubmission.csv'
train_folder = './cifar10/train/'
test_folder = './cifar10/test'


def parse_csv_file(filepath, folder):
    """Parse a csv file into a list of (image_path, label) tuples.

    The header row is skipped.  csv.reader handles '\r\n' endings and
    quoted fields, unlike a manual line.strip('\n').split(',').
    """
    results = []
    with open(filepath, 'r', newline='') as f:
        reader = csv.reader(f)
        next(reader)  # skip the header row
        for row in reader:
            if not row:
                continue
            image_full_path = os.path.join(folder, row[0] + '.png')
            results.append((image_full_path, row[1]))
    return results


train_labels_info = parse_csv_file(train_labels_file, train_folder)
test_csv_info = parse_csv_file(test_csv_file, test_folder)
pprint.pprint(train_labels_info[0:5])
pprint.pprint(test_csv_info[0:5])
print(len(train_labels_info), len(test_csv_info))

# 4. Wrap the (path, label) pairs in DataFrames: 45000 train / 5000 valid.
columns = ['filepath', 'class']
train_df = pd.DataFrame(train_labels_info[:45000], columns=columns)
valid_df = pd.DataFrame(train_labels_info[45000:], columns=columns)
test_df = pd.DataFrame(test_csv_info, columns=columns)
print(train_df.head())
print(valid_df.head())
print(test_df.head())

# 5. ImageDataGenerator loading; training data is augmented on the fly.
height = 32
width = 32
channels = 3
batch_size = 32
num_classes = 10

train_datagen = keras.preprocessing.image.ImageDataGenerator(
    rescale=1. / 255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)
train_generator = train_datagen.flow_from_dataframe(
    train_df,
    directory='./',
    x_col='filepath',
    y_col='class',
    classes=class_names,
    target_size=(height, width),
    batch_size=batch_size,
    seed=7,
    shuffle=True,
    class_mode='sparse',
)

# Validation images are only rescaled, never augmented.
valid_datagen = keras.preprocessing.image.ImageDataGenerator(rescale=1. / 255)
valid_generator = valid_datagen.flow_from_dataframe(
    valid_df,
    directory='./',
    x_col='filepath',
    y_col='class',
    classes=class_names,
    target_size=(height, width),
    batch_size=batch_size,
    seed=7,
    shuffle=False,
    class_mode='sparse',
)
train_num = train_generator.samples
valid_num = valid_generator.samples
print(train_num, valid_num)

# 6. Peek at two batches to sanity-check shapes (builtin next(), not .next()).
for _ in range(2):
    x, y = next(train_generator)
    print(x.shape, y.shape)
    print(y)

# 7. Build the CNN: three Conv-BN-Conv-BN-MaxPool blocks + dense head.
model = keras.models.Sequential([
    keras.layers.Conv2D(filters=128, kernel_size=3, padding='same',
                        activation='relu',
                        input_shape=[width, height, channels]),
    keras.layers.BatchNormalization(),
    keras.layers.Conv2D(filters=128, kernel_size=3, padding='same',
                        activation='relu'),
    keras.layers.BatchNormalization(),
    keras.layers.MaxPool2D(pool_size=2),
    keras.layers.Conv2D(filters=256, kernel_size=3, padding='same',
                        activation='relu'),
    keras.layers.BatchNormalization(),
    keras.layers.Conv2D(filters=256, kernel_size=3, padding='same',
                        activation='relu'),
    keras.layers.BatchNormalization(),
    keras.layers.MaxPool2D(pool_size=2),
    keras.layers.Conv2D(filters=512, kernel_size=3, padding='same',
                        activation='relu'),
    keras.layers.BatchNormalization(),
    keras.layers.Conv2D(filters=512, kernel_size=3, padding='same',
                        activation='relu'),
    keras.layers.BatchNormalization(),
    keras.layers.MaxPool2D(pool_size=2),
    keras.layers.Flatten(),
    keras.layers.Dense(512, activation='relu'),
    keras.layers.Dense(num_classes, activation='softmax'),
])
model.compile(loss="sparse_categorical_crossentropy",
              optimizer="adam", metrics=['accuracy'])
model.summary()

# 8. Train (model.fit accepts generators in TF2; fit_generator is deprecated).
epochs = 20
history = model.fit(
    train_generator,
    steps_per_epoch=train_num // batch_size,
    epochs=epochs,
    validation_data=valid_generator,
    validation_steps=valid_num // batch_size,
)


# 9. Plot training curves.  Bug fix: the parameter was misspelled 'epcohs',
#    so the body silently read the global 'epochs' instead of the argument.
def plot_learning_curves(history, label, epochs, min_value, max_value):
    """Plot the train/validation curve for one metric."""
    data = {
        label: history.history[label],
        'val_' + label: history.history['val_' + label],
    }
    pd.DataFrame(data).plot(figsize=(8, 5))
    plt.grid(True)
    plt.axis([0, epochs, min_value, max_value])
    plt.show()


plot_learning_curves(history, 'accuracy', epochs, 0, 1)
plot_learning_curves(history, 'loss', epochs, 0, 2)

# 10. Test-set generator.  Bug fix: use test_datagen (the original created
#     it but then called valid_datagen.flow_from_dataframe).
test_datagen = keras.preprocessing.image.ImageDataGenerator(rescale=1. / 255)
test_generator = test_datagen.flow_from_dataframe(
    test_df,
    directory='./',
    x_col='filepath',
    y_col='class',               # placeholder labels from sampleSubmission.csv
    classes=class_names,
    target_size=(height, width),
    batch_size=batch_size,
    seed=7,
    shuffle=False,               # keep file order so predictions line up with ids
    class_mode="sparse",
)
test_num = test_generator.samples
print(test_num)

# 11. Predict (model.predict accepts generators; predict_generator is deprecated).
test_predict = model.predict(test_generator,
                             workers=10,
                             use_multiprocessing=True)
print(test_predict.shape)                # (num_samples, num_classes)
print(test_predict[0:5])                 # first 5 probability rows
test_predict_class_indices = np.argmax(test_predict, axis=1)
print(test_predict_class_indices[0:5])   # first 5 predicted indices
test_predict_class = [class_names[index]
                      for index in test_predict_class_indices]
print(test_predict_class[0:5])           # first 5 predicted labels


# 12. Write predictions to submission.csv for the kaggle upload.
def generate_submissions(filename, predict_class):
    """Write an 'id,label' csv; ids start at 1 to match kaggle's format."""
    with open(filename, 'w') as f:
        f.write('id,label\n')
        for i, label in enumerate(predict_class, start=1):
            f.write('%d,%s\n' % (i, label))


output_file = "./cifar10/submission.csv"
generate_submissions(output_file, test_predict_class)

转载地址:http://lvili.baihongyu.com/

你可能感兴趣的文章
软件安全性测试转载自小龙虾博客
查看>>
周末参加“北京干部管理职业技术学院”关于高职课程改革的专家讨论会
查看>>
软件测试框架介绍
查看>>
软件自动化测试框架的发展
查看>>
实现haproxy+LNMT负载均衡架构
查看>>
论文浅尝 | 通过共享表示和结构化预测进行事件和事件时序关系的联合抽取
查看>>
论文浅尝 | 融合多粒度信息和外部语言知识的中文关系抽取
查看>>
论文浅尝 | GMNN: Graph Markov Neural Networks
查看>>
廖雪峰Python教程 学习笔记3 hello.py
查看>>
从内核看epoll的实现(基于5.9.9)
查看>>
python与正则表达式
查看>>
安装.Net Framework 4.7.2时出现“不受信任提供程序信任的根证书中终止”的解决方法
查看>>
input type=“button“与input type=“submit“的区别
查看>>
解决Github代码下载慢问题!
查看>>
Web项目实现ServletContextListener接口
查看>>
版本控制工具CVS、SVN、Git介绍
查看>>
MyBatis批量操作SQL
查看>>
MyBatis之SqlSessionFactoryBuilder
查看>>
查看linux服务器配置信息
查看>>
Java集合-HashMap源码
查看>>