本文共 13544 字,大约阅读时间需要 45 分钟。
本文将介绍:
通过下面的链接,下载dataset到本地目录cifar10中。
import os
import sys
import time

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn
import tensorflow as tf
from tensorflow import keras
from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession

# Report the interpreter and library versions this run is using.
print(tf.__version__)
print(sys.version_info)
for module in (mpl, np, pd, sklearn, tf, keras):
    print(module.__name__, module.__version__)

# 1. Let TensorFlow allocate GPU memory dynamically, on demand
config = ConfigProto()
config.gpu_options.allow_growth = True
session = InteractiveSession(config=config)
# 2. Read the train/test CSV files and pair every image id with its file path
class_names = [
    'airplane',
    'automobile',
    'bird',
    'cat',
    'deer',
    'dog',
    'frog',
    'horse',
    'ship',
    'truck',
]

train_lables_file = './cifar10/trainLabels.csv'
test_csv_file = './cifar10/sampleSubmission.csv'
train_folder = './cifar10/train/'
test_folder = './cifar10/test'


def parse_csv_file(filepath, folder):
    """Parse a two-column CSV file into ``(image_path, label)`` tuples.

    Args:
        filepath: Path of a CSV file whose first row is a header and whose
            remaining rows each hold ``image_id,label``.
        folder: Directory joined with ``<image_id>.png`` to build the path.

    Returns:
        A list of ``(image_full_path, label_str)`` tuples in file order.

    Raises:
        ValueError: If a non-empty row does not have exactly two fields.
    """
    import csv  # local import so this block does not rely on a file-level one

    results = []
    # newline='' lets the csv module handle "\n" and "\r\n" line endings.
    with open(filepath, 'r', newline='') as f:
        reader = csv.reader(f)
        next(reader, None)  # skip the header row (no-op on an empty file)
        for row in reader:
            if not row:
                # Blank lines (e.g. a trailing newline) carry no record.
                continue
            image_id, label_str = row
            image_full_path = os.path.join(folder, image_id + '.png')
            results.append((image_full_path, label_str))
    return results


train_labels_info = parse_csv_file(train_lables_file, train_folder)
test_csv_info = parse_csv_file(test_csv_file, test_folder)

import pprint

pprint.pprint(train_labels_info[0:5])
pprint.pprint(test_csv_info[0:5])
print(len(train_labels_info), len(test_csv_info))
# 4. Convert the (filepath, label) pairs into DataFrames.
# The first 45000 labelled images form the training set; the remaining
# labelled images form the validation set.
train_df = pd.DataFrame(train_labels_info[:45000])
valid_df = pd.DataFrame(train_labels_info[45000:])
test_df = pd.DataFrame(test_csv_info)

# Name the two columns the way the data generators will look them up.
for frame in (train_df, valid_df, test_df):
    frame.columns = ['filepath', 'class']

for frame in (train_df, valid_df, test_df):
    print(frame.head())
# 5. Load the images with ImageDataGenerator and augment the training data
height = 32
width = 32
channels = 3
batch_size = 32
num_classes = 10

# Options shared by the training and validation generators.
flow_options = dict(
    directory='./',
    x_col='filepath',
    y_col='class',
    classes=class_names,
    target_size=(height, width),
    batch_size=batch_size,
    seed=7,
    class_mode='sparse',
)

# Training images are rescaled and randomly augmented.
train_datagen = keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)
train_generator = train_datagen.flow_from_dataframe(
    train_df, shuffle=True, **flow_options)

# Validation images are only rescaled and are read in a fixed order.
valid_datagen = keras.preprocessing.image.ImageDataGenerator(rescale=1./255)
valid_generator = valid_datagen.flow_from_dataframe(
    valid_df, shuffle=False, **flow_options)

train_num = train_generator.samples
valid_num = valid_generator.samples
print(train_num, valid_num)
# 6. Peek at two batches produced by the training generator
for _ in range(2):
    x, y = next(train_generator)
    print(x.shape, y.shape)
    print(y)
# 7. Build the model: three blocks of two Conv2D+BatchNormalization layers
# followed by max pooling (128, 256 and 512 filters), then a dense classifier.
model = keras.models.Sequential([
    # Input is (height, width, channels), the same axis order as the
    # generators' target_size=(height, width).
    keras.layers.Conv2D(filters=128, kernel_size=3, padding='same',
                        activation='relu',
                        input_shape=[height, width, channels]),
    keras.layers.BatchNormalization(),
    keras.layers.Conv2D(filters=128, kernel_size=3, padding='same',
                        activation='relu'),
    keras.layers.BatchNormalization(),
    keras.layers.MaxPool2D(pool_size=2),

    keras.layers.Conv2D(filters=256, kernel_size=3, padding='same',
                        activation='relu'),
    keras.layers.BatchNormalization(),
    keras.layers.Conv2D(filters=256, kernel_size=3, padding='same',
                        activation='relu'),
    keras.layers.BatchNormalization(),
    keras.layers.MaxPool2D(pool_size=2),

    keras.layers.Conv2D(filters=512, kernel_size=3, padding='same',
                        activation='relu'),
    keras.layers.BatchNormalization(),
    keras.layers.Conv2D(filters=512, kernel_size=3, padding='same',
                        activation='relu'),
    keras.layers.BatchNormalization(),
    keras.layers.MaxPool2D(pool_size=2),

    keras.layers.Flatten(),
    keras.layers.Dense(512, activation='relu'),
    keras.layers.Dense(num_classes, activation='softmax'),
])

# Labels come from the generators as integer class indices
# (class_mode='sparse'), hence the sparse categorical loss.
model.compile(loss="sparse_categorical_crossentropy",
              optimizer="adam",
              metrics=['accuracy'])
model.summary()
# 8. Train the model
epochs = 20

# Whole batches per pass; a trailing partial batch is not counted.
train_steps = train_num // batch_size
valid_steps = valid_num // batch_size

# NOTE(review): fit_generator is reported as deprecated in later TensorFlow 2.x
# releases in favour of Model.fit -- confirm against the installed version.
history = model.fit_generator(
    train_generator,
    steps_per_epoch=train_steps,
    epochs=epochs,
    validation_data=valid_generator,
    validation_steps=valid_steps,
)
# 9. Plot the training curves
def plot_learning_curves(history, label, epcohs, min_value, max_value):
    """Plot a training metric together with its validation counterpart.

    Args:
        history: Object whose ``history`` attribute maps metric names to
            per-epoch value sequences.
        label: Metric name to plot; ``'val_' + label`` is plotted with it.
        epcohs: Upper bound of the x axis. The misspelled name is kept so
            existing keyword callers keep working.
        min_value: Lower bound of the y axis.
        max_value: Upper bound of the y axis.
    """
    data = {
        label: history.history[label],
        'val_' + label: history.history['val_' + label],
    }
    pd.DataFrame(data).plot(figsize=(8, 5))
    plt.grid(True)
    # Use the argument rather than the module-level ``epochs`` variable so
    # the function honours what the caller passes in.
    plt.axis([0, epcohs, min_value, max_value])
    plt.show()


plot_learning_curves(history, 'accuracy', epochs, 0, 1)
plot_learning_curves(history, 'loss', epochs, 0, 2)
# 10. Load the test set with keras ImageDataGenerator
test_datagen = keras.preprocessing.image.ImageDataGenerator(rescale=1./255)

# Build the generator from test_datagen (the original created it but then
# called valid_datagen). shuffle=False keeps predictions in the row order of
# test_df.
test_generator = test_datagen.flow_from_dataframe(
    test_df,
    directory='./',
    x_col='filepath',
    y_col='class',
    classes=class_names,
    target_size=(height, width),
    batch_size=batch_size,
    seed=7,
    shuffle=False,
    class_mode="sparse",
)
test_num = test_generator.samples
print(test_num)
# 11. Run the model on the test set
# NOTE(review): predict_generator is reported as deprecated in later
# TensorFlow 2.x releases in favour of Model.predict -- confirm against the
# installed version.
test_predict = model.predict_generator(
    test_generator,
    workers=10,
    use_multiprocessing=True,
)

# Shape of the prediction array, then its first five rows.
print(test_predict.shape)
print(test_predict[:5])

# The predicted class of a row is the index of its largest score.
test_predict_class_indices = np.argmax(test_predict, axis=1)
print(test_predict_class_indices[:5])

# Translate class indices into class names.
test_predict_class = [class_names[idx] for idx in test_predict_class_indices]
print(test_predict_class[:5])
# 12. Write the predictions to submission.csv for upload to Kaggle
def generate_submissions(filename, predict_class):
    """Write predicted class names to a CSV file with an ``id,label`` header.

    Args:
        filename: Path of the CSV file to create or overwrite.
        predict_class: Iterable of predicted class names; the n-th item is
            written with id ``n`` (ids start at 1).
    """
    with open(filename, 'w') as f:
        f.write('id,label\n')
        f.writelines(
            '%d,%s\n' % (image_id, label)
            for image_id, label in enumerate(predict_class, start=1)
        )


output_file = "./cifar10/submission.csv"
generate_submissions(output_file, test_predict_class)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Train a CNN on the Kaggle CIFAR-10 files and write a submission CSV."""

import os
import pprint
import sys
import time

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn
import tensorflow as tf
from tensorflow import keras
from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession

print(tf.__version__)
print(sys.version_info)
for module in (mpl, np, pd, sklearn, tf, keras):
    print(module.__name__, module.__version__)

# 1. Let TensorFlow allocate GPU memory dynamically, on demand
config = ConfigProto()
config.gpu_options.allow_growth = True
session = InteractiveSession(config=config)

# 2. Read the train/test CSV files and pair every image id with its file path
class_names = [
    'airplane',
    'automobile',
    'bird',
    'cat',
    'deer',
    'dog',
    'frog',
    'horse',
    'ship',
    'truck',
]

train_lables_file = './cifar10/trainLabels.csv'
test_csv_file = './cifar10/sampleSubmission.csv'
train_folder = './cifar10/train/'
test_folder = './cifar10/test'


def parse_csv_file(filepath, folder):
    """Parse a two-column CSV file into ``(image_path, label)`` tuples.

    Args:
        filepath: Path of a CSV file whose first row is a header and whose
            remaining rows each hold ``image_id,label``.
        folder: Directory joined with ``<image_id>.png`` to build the path.

    Returns:
        A list of ``(image_full_path, label_str)`` tuples in file order.

    Raises:
        ValueError: If a non-empty row does not have exactly two fields.
    """
    import csv  # local import so the function stays self-contained

    results = []
    # newline='' lets the csv module handle "\n" and "\r\n" line endings.
    with open(filepath, 'r', newline='') as f:
        reader = csv.reader(f)
        next(reader, None)  # skip the header row (no-op on an empty file)
        for row in reader:
            if not row:
                # Blank lines (e.g. a trailing newline) carry no record.
                continue
            image_id, label_str = row
            image_full_path = os.path.join(folder, image_id + '.png')
            results.append((image_full_path, label_str))
    return results


train_labels_info = parse_csv_file(train_lables_file, train_folder)
test_csv_info = parse_csv_file(test_csv_file, test_folder)

pprint.pprint(train_labels_info[0:5])
pprint.pprint(test_csv_info[0:5])
print(len(train_labels_info), len(test_csv_info))

# 4. Convert the (filepath, label) pairs into DataFrames.
# The first 45000 labelled images form the training set; the remaining
# labelled images form the validation set.
train_df = pd.DataFrame(train_labels_info[0:45000])
valid_df = pd.DataFrame(train_labels_info[45000:])
test_df = pd.DataFrame(test_csv_info)

train_df.columns = ['filepath', 'class']
valid_df.columns = ['filepath', 'class']
test_df.columns = ['filepath', 'class']

print(train_df.head())
print(valid_df.head())
print(test_df.head())

# 5. Load the images with ImageDataGenerator and augment the training data
height = 32
width = 32
channels = 3
batch_size = 32
num_classes = 10

# Options shared by the training, validation and test generators.
flow_options = dict(
    directory='./',
    x_col='filepath',
    y_col='class',
    classes=class_names,
    target_size=(height, width),
    batch_size=batch_size,
    seed=7,
    class_mode='sparse',
)

train_datagen = keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)
train_generator = train_datagen.flow_from_dataframe(
    train_df, shuffle=True, **flow_options)

valid_datagen = keras.preprocessing.image.ImageDataGenerator(rescale=1./255)
valid_generator = valid_datagen.flow_from_dataframe(
    valid_df, shuffle=False, **flow_options)

train_num = train_generator.samples
valid_num = valid_generator.samples
print(train_num, valid_num)

# 6. Peek at two batches produced by the training generator
for _ in range(2):
    x, y = next(train_generator)
    print(x.shape, y.shape)
    print(y)

# 7. Build the model: three blocks of two Conv2D+BatchNormalization layers
# followed by max pooling (128, 256 and 512 filters), then a dense classifier.
model = keras.models.Sequential([
    # Input is (height, width, channels), the same axis order as
    # target_size=(height, width) above.
    keras.layers.Conv2D(filters=128, kernel_size=3, padding='same',
                        activation='relu',
                        input_shape=[height, width, channels]),
    keras.layers.BatchNormalization(),
    keras.layers.Conv2D(filters=128, kernel_size=3, padding='same',
                        activation='relu'),
    keras.layers.BatchNormalization(),
    keras.layers.MaxPool2D(pool_size=2),

    keras.layers.Conv2D(filters=256, kernel_size=3, padding='same',
                        activation='relu'),
    keras.layers.BatchNormalization(),
    keras.layers.Conv2D(filters=256, kernel_size=3, padding='same',
                        activation='relu'),
    keras.layers.BatchNormalization(),
    keras.layers.MaxPool2D(pool_size=2),

    keras.layers.Conv2D(filters=512, kernel_size=3, padding='same',
                        activation='relu'),
    keras.layers.BatchNormalization(),
    keras.layers.Conv2D(filters=512, kernel_size=3, padding='same',
                        activation='relu'),
    keras.layers.BatchNormalization(),
    keras.layers.MaxPool2D(pool_size=2),

    keras.layers.Flatten(),
    keras.layers.Dense(512, activation='relu'),
    keras.layers.Dense(num_classes, activation='softmax'),
])

# Labels arrive as integer class indices (class_mode='sparse'), hence the
# sparse categorical loss.
model.compile(loss="sparse_categorical_crossentropy",
              optimizer="adam",
              metrics=['accuracy'])
model.summary()

# 8. Train the model
# NOTE(review): fit_generator / predict_generator are reported as deprecated
# in later TensorFlow 2.x releases in favour of fit / predict -- confirm
# against the installed version.
epochs = 20
history = model.fit_generator(
    train_generator,
    steps_per_epoch=train_num // batch_size,
    epochs=epochs,
    validation_data=valid_generator,
    validation_steps=valid_num // batch_size,
)


# 9. Plot the training curves
def plot_learning_curves(history, label, epcohs, min_value, max_value):
    """Plot a training metric together with its validation counterpart.

    Args:
        history: Object whose ``history`` attribute maps metric names to
            per-epoch value sequences.
        label: Metric name to plot; ``'val_' + label`` is plotted with it.
        epcohs: Upper bound of the x axis. The misspelled name is kept so
            existing keyword callers keep working.
        min_value: Lower bound of the y axis.
        max_value: Upper bound of the y axis.
    """
    data = {
        label: history.history[label],
        'val_' + label: history.history['val_' + label],
    }
    pd.DataFrame(data).plot(figsize=(8, 5))
    plt.grid(True)
    # Use the argument rather than the module-level ``epochs`` variable.
    plt.axis([0, epcohs, min_value, max_value])
    plt.show()


plot_learning_curves(history, 'accuracy', epochs, 0, 1)
plot_learning_curves(history, 'loss', epochs, 0, 2)

# 10. Load the test set with keras ImageDataGenerator
test_datagen = keras.preprocessing.image.ImageDataGenerator(rescale=1./255)
# Built from test_datagen (the original created it but then called
# valid_datagen). shuffle=False keeps predictions in the row order of test_df.
test_generator = test_datagen.flow_from_dataframe(
    test_df, shuffle=False, **flow_options)
test_num = test_generator.samples
print(test_num)

# 11. Run the model on the test set
test_predict = model.predict_generator(
    test_generator,
    workers=10,
    use_multiprocessing=True,
)
# 11.1 Shape of the prediction array
print(test_predict.shape)
# 11.2 First five prediction rows
print(test_predict[0:5])
# 11.3 The predicted class of a row is the index of its largest score
test_predict_class_indices = np.argmax(test_predict, axis=1)
# 11.4 First five predicted indices
print(test_predict_class_indices[0:5])
# 11.5 Translate class indices into class names
test_predict_class = [class_names[index]
                      for index in test_predict_class_indices]
print(test_predict_class[0:5])


# 12. Write the predictions to submission.csv for upload to Kaggle
def generate_submissions(filename, predict_class):
    """Write predicted class names to a CSV file with an ``id,label`` header.

    Args:
        filename: Path of the CSV file to create or overwrite.
        predict_class: Iterable of predicted class names; the n-th item is
            written with id ``n`` (ids start at 1).
    """
    with open(filename, 'w') as f:
        f.write('id,label\n')
        f.writelines(
            '%d,%s\n' % (image_id, label)
            for image_id, label in enumerate(predict_class, start=1)
        )


output_file = "./cifar10/submission.csv"
generate_submissions(output_file, test_predict_class)
转载地址:http://lvili.baihongyu.com/