
from sklearn.model_selection import train_test_split
import numpy as np
from PIL import Image
import os
from glob import glob
from sklearn.preprocessing import LabelEncoder
import cv2
# Load every training image, derive its class label from the name of the
# folder that contains it, one-hot encode the labels and split the result
# into train/test sets.

# Target edge length in pixels: every image is resized to ScaleTo x ScaleTo.
ScaleTo = 70
# NOTE(review): `seed` is not referenced in this section; the split below
# passes its own random_state. Kept in case later code reads it.
seed = 7

# glob() and os.listdir() do not perform "~" expansion, so the home
# directory has to be resolved explicitly or no file is ever matched.
data_dir = os.path.expanduser("~/train")
path = os.path.join(data_dir, "*", "*.png")

# glob() returns matches in arbitrary order; sorting makes the sample order
# (and therefore the seeded split below) reproducible across runs.
files = sorted(glob(path))
num = len(files)
if num == 0:
    raise FileNotFoundError(f"no training images match {path!r}")

trainImg = []
trainLabel = []
for j, img in enumerate(files, start=1):
    print(f"{j}/{num}", end="\r")
    pixels = cv2.imread(img)
    if pixels is None:
        # cv2.imread signals an unreadable/corrupt file by returning None
        # instead of raising; skip it so cv2.resize does not crash.
        print(f"skipping unreadable image: {img}")
        continue
    trainImg.append(cv2.resize(pixels, (ScaleTo, ScaleTo)))
    # Take the label from the image's own parent folder, in the same loop
    # iteration, so trainImg[i] and trainLabel[i] always describe the same
    # file (a separate directory walk can differ in order and in count).
    trainLabel.append(os.path.basename(os.path.dirname(img)))

image_list = np.array(trainImg)
label_list = np.array(trainLabel)

# Map class-name strings to integer ids 0..n_classes-1.
le = LabelEncoder()
le = le.fit(label_list)
label_list = le.transform(label_list)
# One-hot encode with NumPy: row k of the identity matrix is the one-hot
# vector for class k, so indexing by the id array yields an
# (n_samples, n_classes) float32 matrix.
label_list = np.eye(len(le.classes_), dtype="float32")[label_list]

X_train, X_test, y_train, y_test = train_test_split(
    image_list, label_list, test_size=0.33, random_state=111
)