データセットがカテゴリ(ラベル)毎にフォルダ分けされている場合
# Load every training image into memory, grouped by class label.
# Layout read by the code: <BASE_DATA_FOLDER>/train/<class_label>/*.png
BASE_DATA_FOLDER = "../input/"
TRAin_DATA_FOLDER = os.path.join(BASE_DATA_FOLDER, "train")

# Maps class label (the sub-folder name) -> list of images returned by
# cv2.imread with the IMREAD_COLOR flag.
images_per_class = {}
for class_folder_name in os.listdir(TRAin_DATA_FOLDER):
    class_folder_path = os.path.join(TRAin_DATA_FOLDER, class_folder_name)
    # os.listdir also returns plain files (e.g. a stray CSV); only
    # directories are class folders, so skip everything else.
    if not os.path.isdir(class_folder_path):
        continue
    class_label = class_folder_name
    images_per_class[class_label] = []
    for image_path in glob(os.path.join(class_folder_path, "*.png")):
        image_bgr = cv2.imread(image_path, cv2.IMREAD_COLOR)
        # cv2.imread reports failure by returning None instead of raising;
        # fail here rather than letting a None reach later processing.
        if image_bgr is None:
            raise OSError(f"cv2.imread could not read {image_path!r}")
        images_per_class[class_label].append(image_bgr)
# Collect every training PNG; the parent folder name is used as the label.
z = glob.glob("../input/train/*/*.png")
ori_label = []
ori_imgs = []
for fn in z:
    if fn[-3:] != 'png':
        continue
    ori_label.append(fn.split('/')[-2])
    # The context manager closes the underlying file once the resized copy
    # has been produced, so handles do not pile up across a large dataset.
    with Image.open(fn) as new_img:
        # Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the
        # same resampling filter under its current name.
        ori_imgs.append(
            ImageOps.fit(new_img, (48, 48), Image.LANCZOS).convert('RGB')
        )
# Edge length, in pixels, that every image is resized to
ScaleTo = 70
# Not used in this snippet; presumably a random seed consumed later -- confirm.
seed = 7

# Load the training data
path = "./train/*/*.png"
files = glob(path)
num = len(files)

trainImg = []
trainLabel = []
for j, img in enumerate(files, start=1):
    # "\r" keeps the progress counter on a single console line.
    print(f"{j}/{num}", end="\r")
    trainImg.append(cv2.resize(cv2.imread(img), (ScaleTo, ScaleTo)))
    # The parent folder name is the label.
    trainLabel.append(img.split("/")[-2])

trainImg = np.asarray(trainImg)
trainLabel = pd.DataFrame(trainLabel)
ImageDataBunchの作成
# Load the training data
data_path = Path('../input')
df = pd.read_csv(data_path/'train.csv')
df.head()  # preview; only displayed when run as the last line of a notebook cell

# Load the submission data
sub_csv = pd.read_csv(data_path/'sample_submission.csv')
sub_csv.head()  # preview, as above


# Build the data bunch
def create_databunch(valid_idx):
    """Build the data bunch from the module-level ``df`` and ``sub_csv``.

    Args:
        valid_idx: Passed straight to ``split_by_idx`` to choose the
            validation items.

    Returns:
        The object returned by ``.databunch(path='.', bs=64)`` after
        ``.normalize(imagenet_stats)`` has been applied, with the items
        built from ``sub_csv`` attached through ``add_test``.
    """
    test_items = ImageList.from_df(sub_csv, path=data_path/'test', folder='test')
    train_items = ImageList.from_df(df, path=data_path/'train', folder='train')

    labelled = (
        train_items
        .split_by_idx(valid_idx)
        .label_from_df()
        .add_test(test_items)
    )
    augmentations = get_transforms(flip_vert=True, max_rotate=20.0)
    bunch = labelled.transform(augmentations, size=128).databunch(path='.', bs=64)
    return bunch.normalize(imagenet_stats)
画像データとラベルが別々に保存されている場合
# Paths for the training images, test images, and label CSV
train_dir = "../input/train/train/"
test_dir = "../input/test/test/"
train_df = pd.read_csv('../input/train.csv')
train_df.head()  # preview; only displayed when run as the last line of a notebook cell

X_tr = []
Y_tr = []
images = train_df["id"].values
# Walk ids and labels in lockstep. The previous per-image boolean-mask lookup
# rescanned the whole DataFrame for every file (quadratic in the row count).
# For unique ids the result is identical; with duplicate ids each row now
# keeps its own label instead of the first match's label.
for img_id, label in tqdm_notebook(
    zip(images, train_df["has_cactus"].values), total=len(images)
):
    img = cv2.imread(train_dir + img_id)
    # cv2.imread returns None for a missing or unreadable file; fail here
    # rather than in the float conversion below.
    if img is None:
        raise OSError(f"cv2.imread could not read {train_dir + img_id!r}")
    X_tr.append(img)
    Y_tr.append(label)

# Stack into one float32 array and scale pixel values by 1/255.
X_tr = np.asarray(X_tr, dtype='float32')
X_tr /= 255
Y_tr = np.asarray(Y_tr)
Zip以下に配置されたフォルダに画像がある場合
# Specify the file directories
data_folder = Path("../input")
train_df = pd.read_csv("../input/train.csv")
test_df = pd.read_csv("../input/sample_submission.csv")

# Load the training data
# NOTE(review): `test_img` and `trfm` are not defined in this snippet and must
# already exist when it runs; `test_df` is loaded above but not used here.
# Presumably `test_img` is meant to be built from `test_df` -- confirm.
train_items = ImageList.from_df(train_df, path=data_folder/"train", folder="train")
labelled_items = train_items.split_by_rand_pct(0.01).label_from_df()
train_img = (
    labelled_items
    .add_test(test_img)
    .transform(trfm, size=128)
    .databunch(path=".", bs=64, device=torch.device('cuda:0'))
    .normalize(imagenet_stats)
)