前回に引き続き画像認識についての勉強をしていきます。
今回も前回の記事と同様、植物の分類を行う
「Plant Seedling Classification」のコンペのKernelを写経していきます。
機械学習を行う際に、学習させるデータの前加工が大切となってくるみたいなので
今回はOpenCVを活用して、画像の前加工の方法を勉強していきます。
以下コードになります。
# Load the libraries used below.
# Jupyter-only magic (not valid in a plain .py file); run it in a notebook
# cell if plots do not render inline:
# %matplotlib inline
import os
from glob import glob

import cv2
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Load the data: every sub-folder of the train folder is treated as one class
# and every "*.png" file inside it as one image of that class.
BASE_DATA_FOLDER = "E:/Kaggle/plant-seedlings-classification/"
TRAin_DATA_FOLDER = os.path.join(BASE_DATA_FOLDER, "train")

images_per_class = {}
for class_folder_name in os.listdir(TRAin_DATA_FOLDER):
    class_folder_path = os.path.join(TRAin_DATA_FOLDER, class_folder_name)
    # Skip stray files so they do not show up as empty classes.
    if not os.path.isdir(class_folder_path):
        continue
    class_label = class_folder_name
    images_per_class[class_label] = []
    for image_path in glob(os.path.join(class_folder_path, "*.png")):
        image_bgr = cv2.imread(image_path, cv2.IMREAD_COLOR)
        # cv2.imread signals an unreadable file by returning None instead of
        # raising; drop those so later processing does not crash on them.
        if image_bgr is None:
            continue
        images_per_class[class_label].append(image_bgr)

# Print the number of images in each category.
for key, value in images_per_class.items():
    print("{0} -> {1}".format(key, len(value)))


# Helper to preview a few images of one category.
def plot_for_class(label):
    """Show up to the first nine images of class ``label`` in a 3x3 grid.

    Args:
        label: Key of ``images_per_class`` whose images are displayed.

    Raises:
        KeyError: If ``label`` is not a key of ``images_per_class``.
    """
    nb_rows = 3
    nb_cols = 3
    fig, axs = plt.subplots(nb_rows, nb_cols, figsize=(6, 6))
    images = images_per_class[label]
    # axs.flat walks the grid row by row, i.e. the same order as nested
    # row/column loops.
    for n, ax in enumerate(axs.flat):
        ax.xaxis.set_ticklabels([])
        ax.yaxis.set_ticklabels([])
        # Leave the remaining cells empty when the class has < 9 images.
        if n < len(images):
            # Images were loaded with cv2.imread (BGR channel order) while
            # matplotlib expects RGB, so convert before displaying.
            ax.imshow(cv2.cvtColor(images[n], cv2.COLOR_BGR2RGB))


# Display the category "Small-flowered Cranesbill" as a test.
plot_for_class("Small-flowered Cranesbill")


# Image pre-processing
# Create the mask.
def create_mask_for_plant(image, sensitivity=35):
    """Return a binary mask of the pixels inside a fixed HSV colour window.

    Args:
        image: BGR image (as loaded by ``cv2.imread``).
        sensitivity: Half-width of the hue window centred on hue 60
            (presumably chosen to capture green plant pixels).

    Returns:
        Single-channel mask as produced by ``cv2.inRange`` and then
        morphologically closed.
    """
    image_hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

    # HSV colour window: hue 60 +/- sensitivity, saturation >= 100,
    # value >= 50.
    lower_hsv = np.array([60 - sensitivity, 100, 50])
    upper_hsv = np.array([60 + sensitivity, 255, 255])

    mask = cv2.inRange(image_hsv, lower_hsv, upper_hsv)
    # Morphological closing with an 11x11 elliptical kernel.
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (11, 11))
    mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel)

    return mask


# Cut the plant out of the original image with the mask created above
# (cv2.bitwise_and).
def segment_plant(image):
    """Return ``image`` with only the pixels selected by the plant mask kept."""
    mask = create_mask_for_plant(image)
    output = cv2.bitwise_and(image, image, mask=mask)
    return output


# Sharpening: cv2.GaussianBlur smooths (blurs) the image, and cv2.addWeighted
# combines two images as a per-pixel weighted sum.
def sharpen_image(image):
    """Sharpen ``image`` by blending it with a Gaussian-blurred copy.

    The result is ``1.5 * image - 0.5 * blurred`` (computed by
    ``cv2.addWeighted``), which boosts the detail removed by the blur.

    Args:
        image: Image array accepted by ``cv2.GaussianBlur``.

    Returns:
        The sharpened image.
    """
    # Kernel size (0, 0) with sigma 3.
    image_blurred = cv2.GaussianBlur(image, (0, 0), 3)
    image_sharp = cv2.addWeighted(image, 1.5, image_blurred, -0.5, 0)
    return image_sharp


# Try the pipeline on a single image.
image = images_per_class["Small-flowered Cranesbill"][97]

image_mask = create_mask_for_plant(image)
image_segmented = segment_plant(image)
image_sharpen = sharpen_image(image_segmented)

fig, axs = plt.subplots(1, 4, figsize=(20, 20))
# The colour images are in OpenCV's BGR channel order while matplotlib
# expects RGB, so convert them for display only.
axs[0].imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
axs[1].imshow(image_mask)
axs[2].imshow(cv2.cvtColor(image_segmented, cv2.COLOR_BGR2RGB))
axs[3].imshow(cv2.cvtColor(image_sharpen, cv2.COLOR_BGR2RGB))


# Extract contours and derive various measurements from them
# (total area, area of the largest object, ...).
def find_contours(mask_image):
    """Return the external contours found in ``mask_image``."""
    # Index -2 is used rather than a fixed position from the start; this
    # presumably keeps the code working across OpenCV versions whose
    # findContours returns tuples of different lengths.
    return cv2.findContours(
        mask_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )[-2]


def calculate_largest_contour_area(contours):
    """Return the area of the largest contour, or 0 when there are none."""
    # max() over the areas avoids computing the winner's area a second time.
    return max((cv2.contourArea(c) for c in contours), default=0)


def calculate_contours_area(contours, min_contour_area=250):
    """Return the summed area of all contours of at least ``min_contour_area``.

    Args:
        contours: Iterable of contours as returned by ``find_contours``.
        min_contour_area: Contours with a smaller area are ignored.
    """
    area = 0
    for c in contours:
        c_area = cv2.contourArea(c)
        if c_area >= min_contour_area:
            area += c_area
    return area


# Collect per-image features for every class.
areas = []
larges_contour_areas = []
labels = []
nb_of_contours = []
images_height = []
images_width = []

for class_label in images_per_class.keys():
    for image in images_per_class[class_label]:
        # Skip entries that failed to load (cv2.imread yields None for them).
        if image is None:
            continue
        mask = create_mask_for_plant(image)
        contours = find_contours(mask)

        area = calculate_contours_area(contours)
        largest_area = calculate_largest_contour_area(contours)
        height, width, channels = image.shape

        images_height.append(height)
        images_width.append(width)
        areas.append(area)
        nb_of_contours.append(len(contours))
        larges_contour_areas.append(largest_area)
        labels.append(class_label)

# One row per image; column order matches the original assignments.
features_df = pd.DataFrame(
    {
        "label": labels,
        "area": areas,
        "largest_area": larges_contour_areas,
        "number_of_components": nb_of_contours,
        "height": images_height,
        "width": images_width,
    }
)

# Summary statistics per class (rendered when run as the last notebook
# expression).
features_df.groupby("label").describe()