PythonMania

普段はロボットとAIを組み合わせて色々作ってます。Python関係以外も色々投稿していくと思います。

【Python】画像の前加工 - OpenCVを使いオブジェクトの特徴を捉える【DeepLearning】


前回に引き続き画像認識についての勉強をしていきます。



今回も前回の記事と同様、植物の分類を行う

「Plant Seedling Classification」のコンペのKernelを写経していきます。


www.kaggle.com




機械学習を行う際に、学習させるデータの前加工が大切となってくるみたいなので

今回はOpenCVを活用して画像の前加工の方法を勉強していきます。







以下コードになります。

#使用するライブラリの読み込み
%matplotlib inline
import os
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
import cv2
import numpy as np
from glob import glob
import seaborn as sns



# Load the training data
BASE_DATA_FOLDER = "E:/Kaggle/plant-seedlings-classification/"
TRAin_DATA_FOLDER = os.path.join(BASE_DATA_FOLDER, "train")
# Maps each class label (the sub-folder name) to the list of its images,
# stored as the arrays returned by cv2.imread (BGR channel order).
images_per_class = {}
for class_folder_name in os.listdir(TRAin_DATA_FOLDER):
    class_folder_path = os.path.join(TRAin_DATA_FOLDER, class_folder_name)
    # Only sub-folders are classes; skip stray files in the train folder so
    # they do not show up as empty classes.
    if not os.path.isdir(class_folder_path):
        continue
    class_label = class_folder_name
    images_per_class[class_label] = []
    for image_path in glob(os.path.join(class_folder_path, "*.png")):
        image_bgr = cv2.imread(image_path, cv2.IMREAD_COLOR)
        # cv2.imread signals failure by returning None instead of raising;
        # storing that None would crash the later cv2.cvtColor calls.
        if image_bgr is None:
            print("Skipping unreadable image: {0}".format(image_path))
            continue
        images_per_class[class_label].append(image_bgr)


# Print the number of images loaded for each category
for label_name, label_images in images_per_class.items():
    image_count = len(label_images)
    print("{0} -> {1}".format(label_name, image_count))


# Helper that shows a few sample images of one class
def plot_for_class(label):
    """Show up to the first nine images of class ``label`` in a 3x3 grid.

    Args:
        label: Key into the module-level ``images_per_class`` dict.

    Raises:
        KeyError: If ``label`` is not a key of ``images_per_class``.
    """
    nb_rows = 3
    nb_cols = 3
    fig, axs = plt.subplots(nb_rows, nb_cols, figsize=(6, 6))

    images = images_per_class[label]
    # axs.flat walks the grid row by row, matching the original i/j order.
    for n, ax in enumerate(axs.flat):
        ax.xaxis.set_ticklabels([])
        ax.yaxis.set_ticklabels([])
        if n >= len(images):
            # Fewer than nine images in this class: leave the cell blank
            # instead of raising IndexError.
            ax.axis("off")
            continue
        # The images were loaded with cv2.imread (BGR), but imshow expects
        # RGB; convert so colours are not shown with red and blue swapped.
        ax.imshow(cv2.cvtColor(images[n], cv2.COLOR_BGR2RGB))

# Display sample images of the "Small-flowered Cranesbill" category
plot_for_class("Small-flowered Cranesbill")


# Image preprocessing
# Build the mask
def create_mask_for_plant(image):
    """Return a binary mask of the pixels whose HSV colour is in range.

    Args:
        image: Image in BGR channel order (it is converted with
            ``cv2.COLOR_BGR2HSV``).

    Returns:
        The single-channel mask produced by ``cv2.inRange`` after a
        morphological closing with an 11x11 elliptical kernel.
    """
    # HSV window: hue within +/-35 of 60, saturation >= 100, value >= 50.
    # NOTE(review): hue 60 is presumably green on OpenCV's 8-bit hue scale,
    # given the function name - confirm against the OpenCV colour docs.
    hue_center = 60
    hue_tolerance = 35
    hsv_low = np.array([hue_center - hue_tolerance, 100, 50])
    hsv_high = np.array([hue_center + hue_tolerance, 255, 255])

    hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    raw_mask = cv2.inRange(hsv_image, hsv_low, hsv_high)

    # Closing (dilate then erode) fills small holes inside the mask.
    closing_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (11, 11))
    return cv2.morphologyEx(raw_mask, cv2.MORPH_CLOSE, closing_kernel)

# Cut the original image out with the mask built above (cv2.bitwise_and)
def segment_plant(image):
    """Return ``image`` with every pixel outside the plant mask set to zero.

    Args:
        image: BGR image, passed unchanged to ``create_mask_for_plant``.

    Returns:
        The result of ``cv2.bitwise_and(image, image, mask=...)``.
    """
    # AND-ing the image with itself keeps pixel values where the mask is
    # non-zero and zeroes them elsewhere.
    return cv2.bitwise_and(image, image, mask=create_mask_for_plant(image))

# Sharpen an image by subtracting part of a Gaussian-blurred copy from it.
# cv2.GaussianBlur smooths (blurs) the image; cv2.addWeighted computes a
# weighted sum of two images.
def sharpen_image(image):
    """Return a sharpened version of ``image``.

    Args:
        image: Image array accepted by ``cv2.GaussianBlur``.

    Returns:
        ``1.5 * image - 0.5 * blurred``, as computed by ``cv2.addWeighted``.
    """
    # ksize=(0, 0) makes OpenCV derive the kernel size from sigma (3).
    blurred = cv2.GaussianBlur(image, (0, 0), 3)
    # Weighted sum: 1.5 * image + (-0.5) * blurred + 0.
    return cv2.addWeighted(image, 1.5, blurred, -0.5, 0)


# Try the whole pipeline on a single image
image = images_per_class["Small-flowered Cranesbill"][97]

image_mask = create_mask_for_plant(image)
image_segmented = segment_plant(image)
image_sharpen = sharpen_image(image_segmented)

# Show original, mask, segmented and sharpened images side by side.
fig, axs = plt.subplots(1, 4, figsize=(20, 20))
# The colour images are in OpenCV's BGR order, but imshow expects RGB;
# convert for display so red and blue are not swapped. The mask is
# single-channel and is shown as-is.
axs[0].imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
axs[1].imshow(image_mask)
axs[2].imshow(cv2.cvtColor(image_segmented, cv2.COLOR_BGR2RGB))
axs[3].imshow(cv2.cvtColor(image_sharpen, cv2.COLOR_BGR2RGB))


# Extract contours and derive information from them (area, largest object, ...)
def find_contours(mask_image):
    """Return the contours found in ``mask_image``.

    Args:
        mask_image: Single-channel image passed to ``cv2.findContours``.

    Returns:
        The second-to-last element of the ``cv2.findContours`` result.
    """
    found = cv2.findContours(
        mask_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )
    # OpenCV 3.x returns (image, contours, hierarchy) while other versions
    # return (contours, hierarchy); index -2 is the contours in both layouts.
    return found[-2]

def calculate_largest_contour_area(contours):
    """Return the area of the largest contour, or 0 if there are none.

    Args:
        contours: Sequence of contours as accepted by ``cv2.contourArea``.

    Returns:
        The maximum ``cv2.contourArea`` over ``contours``; ``0`` when
        ``contours`` is empty.
    """
    # The area of the largest contour is simply the largest of the areas.
    return max(map(cv2.contourArea, contours)) if len(contours) else 0

def calculate_contours_area(contours, min_contour_area = 250):
    """Return the summed area of all sufficiently large contours.

    Args:
        contours: Iterable of contours as accepted by ``cv2.contourArea``.
        min_contour_area: Contours with a smaller area are ignored.

    Returns:
        Sum of the areas that are ``>= min_contour_area``; ``0`` when no
        contour qualifies.
    """
    # Lazy generator: each contour's area is computed exactly once.
    contour_areas = (cv2.contourArea(contour) for contour in contours)
    return sum(a for a in contour_areas if a >= min_contour_area)


# Per-image features, collected as parallel lists (one entry per image).
areas = []
larges_contour_areas = []
labels = []
nb_of_contours = []
images_height = []
images_width = []

for class_label, class_images in images_per_class.items():
    for image in class_images:
        # Contours of the plant mask drive all area-based features.
        contours = find_contours(create_mask_for_plant(image))
        # shape is (height, width, channels); only the first two are used.
        height, width = image.shape[:2]

        labels.append(class_label)
        areas.append(calculate_contours_area(contours))
        larges_contour_areas.append(calculate_largest_contour_area(contours))
        nb_of_contours.append(len(contours))
        images_height.append(height)
        images_width.append(width)


# Assemble the collected per-image features into one table
# (one row per image, columns in the order listed here).
features_df = pd.DataFrame({
    "label": labels,
    "area": areas,
    "largest_area": larges_contour_areas,
    "number_of_components": nb_of_contours,
    "height": images_height,
    "width": images_width,
})


# Summary statistics of every feature, grouped by class label.
features_df.groupby("label").describe()