Welcome to Computer Vision - Object detection

Solution: Object Detection Using YOLO

Task 1: Run the below cell to import all necessary packages required for this hands-on

!alias python=python3
!alias pip=pip3
import argparse
import os
import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow
import scipy.misc
import numpy as np
import pandas as pd
import PIL
from keras.models import load_model, Model
from keras import backend as K
import tensorflow as tf

from utils import read_classes, read_anchors, generate_colors, preprocess_image, draw_boxes, scale_boxes
from yad2k.models.keras_yolo import yolo_head, yolo_boxes_to_corners, preprocess_true_boxes, yolo_loss, yolo_body

Task 2: Run the cell, download weights file

!curl -O 

# Convert the Darknet config + weights into the Keras model file that is loaded later.
# NOTE(review): the script name was missing from this command; yad2k.py (the YAD2K
# converter, called as <cfg> <weights> <output.h5>) is assumed to sit in the working directory — confirm.
!python3 yad2k.py yolo_tiny.cfg yolov2-tiny-voc.weights model_data/yolo_tiny.h5

Task 3: Keep only the boxes whose box_scores are at or above a threshold, and return the corresponding scores, box dimensions and classes.

Filters YOLO boxes based on object and class confidence scores.
  • Extracts class indices of maximum score and their corresponding score values and assign it to variable box_classes and box_class_scores
  • Compute box_scores by multiplying box_confidence and box_class_probs
  • Parameters:
    • - box_confidence: Tensor of shape (grid_size, grid_size, anchors, 1), confidence score of object presence
    • - boxes: Tensor of shape (grid_size, grid_size, anchors, 4), bounding box coordinates
    • - box_class_probs: Tensor of shape (grid_size, grid_size, anchors, num_classes), class probabilities
    • - threshold: float, minimum confidence score for a box to be kept
  • Returns:
    • - scores: Filtered confidence scores
    • - boxes: Filtered bounding box coordinates
    • - classes: Class indices of filtered boxes

import tensorflow as tf

def yolo_filter_boxes(box_confidence, boxes, box_class_probs, threshold=0.6):
    """Filter YOLO boxes based on object and class confidence scores.

    Args:
        box_confidence: Tensor of shape (grid_size, grid_size, anchors, 1),
            confidence that an object is present.
        boxes: Tensor of shape (grid_size, grid_size, anchors, 4),
            bounding box coordinates.
        box_class_probs: Tensor of shape
            (grid_size, grid_size, anchors, num_classes), class probabilities.
        threshold: Minimum best-class score for a box to be kept
            (the comparison is ``>=``).

    Returns:
        Tuple ``(scores, boxes, classes)`` holding, for every kept box, its
        best-class score, its coordinates and the index of its best class.
        The number of kept boxes is only known when the graph is run.
    """
    # Per-class score = objectness confidence * class probability
    # (the trailing 1 of box_confidence broadcasts over the class axis).
    box_scores = box_confidence * box_class_probs

    # Best class per box and the score it achieved.
    box_classes = tf.argmax(box_scores, axis=-1)
    box_class_scores = tf.reduce_max(box_scores, axis=-1)

    # True where the best-class score reaches the threshold.
    filtering_mask = box_class_scores >= threshold

    # boolean_mask flattens the masked leading dimensions, leaving one row per kept box.
    scores = tf.boolean_mask(box_class_scores, filtering_mask)
    boxes = tf.boolean_mask(boxes, filtering_mask)
    classes = tf.boolean_mask(box_classes, filtering_mask)

    return scores, boxes, classes

Task 4: Run the below cell to test the function defined above.

Expected output!
  • scores[2] = 18.66819
  • boxes[2] = [ 1.9018091 -3.9903622 -4.009363 -3.981121 ]
  • classes[2] = 21
  • scores.shape = (?,)
  • boxes.shape = (?, 4)
  • classes.shape = (?,)

# Smoke test for yolo_filter_boxes using randomly generated input tensors.
with tf.Session() as test_a:
    # Random stand-ins: 19x19 grid, 5 anchors, with 1 confidence value,
    # 4 box coordinates and 80 class scores per anchor.
    box_confidence = tf.random_normal([19, 19, 5, 1], mean=1, stddev=4, seed = 51)
    boxes = tf.random_normal([19, 19, 5, 4], mean=1, stddev=4, seed = 51)
    box_class_probs = tf.random_normal([19, 19, 5, 80], mean=1, stddev=4, seed = 51)
    scores, boxes, classes = yolo_filter_boxes(box_confidence, boxes, box_class_probs, threshold = 0.5)
    # Each .eval() is its own run in the session opened by the `with` block.
    # NOTE(review): the inputs are random ops, so separate runs may draw
    # different samples — confirm before comparing values across printed lines.
    print("scores[2] = " + str(scores[2].eval()))
    print("boxes[2] = " + str(boxes[2].eval()))
    print("classes[2] = " + str(classes[2].eval()))
    # Static tensor shapes (no graph run); the number of kept boxes prints as "?".
    print("scores.shape = " + str(scores.shape))
    print("boxes.shape = " + str(boxes.shape))
    print("classes.shape = " + str(classes.shape))
# Output:
scores[2] = 18.66819
boxes[2] = [-0.40550387  1.6398532  -1.2696881   1.3711817 ]
classes[2] = 21
scores.shape = (?,)
boxes.shape = (?, 4)
classes.shape = (?,)

Task 5: Define the function yolo_non_max_suppression to implement non-max suppression and eliminate overlapping boxes.

import tensorflow as tf

def yolo_non_max_suppression(scores, boxes, classes, max_boxes=10, iou_threshold=0.5):
    """Apply non-max suppression to a set of scored boxes.

    Args:
        scores: 1-D tensor with one score per box.
        boxes: 2-D tensor of shape (num_boxes, 4) with box coordinates.
        classes: 1-D tensor with one class value per box.
        max_boxes: Maximum number of boxes to keep.
        iou_threshold: Boxes overlapping an already selected box by more than
            this IoU are discarded.

    Returns:
        Tuple ``(scores, boxes, classes)`` restricted to the selected boxes;
        at most ``max_boxes`` entries each.
    """
    # Step 1: indices of the boxes that survive non-max suppression.
    # tf.image.non_max_suppression takes boxes first, then scores.
    nms_indices = tf.image.non_max_suppression(
        boxes, scores, max_boxes, iou_threshold=iou_threshold
    )

    # Step 2: keep only the selected entries of each tensor.
    scores = tf.gather(scores, nms_indices)
    boxes = tf.gather(boxes, nms_indices)
    classes = tf.gather(classes, nms_indices)
    return scores, boxes, classes

Task 6: Run the below cell to validate the function defined above.

Expected output!
  • scores[2] = 7.2168345
  • boxes[2] = [ 1.3835918 -2.2073252 2.5128307 -10.085559 ]
  • classes[2] = -5.1675873
  • scores.shape = (10,)
  • boxes.shape = (10, 4)
  • classes.shape = (10,)

# Smoke test for yolo_non_max_suppression using 54 randomly generated boxes.
with tf.Session() as test_b:
    scores = tf.random_normal([54,], mean=1, stddev=4, seed = 51)
    boxes = tf.random_normal([54, 4], mean=1, stddev=4, seed = 51)
    # classes is random float data here, so the printed class value is not an integer index.
    classes = tf.random_normal([54,], mean=1, stddev=4, seed = 51)
    scores, boxes, classes = yolo_non_max_suppression(scores, boxes, classes)
    # Each .eval() is its own run in the session opened by the `with` block.
    # NOTE(review): the inputs are random ops, so separate runs may draw
    # different samples — confirm before comparing values across printed lines.
    print("scores[2] = " + str(scores[2].eval()))
    print("boxes[2] = " + str(boxes[2].eval()))
    print("classes[2] = " + str(classes[2].eval()))
    # Shapes are taken from the evaluated arrays, so they show concrete sizes.
    print("scores.shape = " + str(scores.eval().shape))
    print("boxes.shape = " + str(boxes.eval().shape))
    print("classes.shape = " + str(classes.eval().shape))
# Output:
scores[2] = 7.2168345
boxes[2] = [  1.3835918  -2.2073252   2.5128307 -10.085559 ]
classes[2] = -5.1675873
scores.shape = (10,)
boxes.shape = (10, 4)
classes.shape = (10,)

Task 7: Define the function yolo_eval that combines all the functions defined above to extract a single prediction box for each object.

import tensorflow as tf

def yolo_eval(yolo_outputs, image_shape=(720., 1280.), max_boxes=5, score_threshold=0.6, iou_threshold=0.5):
    """Turn raw YOLO head outputs into final scores, boxes and classes.

    Pipeline: convert boxes to corner form, drop low-scoring boxes, rescale
    to the image, then run non-max suppression.

    Args:
        yolo_outputs: 4-tuple unpacked as
            (box_xy, box_wh, box_confidence, box_class_probs).
        image_shape: Passed to ``scale_boxes`` to rescale the kept boxes.
        max_boxes: Forwarded to ``yolo_non_max_suppression``.
        score_threshold: Forwarded to ``yolo_filter_boxes`` as ``threshold``.
        iou_threshold: Forwarded to ``yolo_non_max_suppression``.

    Returns:
        Tuple ``(scores, boxes, classes)`` as returned by
        ``yolo_non_max_suppression``.
    """
    xy, wh, confidence, class_probs = yolo_outputs

    # Corner representation of every predicted box.
    corner_boxes = yolo_boxes_to_corners(xy, wh)

    # Discard boxes whose best class score is below score_threshold.
    kept_scores, kept_boxes, kept_classes = yolo_filter_boxes(
        confidence, corner_boxes, class_probs, threshold=score_threshold
    )

    # Rescale the surviving boxes with respect to image_shape.
    scaled_boxes = scale_boxes(kept_boxes, image_shape)

    # Remove overlapping detections, keeping at most max_boxes.
    final_scores, final_boxes, final_classes = yolo_non_max_suppression(
        kept_scores,
        scaled_boxes,
        kept_classes,
        max_boxes=max_boxes,
        iou_threshold=iou_threshold,
    )
    return final_scores, final_boxes, final_classes

Task 8: Run the below cell to validate the yolo_eval function defined above.

Expected output!
  • scores[2] = 155.1541
  • boxes[2] = [ 109.86103 -5404.5874 329.5831 -16213.762 ]
  • classes[2] = 16
  • scores.shape = (5,)
  • boxes.shape = (5, 4)
  • classes.shape = (5,)

# Smoke test for yolo_eval using randomly generated head outputs.
with tf.Session() as test_b:
    # Order matches what yolo_eval unpacks: box_xy, box_wh, box_confidence, box_class_probs.
    yolo_outputs = (tf.random_normal([19, 19, 5, 2], mean=1, stddev=4, seed = 51),
                    tf.random_normal([19, 19, 5, 2], mean=1, stddev=4, seed = 51),
                    tf.random_normal([19, 19, 5, 1], mean=1, stddev=4, seed = 51),
                    tf.random_normal([19, 19, 5, 80], mean=1, stddev=4, seed = 51))
    # Uses yolo_eval's defaults (image_shape=(720., 1280.), max_boxes=5, ...).
    scores, boxes, classes = yolo_eval(yolo_outputs)
    # Each .eval() is its own run in the session opened by the `with` block.
    # NOTE(review): the inputs are random ops, so separate runs may draw
    # different samples — confirm before comparing values across printed lines.
    print("scores[2] = " + str(scores[2].eval()))
    print("boxes[2] = " + str(boxes[2].eval()))
    print("classes[2] = " + str(classes[2].eval()))
    # Shapes are taken from the evaluated arrays, so they show concrete sizes.
    print("scores.shape = " + str(scores.eval().shape))
    print("boxes.shape = " + str(boxes.eval().shape))
    print("classes.shape = " + str(classes.eval().shape))

# Output:
scores[2] = 155.1541
boxes[2] = [   109.86103  -5404.5874     329.5831  -16213.762  ]
classes[2] = 16
scores.shape = (5,)
boxes.shape = (5, 4)
classes.shape = (5,)

Task 9: Run the below cell to load the model.

# Session from the Keras backend; it is handed to predict() further down.
sess = K.get_session()
# Class labels and anchor boxes are read from text files under model_data/.
class_names = read_classes("model_data/pascal_classes.txt")
anchors = read_anchors("model_data/yolo_tiny_anchors.txt")
# Passed to yolo_eval, which forwards it to scale_boxes.
# NOTE(review): presumably (height, width) of the input image — confirm against scale_boxes.
image_shape = (375., 500.)
# Keras model file produced by the weights-conversion command earlier in the notebook.
yolo_model = load_model("model_data/yolo_tiny.h5")

Task 10: Extract predictions:

# Build the prediction tensors once at module level; predict() reads
# scores, boxes and classes as globals.
# yolo_head() output is unpacked by yolo_eval as
# (box_xy, box_wh, box_confidence, box_class_probs).
yolo_outputs = yolo_head(yolo_model.output, anchors, len(class_names))
# Filter boxes by score and apply non-max suppression.
scores, boxes, classes = yolo_eval(yolo_outputs, image_shape)

def predict(sess, image_file):
    """Run the YOLO graph on one image, save the annotated image and return the detections.

    Relies on module-level names created by earlier cells: the output tensors
    ``scores``, ``boxes`` and ``classes``, plus ``yolo_model`` and
    ``class_names``.

    Args:
        sess: Session in which the graph is run.
        image_file: Image path handed to ``preprocess_image``; also used as
            the file name under the ``out`` directory.

    Returns:
        Tuple ``(out_scores, out_boxes, out_classes)`` with the evaluated
        values of the three output tensors.
    """
    # Preprocess the image to the 416x416 model input size.
    image, image_data = preprocess_image(image_file, model_image_size=(416, 416))

    # Run the session to get predictions; learning phase 0 selects inference mode.
    out_scores, out_boxes, out_classes = sess.run(
        [scores, boxes, classes],
        feed_dict={yolo_model.input: image_data, K.learning_phase(): 0},
    )

    print('Found {} boxes for {}'.format(len(out_boxes), image_file))

    # Generate colors for the classes
    colors = generate_colors(class_names)
    # Draw the predicted boxes on the image
    draw_boxes(image, out_scores, out_boxes, out_classes, class_names, colors)

    # Save the output image; create the target directory first so the save
    # does not fail on a fresh checkout.
    # NOTE(review): assumes `image` is a PIL image, whose save() accepts quality= — confirm against preprocess_image.
    out_path = os.path.join("out", image_file)
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    image.save(out_path, quality=90)

    # Display the image
    # NOTE(review): scipy.misc.imread is unavailable in recent SciPy releases;
    # kept because the rest of the notebook targets the same older stack.
    output_image = scipy.misc.imread(out_path)
    imshow(output_image)
    return out_scores, out_boxes, out_classes

Task 11: Run the below cell to detect objects on an image file.

# Detect objects in men.JPEG using the session obtained from K.get_session().
predict(sess, "men.JPEG")

