Welcome to Computer Vision - Object detection
Solution: Object Detection Using YOLO
Task 1: Run the below cell to import all necessary packages required for this hands-on
!alias python=python3
!alias pip=pip3
import argparse
import os
import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow
import scipy.io
import scipy.misc
import numpy as np
import pandas as pd
import PIL
from keras.models import load_model, Model
from keras import backend as K
import tensorflow as tf
from utils import read_classes, read_anchors, generate_colors, preprocess_image, draw_boxes, scale_boxes
from yad2k.models.keras_yolo import yolo_head, yolo_boxes_to_corners, preprocess_true_boxes, yolo_loss, yolo_body
Task 2: Run the cell below to download the weights file and convert it to a Keras model (model_data/yolo_tiny.h5).
!curl -O https://pjreddie.com/media/files/yolov2-tiny-voc.weights
!python3 yad2k.py yolo_tiny.cfg yolov2-tiny-voc.weights model_data/yolo_tiny.h5
Task 3: Keep only the boxes whose box_scores are at or above a threshold, and return the corresponding scores, box dimensions and classes.
Instructions:-
Filters YOLO boxes based on object and class confidence scores.
- Extract, for each box, the class index with the maximum score and the corresponding score value, and assign them to the variables box_classes and box_class_scores
- Compute box_scores by multiplying box_confidence and box_class_probs
- Parameters:
- - box_confidence: Tensor of shape (grid_size, grid_size, anchors, 1), confidence score of object presence
- - boxes: Tensor of shape (grid_size, grid_size, anchors, 4), bounding box coordinates
- - box_class_probs: Tensor of shape (grid_size, grid_size, anchors, num_classes), class probabilities
- - threshold: float, minimum confidence score for a box to be kept
- Returns:
- - scores: Filtered confidence scores
- - boxes: Filtered bounding box coordinates
- - classes: Class indices of filtered boxes
import tensorflow as tf
def yolo_filter_boxes(box_confidence, boxes, box_class_probs, threshold=0.6):
    """Drop every box whose best class score is below ``threshold``.

    Args:
        box_confidence: Tensor of shape (grid_size, grid_size, anchors, 1)
            holding the object-presence confidence of each box.
        boxes: Tensor of shape (grid_size, grid_size, anchors, 4) holding
            the box coordinates.
        box_class_probs: Tensor of shape
            (grid_size, grid_size, anchors, num_classes) holding the class
            probabilities of each box.
        threshold: Minimum best-class score a box needs in order to be kept.

    Returns:
        A tuple ``(scores, boxes, classes)`` containing, for the surviving
        boxes only, the best class score, the box coordinates and the index
        of the best class.
    """
    # Per-class score of each box: object confidence times class probability.
    per_class_scores = box_confidence * box_class_probs

    # Collapse the class axis: which class wins, and with what score.
    best_class = tf.argmax(per_class_scores, axis=-1)
    best_score = tf.reduce_max(per_class_scores, axis=-1)

    # True wherever the winning score reaches the threshold.
    keep = best_score >= threshold

    # The same mask is applied to all three tensors so they stay aligned.
    kept_scores, kept_boxes, kept_classes = (
        tf.boolean_mask(candidate, keep)
        for candidate in (best_score, boxes, best_class)
    )
    return kept_scores, kept_boxes, kept_classes
Task 4: Run the below cell to test the function defined above.
Expected output!
- scores[2] = 18.66819
- boxes[2] = [ 1.9018091 -3.9903622 -4.009363 -3.981121 ]
- classes[2] = 21
- scores.shape = (?,)
- boxes.shape = (?, 4)
- classes.shape = (?,)
# Smoke test for yolo_filter_boxes on random inputs.
with tf.Session() as test_a:
    box_confidence = tf.random_normal([19, 19, 5, 1], mean=1, stddev=4, seed=51)
    boxes = tf.random_normal([19, 19, 5, 4], mean=1, stddev=4, seed=51)
    box_class_probs = tf.random_normal([19, 19, 5, 80], mean=1, stddev=4, seed=51)
    scores, boxes, classes = yolo_filter_boxes(
        box_confidence, boxes, box_class_probs, threshold=0.5
    )
    # Each .eval() is a separate session run, so the evaluation order below
    # (scores, then boxes, then classes) is kept fixed.
    for label, tensor in (("scores", scores), ("boxes", boxes), ("classes", classes)):
        print("{}[2] = {}".format(label, tensor[2].eval()))
    # Static shapes: the number of surviving boxes is unknown until run time.
    for label, tensor in (("scores", scores), ("boxes", boxes), ("classes", classes)):
        print("{}.shape = {}".format(label, tensor.shape))
# Output:
'''
scores[2] = 18.66819
boxes[2] = [-0.40550387 1.6398532 -1.2696881 1.3711817 ]
classes[2] = 21
scores.shape = (?,)
boxes.shape = (?, 4)
classes.shape = (?,)
'''
Task 5: Define the function yolo_non_max_suppression to implement non-max suppression, which eliminates overlapping boxes.
import tensorflow as tf
def yolo_non_max_suppression(scores, boxes, classes, max_boxes=10, iou_threshold=0.5):
    """Apply non-max suppression and return only the selected boxes.

    Args:
        scores: Tensor with one score per box.
        boxes: Tensor with one row of four coordinates per box.
        classes: Tensor with one class entry per box.
        max_boxes: Upper bound on the number of boxes to select.
        iou_threshold: IoU threshold handed to
            ``tf.image.non_max_suppression``.

    Returns:
        A tuple ``(scores, boxes, classes)`` restricted to the indices
        chosen by ``tf.image.non_max_suppression``.
    """
    # Indices of the boxes that survive suppression.
    selected = tf.image.non_max_suppression(
        boxes, scores, max_boxes, iou_threshold=iou_threshold
    )

    # Pick the same rows out of all three tensors so they stay aligned.
    return (
        tf.gather(scores, selected),
        tf.gather(boxes, selected),
        tf.gather(classes, selected),
    )
Task 6: Run the below cell to validate the function defined above.
Expected output!
- scores[2] = 7.2168345
- boxes[2] = [ 1.3835918 -2.2073252 2.5128307 -10.085559 ]
- classes[2] = -5.1675873
- scores.shape = (10,)
- boxes.shape = (10, 4)
- classes.shape = (10,)
# Smoke test for yolo_non_max_suppression on random inputs.
with tf.Session() as test_b:
    scores = tf.random_normal([54], mean=1, stddev=4, seed=51)
    boxes = tf.random_normal([54, 4], mean=1, stddev=4, seed=51)
    classes = tf.random_normal([54], mean=1, stddev=4, seed=51)
    scores, boxes, classes = yolo_non_max_suppression(scores, boxes, classes)
    test_b.run(tf.global_variables_initializer())
    # Each .eval() is a separate session run, so the evaluation order below
    # (scores, then boxes, then classes) is kept fixed.
    for label, tensor in (("scores", scores), ("boxes", boxes), ("classes", classes)):
        print("{}[2] = {}".format(label, tensor[2].eval()))
    # Shapes are taken from the evaluated arrays, not the static graph shape.
    for label, tensor in (("scores", scores), ("boxes", boxes), ("classes", classes)):
        print("{}.shape = {}".format(label, tensor.eval().shape))
# Output:
'''
scores[2] = 7.2168345
boxes[2] = [ 1.3835918 -2.2073252 2.5128307 -10.085559 ]
classes[2] = -5.1675873
scores.shape = (10,)
boxes.shape = (10, 4)
classes.shape = (10,)
'''
Task 7: Define the function yolo_eval, which combines all the functions defined above to extract a single prediction box for each object.
import tensorflow as tf
def yolo_eval(yolo_outputs, image_shape=(720., 1280.), max_boxes=5, score_threshold=0.6, iou_threshold=0.5):
    """Turn raw YOLO head outputs into a final set of predicted boxes.

    The pipeline is: convert boxes to corner form, filter by score, rescale
    to the image size, then apply non-max suppression.

    Args:
        yolo_outputs: Sequence of four tensors, unpacked in the order
            ``(box_xy, box_wh, box_confidence, box_class_probs)``.
        image_shape: Image size passed to ``scale_boxes``.
        max_boxes: Maximum number of boxes kept by non-max suppression.
        score_threshold: Minimum score passed to ``yolo_filter_boxes``.
        iou_threshold: IoU threshold passed to ``yolo_non_max_suppression``.

    Returns:
        The ``(scores, boxes, classes)`` tuple produced by
        ``yolo_non_max_suppression``.
    """
    xy, wh, confidence, class_probs = yolo_outputs

    # Centre/size representation -> corner representation.
    corner_boxes = yolo_boxes_to_corners(xy, wh)

    # Discard low-scoring boxes before the more expensive suppression step.
    kept_scores, kept_boxes, kept_classes = yolo_filter_boxes(
        confidence, corner_boxes, class_probs, threshold=score_threshold
    )

    # Rescale the surviving boxes for the target image.
    scaled_boxes = scale_boxes(kept_boxes, image_shape)

    final_scores, final_boxes, final_classes = yolo_non_max_suppression(
        kept_scores,
        scaled_boxes,
        kept_classes,
        max_boxes=max_boxes,
        iou_threshold=iou_threshold,
    )
    return final_scores, final_boxes, final_classes
Task 8: Run the below cell to validate the yolo_eval function defined above.
Expected output!
- scores[2] = 155.1541
- boxes[2] = [ 109.86103 -5404.5874 329.5831 -16213.762 ]
- classes[2] = 16
- scores.shape = (5,)
- boxes.shape = (5, 4)
- classes.shape = (5,)
# Smoke test for yolo_eval on random inputs.
with tf.Session() as test_b:
    # Four random tensors standing in for (box_xy, box_wh, box_confidence,
    # box_class_probs); only the size of the last axis differs.
    yolo_outputs = tuple(
        tf.random_normal([19, 19, 5, depth], mean=1, stddev=4, seed=51)
        for depth in (2, 2, 1, 80)
    )
    scores, boxes, classes = yolo_eval(yolo_outputs)
    test_b.run(tf.global_variables_initializer())
    # Each .eval() is a separate session run, so the evaluation order below
    # (scores, then boxes, then classes) is kept fixed.
    for label, tensor in (("scores", scores), ("boxes", boxes), ("classes", classes)):
        print("{}[2] = {}".format(label, tensor[2].eval()))
    # Shapes are taken from the evaluated arrays, not the static graph shape.
    for label, tensor in (("scores", scores), ("boxes", boxes), ("classes", classes)):
        print("{}.shape = {}".format(label, tensor.eval().shape))
# Output:
'''
scores[2] = 155.1541
boxes[2] = [ 109.86103 -5404.5874 329.5831 -16213.762 ]
classes[2] = 16
scores.shape = (5,)
boxes.shape = (5, 4)
classes.shape = (5,)
'''
Task 9: Run the below cell to load the model.
# Session used by the Keras backend; predict() runs the detection graph in it.
sess = K.get_session()
# Class names and anchor boxes are read from the model_data directory.
class_names = read_classes("model_data/pascal_classes.txt")
anchors = read_anchors("model_data/yolo_tiny_anchors.txt")
# Image size later passed to yolo_eval.
# NOTE(review): presumably (height, width) of the input image — confirm against scale_boxes.
image_shape = (375., 500.)
# Load the Keras model file written by the yad2k.py conversion step in Task 2.
yolo_model = load_model("model_data/yolo_tiny.h5")
yolo_model.summary()
Task 10: Extract predictions:
# Convert the raw model output into the tuple of tensors consumed by yolo_eval.
# NOTE(review): yolo_eval unpacks this as (box_xy, box_wh, box_confidence,
# box_class_probs) — confirm that this matches the return order of yolo_head.
yolo_outputs = yolo_head(yolo_model.output, anchors, len(class_names))
# Build the filtering + non-max-suppression ops once; predict() evaluates these
# module-level tensors. yolo_eval's default thresholds and max_boxes are used.
scores, boxes, classes = yolo_eval(yolo_outputs, image_shape)
def predict(sess, image_file):
    """Detect objects in one image, save the annotated image and display it.

    Evaluates the module-level ``scores``, ``boxes`` and ``classes`` tensors
    on the given image, draws the detections on the image, writes the result
    to ``out/<image_file>`` and shows it with matplotlib.

    Args:
        sess: TensorFlow session in which the model weights are loaded.
        image_file: Image path handed to ``preprocess_image``; it is also
            used as the file name of the annotated copy under ``out/``.

    Returns:
        A tuple ``(out_scores, out_boxes, out_classes)`` with the evaluated
        values of the ``scores``, ``boxes`` and ``classes`` tensors.
    """
    # NOTE(review): preprocess_image presumably returns the PIL image plus a
    # batched array matching the model input — confirm in utils.
    image, image_data = preprocess_image(image_file, model_image_size=(416, 416))

    # Do NOT run tf.global_variables_initializer() here. It would reset every
    # variable in the graph, overwriting the weights that load_model()
    # restored and turning the predictions into noise.
    out_scores, out_boxes, out_classes = sess.run(
        [scores, boxes, classes],
        # learning_phase 0 selects inference mode.
        feed_dict={yolo_model.input: image_data, K.learning_phase(): 0},
    )
    print('Found {} boxes for {}'.format(len(out_boxes), image_file))

    # Draw the detections on the image, one colour per class.
    colors = generate_colors(class_names)
    draw_boxes(image, out_scores, out_boxes, out_classes, class_names, colors)

    # Create the output directory first so a fresh checkout does not fail
    # on image.save().
    output_path = os.path.join("out", image_file)
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    image.save(output_path, quality=90)

    # scipy.misc.imread was removed in SciPy 1.2; matplotlib reads the saved
    # file back into an array just as well.
    output_image = plt.imread(output_path)
    imshow(output_image)
    return out_scores, out_boxes, out_classes
Task 11: Run the below cell to detect objects on an image file.
# Run detection on the sample image; predict() prints the number of boxes found,
# saves the annotated copy under out/ and returns (out_scores, out_boxes, out_classes).
predict(sess, "men.JPEG")