Source code for face_detection_process

import sys
import os
sys.path.append(os.path.join(os.path.expanduser('~'), 'facenet', 'src'))
import svgwrite
import cv2
import numpy as np
# import imutils
import time
import argparse
import redis

from align import detect_face
from facenet import prewhiten, crop, flip, to_rgb

import tensorflow as tf
import facenet
import pickle
from scipy import misc
from scipy.spatial.distance import cdist
from imutils.video import FileVideoStream
from glob import glob
from boto.s3.key import Key
import boto
import boto3
from base64 import b64decode
# from user_definition import key_id, access_key


def load_img(img, do_random_crop, do_random_flip, image_size,
             do_prewhiten=True):
    '''
    Process the captured images from the webcam, prewhitening, cropping and
    flipping as required. Returns the processed image.
    '''
    images = np.zeros((1, image_size, image_size, 3))
    if img.ndim == 2:
        img = to_rgb(img)
    if do_prewhiten:
        img = prewhiten(img)
    img = crop(img, do_random_crop, image_size)
    img = flip(img, do_random_flip)
    images[:, :, :, :] = img
    return images

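# Hedged usage sketch (not part of the original module): turns a single face
# crop read with OpenCV into the (1, image_size, image_size, 3) batch that
# load_img returns. The file name "face_crop.jpg" is an illustrative
# assumption, not a file shipped with this project.
def _example_load_img():
    face = cv2.imread("face_crop.jpg")            # hypothetical input image
    face = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)  # OpenCV decodes as BGR
    face = cv2.resize(face, (160, 160))           # crop() expects image_size
    batch = load_img(face, do_random_crop=False, do_random_flip=False,
                     image_size=160)
    print(batch.shape)                            # (1, 160, 160, 3)
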
def load_and_align_data(image_paths, image_size=160, margin=44):
    """
    Load user images and detect faces. Returns the processed images in which
    faces were detected, together with the paths that succeeded.
    """
    minsize = 20                 # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # three steps' threshold
    factor = 0.709               # scale factor

    print('Creating networks and loading parameters')
    with tf.Graph().as_default():
        sess = tf.Session()
        with sess.as_default():
            pnet, rnet, onet = detect_face.create_mtcnn(sess, None)

    img_list = []
    success_paths = []
    for img_path in image_paths:
        img = misc.imread(img_path, mode='RGB')
        img_size = np.asarray(img.shape)[0:2]
        bounding_boxes, _ = detect_face.detect_face(img, minsize, pnet, rnet,
                                                    onet, threshold, factor)
        if len(bounding_boxes) < 1:
            print("no face found in {}".format(img_path))
            continue
        det = np.squeeze(bounding_boxes[0, 0:4])
        bb = np.zeros(4, dtype=np.int32)
        bb[0] = np.maximum(det[0]-margin/2, 0)
        bb[1] = np.maximum(det[1]-margin/2, 0)
        bb[2] = np.minimum(det[2]+margin/2, img_size[1])
        bb[3] = np.minimum(det[3]+margin/2, img_size[0])
        cropped = img[bb[1]:bb[3], bb[0]:bb[2], :]
        aligned = misc.imresize(cropped, (image_size, image_size),
                                interp='bilinear')
        prewhitened = prewhiten(aligned)
        img_list.append(prewhitened)
        success_paths.append(img_path)
    images = np.stack(img_list)
    return success_paths, images

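# Hedged usage sketch (not part of the original module): aligns every image in
# the "photos/" directory (the same directory write_svg_facenet_emb reads) and
# reports how many files actually contained a detectable face.
def _example_load_and_align_data():
    paths = glob("photos/*")
    success_paths, aligned = load_and_align_data(paths, image_size=160,
                                                 margin=44)
    print("{} of {} images contained a face".format(len(success_paths),
                                                    len(paths)))
    print(aligned.shape)  # (n_faces, 160, 160, 3)
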
def resize_image(img, image_size, do_prewhiten=True):
    '''
    Processes a single image by whitening and cropping
    '''
    print(img.shape)
    if img.ndim == 2:
        img = to_rgb(img)
    if do_prewhiten:
        img = prewhiten(img)
    img = crop(img, False, image_size)
    img = flip(img, False)
    img = np.expand_dims(img, axis=0)
    # images[:, :, :, :] = img
    # return images
    return img

def align_face(img, pnet, rnet, onet):
    '''
    Detect and align faces from a frame, returning the detected faces and
    the bounding boxes for the faces.
    '''
    print("start detect")
    minsize = 20                 # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # three steps' threshold
    factor = 0.709               # scale factor
    image_size = 160

    if img.size == 0:
        print("empty array")
        return False, img, [0, 0, 0, 0]
    if img.ndim < 2:
        print('Unable to align')
    if img.ndim == 2:
        img = to_rgb(img)
    img = img[:, :, 0:3]

    margin = 44
    bounding_boxes, _ = detect_face.detect_face(img, minsize, pnet, rnet,
                                                onet, threshold, factor)
    print("done detect")
    nrof_faces = bounding_boxes.shape[0]
    detect_multiple_faces = True
    if nrof_faces == 0:
        return False, img, [0, 0, 0, 0]
    else:
        det = bounding_boxes[:, 0:4]
        det_arr = []
        img_size = np.asarray(img.shape)[0:2]
        if nrof_faces > 1:
            if detect_multiple_faces:
                for i in range(nrof_faces):
                    det_arr.append(np.squeeze(det[i]))
            else:
                bounding_box_size = (det[:, 2]-det[:, 0])*(det[:, 3]-det[:, 1])
                img_center = img_size / 2
                offsets = np.vstack(
                    [(det[:, 0]+det[:, 2])/2-img_center[1],
                     (det[:, 1]+det[:, 3])/2-img_center[0]])
                offset_dist_squared = np.sum(np.power(offsets, 2.0), 0)
                # some extra weight on the centering
                index = np.argmax(bounding_box_size-offset_dist_squared*2.0)
                det_arr.append(det[index, :])
        else:
            det_arr.append(np.squeeze(det))

        if len(det_arr) > 0:
            faces = []
            bboxes = []
            for i, det in enumerate(det_arr):
                det = np.squeeze(det)
                bb = np.zeros(4, dtype=np.int32)
                bb[0] = np.maximum(det[0]-margin/2, 0)
                bb[1] = np.maximum(det[1]-margin/2, 0)
                bb[2] = np.minimum(det[2]+margin/2, img_size[1])
                bb[3] = np.minimum(det[3]+margin/2, img_size[0])
                cropped = img[bb[1]:bb[3], bb[0]:bb[2], :]
                scaled = misc.imresize(cropped, (image_size, image_size),
                                       interp='bilinear')
                faces.append(scaled)
                bboxes.append(bb)

    print("leaving align face")
    return True, faces, bboxes

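# Hedged usage sketch (not part of the original module): builds the three
# MTCNN networks once, grabs a single frame, and runs align_face on it.
# Capture device 0 is an illustrative assumption; the live code uses a
# network stream URL instead.
def _example_align_face():
    with tf.Graph().as_default():
        sess = tf.Session()
        with sess.as_default():
            pnet, rnet, onet = detect_face.create_mtcnn(sess, None)
    capture = cv2.VideoCapture(0)  # hypothetical local camera
    ret, frame = capture.read()
    if ret:
        found, faces, bboxes = align_face(frame, pnet, rnet, onet)
        if found:
            print("{} faces, first bbox: {}".format(len(faces), bboxes[0]))
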
def identify_person(image_vector, feature_names, feature_np, k=9):
    '''
    Calculates the Euclidean distance between a face embedding and the
    stored embeddings, returning the identity of the stored embedding most
    similar to the face embedding and the distance between these embeddings.
    '''
    d = np.squeeze(cdist(image_vector, feature_np, metric='euclidean'))
    top_k_ind = np.argsort(d).tolist()[:k]
    result = feature_names[top_k_ind[0]]
    distance = d[top_k_ind[0]]
    name = result.split("_")[0]
    return name, distance

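# Hedged sketch (not part of the original module): illustrates the nearest-
# neighbour lookup identify_person performs. The 512-dimensional random
# embeddings and the names "alice"/"bob" are illustrative assumptions; real
# embeddings come from the facenet model loaded in write_svg_facenet_emb.
def _example_identify_person():
    feature_dict = {"alice_0": np.random.rand(1, 512),
                    "bob_0": np.random.rand(1, 512)}
    feature_names = np.array(list(feature_dict.keys()))
    feature_np = np.squeeze(list(feature_dict.values()))
    query = np.random.rand(1, 512)
    name, distance = identify_person(query, feature_names, feature_np, k=1)
    print("closest identity: {} (distance {:.3f})".format(name, distance))
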
def write_svg_facenet_emb(stream_url):
    '''
    Reads the facenet model and the saved embeddings from disk, and connects
    to the in-memory Redis database. Detects faces in the specified stream
    and calculates the corresponding bounding boxes. Writes the bounding
    boxes for all detected and identified faces to an svg overlay which is
    then saved to Redis to be accessed by other processes.
    '''
    dim1, dim2 = "1280px", "720px"

    print("[INFO] opening redis connection")
    redis_db = redis.StrictRedis(host="localhost", port=6379, db=0)

    # load the saved embeddings from disk
    print("[INFO] loading model...")
    # with open('extracted_dict.pickle', 'rb') as f:  # TODO CHANGE THIS
    with open('startup_extracted_dict.pickle', 'rb') as f:
        feature_dict = pickle.load(f)
    feature_names = np.array(list(feature_dict.keys()))
    feature_np = np.squeeze(list(feature_dict.values()))

    # load the serialized facenet model from disk
    # model_exp = "20180408-102900/"
    model_exp = "20180402-114759"
    graph_fr = tf.Graph()
    sess_fr = tf.Session(graph=graph_fr)
    with graph_fr.as_default():
        print("Loading graph")
        saverf = tf.train.import_meta_graph(
            os.path.join(model_exp, 'model-20180402-114759.meta'))
        saverf.restore(sess_fr, os.path.join(
            model_exp, 'model-20180402-114759.ckpt-275'))
        pnet, rnet, onet = detect_face.create_mtcnn(sess_fr, None)

    sess = sess_fr
    images_placeholder = sess.graph.get_tensor_by_name("input:0")
    images_placeholder = tf.image.resize_images(images_placeholder, (160, 160))
    embeddings = sess.graph.get_tensor_by_name("embeddings:0")
    phase_train_placeholder = sess.graph.get_tensor_by_name("phase_train:0")
    image_size = 160
    embedding_size = embeddings.get_shape()[1]

    print("Starting prediction")
    # fvs = FileVideoStream(stream_url, queue_size=1).start()
    use_fvs = False
    use_buffer = True
    if use_fvs:
        fvs = FileVideoStream(stream_url, queue_size=30).start()
    else:
        capture = cv2.VideoCapture(stream_url)
        print("got capture")

    while True:
        print("capture frame")

        # when another process requests new embeddings, show a temporary
        # overlay, embed the uploaded photos and update the pickle on disk
        if redis_db.get('create_embs'):
            print("create temp svg")
            svg_document = svgwrite.Drawing(size=(dim1, dim2))
            svg_document.add(svg_document.rect(
                insert=(0, 0),
                size=(dim1, dim2),
                stroke_width="10",
                stroke="green",
                fill="rgb(0,0,0)",
                fill_opacity=0)
            )
            text_style = "font-size:70px; font-family:Courier New; " \
                         "fill:rgb(255,0,0);"
            svg_document.add(svg_document.text(
                "Adding",
                insert=(100, 200),
                fill="black",
                style=text_style)
            )
            svg_document.add(svg_document.text(
                "person",
                insert=(100, 300),
                fill="black",
                style=text_style)
            )
            svg_string = svg_document.tostring()
            print("write tmp overlay")
            redis_db.set('overlay', svg_string)

            print("create embs now")
            filenames = glob("photos/*")
            success_filenames, aligned_images = load_and_align_data(filenames)
            print("success filenames")
            print(success_filenames)

            # bucket_name = 'msds603camera'  # Change it to your bucket.
            # s3_connection = boto.connect_s3(
            #     aws_access_key_id=key_id,
            #     aws_secret_access_key=access_key)
            # bucket = s3_connection.get_bucket(bucket_name)
            for fname, img in zip(success_filenames, aligned_images):
                print("start run emb")
                images = resize_image(img, image_size)
                feed_dict = {images_placeholder: images,
                             phase_train_placeholder: False}
                feature_vector = sess.run(embeddings, feed_dict=feed_dict)
                name, count = fname.split("/")[-1].rsplit(".")[0].split("_")
                feature_dict["{}_{}".format(name, count)] = feature_vector
                print("removing photo")
                # k = Key(bucket)
                # k.key = fname
                # k.set_contents_from_filename(fname)
                # key = bucket.lookup(fname)
                # key.set_acl('public-read-write')
                os.remove(fname)

            print("writing")
            with open('extracted_dict.pickle', 'wb') as f:
                pickle.dump(feature_dict, f)
            print("done writing")

            feature_names = np.array(list(feature_dict.keys()))
            print(feature_names)
            feature_np = np.squeeze(list(feature_dict.values()))
            redis_db.set('create_embs', '')
            print("done creating embs")
            if use_buffer:
                print("reopening capture")
                capture = cv2.VideoCapture(stream_url)

        try:
            if use_fvs:
                print("capture frame with fvs")
                if not fvs.more():
                    continue
                frame = fvs.read()
            elif use_buffer:
                print("capture frame with cv2 buffer")
                time.sleep(1)
                # drain buffered frames so the overlay tracks the live stream
                for i in range(50):
                    capture.grab()
                ret, frame = capture.read()
            else:
                print("capture frame with cv2")
                capture = cv2.VideoCapture(stream_url)
                ret, frame = capture.read()
            gray = cv2.cvtColor(frame, 0)
        except:
            raise Exception("capture failed")

        print("converted to gray")
        if gray.size == 0:
            print("skipping")
            continue
        print(gray.size)

        response, faces, bboxs = align_face(gray, pnet, rnet, onet)
        print(response)
        print("{} faces found.".format(len(faces)))

        svg_document = svgwrite.Drawing(size=(dim1, dim2))
        svg_document.add(svg_document.rect(
            insert=(0, 0),
            size=(dim1, dim2),
            stroke_width="10",
            stroke="green",
            fill="rgb(0,0,0)",
            fill_opacity=0)
        )
        if response is True:
            for i, image in enumerate(faces):
                dim1, dim2 = frame.shape[1], frame.shape[0]
                bb = bboxs[i]
                images = load_img(image, False, False, image_size)
                feed_dict = {images_placeholder: images,
                             phase_train_placeholder: False}
                print("start run emb")
                feature_vector = sess.run(embeddings, feed_dict=feed_dict)
                print("start identify")
                result, distance = identify_person(feature_vector,
                                                   feature_names,
                                                   feature_np, 1)
                print("identified: %s, distance: %.3f" % (result, distance))
                print("calculate svg")
                # if distance < 1.0:
                if (1 - distance) > 0.15:
                    print("name: {} distance: {} text: {}".format(
                        result, distance, 1 - distance))
                    startX = bb[0]
                    startY = bb[1]
                    box_w = bb[2] - startX
                    box_h = bb[3] - startY
                    svg_document.add(svg_document.rect(
                        insert=(int(startX), int(startY)),
                        size=("{}px".format(box_w), "{}px".format(box_h)),
                        stroke_width="7",
                        stroke="yellow",
                        fill="rgb(0,0,0)",
                        fill_opacity=0)
                    )
                    text = "{} {:.2f}".format(result, 1 - distance)
                    text_style = "font-size:50px; font-family:Courier New; " \
                                 "stroke:yellow; stroke-width:0.2em;"
                    svg_document.add(svg_document.text(
                        text,
                        insert=(int(startX), int(startY) + 20),
                        fill="black",
                        style=text_style)
                    )
                    text_style = "font-size:50px; font-family:Courier New;"
                    svg_document.add(svg_document.text(
                        text,
                        insert=(int(startX), int(startY) + 20),
                        fill="black",
                        style=text_style)
                    )
                else:
                    print("name: {} distance: {}".format(result, distance))
                    startX = bb[0]
                    startY = bb[1]
                    box_w = bb[2] - startX
                    box_h = bb[3] - startY
                    svg_document.add(svg_document.rect(
                        insert=(int(startX), int(startY)),
                        size=("{}px".format(box_w), "{}px".format(box_h)),
                        stroke_width="10",
                        stroke="yellow",
                        fill="rgb(0,0,0)",
                        fill_opacity=0)
                    )

        print("export svg")
        svg_string = svg_document.tostring()
        print("write")
        redis_db.set('overlay', svg_string)
        print("done")

def write_svg_facenet(stream_url):
    '''
    Loads an alternative face detection model (a pre-trained Caffe network,
    no identification), and connects to the in-memory Redis database.
    Detects faces in the specified stream and calculates the corresponding
    bounding boxes. Writes the bounding boxes for all detected faces to an
    svg overlay which is then saved to Redis to be accessed by other
    processes.
    '''
    print("[INFO] opening redis connection")
    redis_db = redis.StrictRedis(host="localhost", port=6379, db=0)

    # construct the argument parser and parse the arguments
    ap = argparse.ArgumentParser()
    ap.add_argument("-p", "--prototxt", required=True,
                    help="path to Caffe 'deploy' prototxt file")
    ap.add_argument("-m", "--model", required=True,
                    help="path to Caffe pre-trained model")
    ap.add_argument("-c", "--confidence", type=float, default=0.5,
                    help="minimum probability to filter weak detections")
    args = vars(ap.parse_args())

    # load our serialized model from disk
    print("[INFO] loading model...")
    net = cv2.dnn.readNetFromCaffe(args["prototxt"], args["model"])

    process_this_frame = True
    while True:
        if process_this_frame:
            capture = cv2.VideoCapture(stream_url)
            ret, frame = capture.read()

            # grab the frame dimensions and convert the frame to a blob
            (h, w) = frame.shape[:2]
            blob = cv2.dnn.blobFromImage(cv2.resize(frame, (300, 300)), 1.0,
                                         (300, 300), (104.0, 177.0, 123.0))

            # pass the blob through the network and obtain the detections
            # and predictions
            net.setInput(blob)
            detections = net.forward()

            svg_document = svgwrite.Drawing(size=("1280px", "720px"))

            # loop over the detections
            for i in range(0, detections.shape[2]):
                # extract the confidence (i.e., probability) associated
                # with the prediction
                confidence = detections[0, 0, i, 2]

                # filter out weak detections by ensuring the `confidence`
                # is greater than the minimum confidence
                if confidence < args["confidence"]:
                    continue

                # compute the (x, y)-coordinates of the bounding box for
                # the face
                box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
                (startX, startY, endX, endY) = box.astype("int")
                box_w = endX - startX
                box_h = endY - startY

                # draw the bounding box of the face along with the
                # associated probability
                svg_document.add(svg_document.rect(
                    insert=(int(startX), int(startY)),
                    size=("{}px".format(box_w), "{}px".format(box_h)),
                    stroke_width="10",
                    stroke="yellow",
                    fill="rgb(0,0,0)",
                    fill_opacity=0)
                )
                text = "{:.2f}%".format(confidence * 100)
                text_style = "font-size:%ipx; font-family:%s" % (
                    20, "Courier New")
                svg_document.add(svg_document.text(
                    text,
                    insert=(int(startX), int(startY) + 20),
                    fill="black",
                    style=text_style)
                )

            redis_db.set('overlay', svg_document.tostring())

        process_this_frame = not process_this_frame
        key = cv2.waitKey(1) & 0xFF
        # if the `q` key was pressed, break from the loop
        if key == ord("q"):
            break

    cv2.destroyAllWindows()

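# Hedged invocation sketch (not part of the original module): write_svg_facenet
# reads --prototxt and --model from the command line, so it has to be driven
# with argv set. The prototxt/caffemodel file names below are illustrative
# assumptions about which Caffe face detector is deployed; substitute the pair
# actually available on disk.
def _example_write_svg_facenet(stream_url):
    sys.argv = [sys.argv[0],
                "--prototxt", "deploy.prototxt",
                "--model", "res10_300x300_ssd_iter_140000.caffemodel",
                "--confidence", "0.5"]
    write_svg_facenet(stream_url)
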
if __name__ == '__main__':
    stream_url = ("http://ec2-13-57-20-231.us-west-1.compute.amazonaws.com"
                  ":8090/pattern.webm")
    # stream_url = "http://52.23.243.107:8090/pattern.webm"
    # write_svg_haar(stream_url)
    write_svg_facenet_emb(stream_url)