# Source code for inaFaceAnalyzer.face_detector

#!/usr/bin/env python
# encoding: utf-8

# The MIT License

# Copyright (c) 2021 Ina (David Doukhan & Zohra Rezgui- http://www.ina.fr/)

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.

"""
Face detection classes are in charge of finding faces in image frames.

Two face detection classes are provided : 
    - :class:`LibFaceDetection` (default) 
    - :class:`OcvCnnFacedetector`.


Face detection classes inherit from the abstract class :class:`FaceDetector` and share a common interface.
They are designed as `function objects, or functors <https://en.wikipedia.org/wiki/Function_object>`_,
taking image frames as input and returning lists of :class:`Detection` instances.

>>> from inaFaceAnalyzer.opencv_utils import imread_rgb
>>> from inaFaceAnalyzer.face_detector import LibFaceDetection
>>> # read image
>>> img = imread_rgb('./media/dknuth.jpg')
>>> # instantiate a detector (costly - to be done a single time)
>>> detector = LibFaceDetection()
>>> # call the detector instance as a function - setting verbose to True is slower, but displays intermediate results
>>> ldetections = detector(img, verbose=True)
>>> print(ldetections)
[Detection(bbox=Rect(x1=113.9406801111573, y1=63.12627956950275, x2=287.63299981285394, y2=280.43775060093793), detect_conf=0.9999985098838806)]

"""


from abc import ABC, abstractmethod
from typing import NamedTuple
import cv2
import numpy as np
import onnxruntime

from .rect import Rect
from .remote_utils import get_remote
from .opencv_utils import disp_frame_shapes, disp_frame
from .libfacedetection_priorbox import PriorBox


class Detection(NamedTuple):
    """
    Elementary result produced by the face detection classes:
    one detected face, described by its location and a confidence score.
    """
    #: bounding box of the detected face, expressed in image pixels
    bbox: Rect
    #: confidence of the detection (0 = lowest confidence, 1 = highest confidence)
    detect_conf: float
# Currently, we wish to use the same eye detection procedure for all detection engines # class DetectionEyes(NamedTuple): # """ # Contains a detection (Rect bounding box & detection confidence) # + eyes coordinates (x1, y1, x2, y2) for left eye and right eye # """ # bbox : Rect # detect_conf : float # eyes : Rect
class FaceDetector(ABC):
    """
    Abstract base class shared by all face detectors.

    Subclasses implement :meth:`_call_imp`, which performs the raw detection.
    Instances are callable: :meth:`__call__` wraps :meth:`_call_imp` with
    optional black padding and minimal face size filtering.
    """
    # @classmethod
    # @abstractmethod
    # def output_type() : pass
    output_type = Detection

    def __init__(self, minconf, min_size_px, min_size_prct, padd_prct):
        """
        Common face detection constructor

        Args:
            minconf (float between 0 and 1):
                the minimal face detection confidence being returned
                (default values dependent on the face detection class chosen).
            min_size_px (int):
                minimal face size in pixels (default 30): better classification
                results requires face sizes above 75 pixels.
            min_size_prct (float between 0 and 1):
                minimal face size as a percentage of image frame minimal
                dimension. Allow to focus on the most relevant faces.
            padd_prct (float between 0 and 1):
                percentage of black padding pixels to be applied on images
                before detection (default values are set for each detection
                class).
        """
        self.minconf = minconf
        self.min_size_px = min_size_px
        self.min_size_prct = min_size_prct
        self.padd_prct = padd_prct

    def __call__(self, frame, verbose=False):
        """
        Perform face detection on image frames

        Args:
            frame (:class:`numpy.ndarray`): RGB image frame (height, width, 3).
            verbose (bool, optional): display intermediate results such as
                detected faces. Not to be used in production.
                Defaults to False.

        Returns:
            list of :class:`Detection` instances, with bounding boxes
            expressed in the coordinates of the (unpadded) input frame
        """
        tmpframe = frame
        if self.padd_prct:
            tmpframe, yoffset, xoffset = _blackpadd(frame, self.padd_prct)

        lret = self._call_imp(tmpframe)

        # keep only the faces whose largest dimension reaches the minimal
        # size (absolute or relative to the smallest frame dimension):
        # face classification algorithms may be affected by small face sizes
        min_frame_dim = min(frame.shape[:2])
        min_face_size = max(self.min_size_px, self.min_size_prct * min_frame_dim)
        if min_face_size > 0:
            lret = [e for e in lret if e.bbox.max_dim_len >= min_face_size]

        # detection was done on the padded frame: shift the bounding boxes
        # back to the coordinates of the original frame
        if self.padd_prct:
            lret = [e._replace(bbox=e.bbox.transpose(-xoffset, -yoffset)) for e in lret]

        if verbose:
            print('%d DETECTED FACES' % len(lret))
            disp_frame_shapes(frame, [e.bbox for e in lret], [])
            for detection in lret:
                x1, y1, x2, y2 = detection.bbox.to_int()
                print(detection)
                # clip the bounding box to the frame before cropping
                x1 = max(x1, 0)
                y1 = max(y1, 0)
                x2 = min(x2, frame.shape[1])
                y2 = min(y2, frame.shape[0])
                disp_frame(frame[y1:y2, x1:x2, :])
        return lret

    @abstractmethod
    def _call_imp(self, frame):
        """
        Raw detection procedure to be implemented by subclasses.

        Receives the (possibly padded) frame and returns a list of
        :class:`Detection` instances.
        """
        pass

    def most_central_face(self, frame, contain_center=True, verbose=False):
        """
        To be used for processing ML datasets and training new face
        classification models.
        Some ML face corpora images containing several faces, with the target
        annotated face at the center.
        This method returns the detected face which is closest from the center
        of the image frame

        Args:
            frame (:class:`numpy.ndarray`): RGB image frame (height, width, 3)
            contain_center (bool, optional): if True, the returned face MUST
                include image center. Defaults to True.
            verbose (bool, optional): Display detected faces.
                Defaults to False.

        Returns:
            Detection: if a face matching the conditions has been detected,
            else None
        """
        frame_center = (frame.shape[1] / 2, frame.shape[0] / 2)

        faces = list(self(frame, verbose))
        if contain_center:
            # keep faces containing image center
            faces = [f for f in faces if frame_center in f.bbox]

        if not faces:
            return None

        ldists = [_sqdist(f.bbox.center, frame_center) for f in faces]
        return faces[np.argmin(ldists)]

    def get_closest_face(self, frame, ref_bbox, min_iou=.7, squarify=True, verbose=False):
        """
        To be used for processing ML datasets and training new face
        classification models.
        Some face corpora images may contain several annotated faces.
        This method returns the detected face having the largest IOU with
        the target ref_bbox. The IOU must be >= min_iou.

        Args:
            frame (:class:`numpy.ndarray`): RGB image frame (height, width, 3).
            ref_bbox (tuple or Rect): reference face bounding box
                (x1, y1, x2, y2).
            min_iou (float, optional): minimal acceptable intersection over
                union between the detected face to be returned and the
                reference bounding box. Defaults to .7.
            squarify (bool, optional): if True, the IOU is computed on the
                ``square`` version of the reference and detected bounding
                boxes. If False, the bounding boxes are compared as is.
                In both cases the returned :class:`Detection` carries the
                bounding box as it was detected. Defaults to True.
            verbose (bool, optional): display intermediate results.
                Defaults to False.

        Returns:
            :class:`Detection` or None: detected face matching the criteria
            (largest IOU with ref_bbox and IOU >= min_iou), else None
        """
        if not isinstance(ref_bbox, Rect):
            ref_bbox = Rect(*ref_bbox)

        def _comparable(bbox):
            # bounding box representation used for IOU computation
            return bbox.square if squarify else bbox

        ref_bbox = _comparable(ref_bbox)

        lfaces = self(frame, verbose)
        if len(lfaces) == 0:
            return None

        # get closest detected faces from ref_bbox
        lbbox = [_comparable(detection.bbox) for detection in lfaces]
        liou = [ref_bbox.iou(bbox) for bbox in lbbox]
        if verbose:
            print(lbbox)
            print('liou', liou)

        am = np.argmax(liou)
        if liou[am] < min_iou:
            return None
        return lfaces[am]
def _sqdist(p1, p2): ''' return squared distance between points p1(x,y) and p2(x,y) ''' return (p1[0] - p2[0]) ** 2 + (p1[1] - p2[1]) ** 2 def _blackpadd(frame, paddpercent): # add black around image y, x, z = frame.shape #offset = int(max(x, y) * paddpercent) xoffset = int(x * paddpercent) yoffset = int(y * paddpercent) ret = np.zeros((y + 2 * yoffset, x + 2 * xoffset, z), dtype=frame.dtype) ret[yoffset:(y + yoffset), xoffset:(x + xoffset), :] = frame return ret, yoffset, xoffset
class OcvCnnFacedetector(FaceDetector):
    """
    This class wraps OpenCV default CNN face detection model.
    Images are first resized to 300*300 pixels, which may result in missing
    the smallest faces but allows to get fast detection time.
    """
    #output_type = Detection

    def __init__(self, minconf=0.65, min_size_px=30, min_size_prct=0, padd_prct=0.15):
        """
        Fetch the model files through ``get_remote`` and load the network.

        See :class:`FaceDetector` constructor for the meaning of the
        arguments.
        """
        super().__init__(minconf, min_size_px, min_size_prct, padd_prct)
        fpb = get_remote('opencv_face_detector_uint8.pb')
        fpbtxt = get_remote('opencv_face_detector.pbtxt')
        self.model = cv2.dnn.readNetFromTensorflow(fpb, fpbtxt)

    def _call_imp(self, frame):
        """
        Detect faces from an image

        Parameters:
            frame (array): Image to detect faces from.

        Returns:
            faces_data (list) : List of :class:`Detection` containing :
                - the bounding box (in pixels of the input frame)
                - face detection confidence score
        """
        faces_data = []
        h, w = frame.shape[:2]

        # The CNN is intended to work with images resized to 300*300
        # tests were carried on using different input sizes and were
        # associated to unsatisfactory results
        blob = cv2.dnn.blobFromImage(frame, 1.0, (300, 300), [104, 117, 123], True, False)
        self.model.setInput(blob)
        detections = self.model.forward()

        for i in range(detections.shape[2]):
            confidence = detections[0, 0, i, 2]
            # skip (rather than stop at) low confidence detections, so that
            # the result does not depend on the detections being sorted by
            # decreasing confidence
            if confidence < self.minconf:
                continue

            bbox = Rect(*detections[0, 0, i, 3:7])
            # remove noisy detections coordinates:
            # boxes lying outside the frame (relative coordinates 0...1)
            if bbox.x1 >= 1 or bbox.y1 >= 1 or bbox.x2 <= 0 or bbox.y2 <= 0:
                continue
            # boxes with a null or negative area
            if bbox.x1 >= bbox.x2 or bbox.y1 >= bbox.y2:
                continue

            # Map relative coordinates 0...1 to absolute frame width and height
            bbox = bbox.mult(w, h)
            faces_data.append(Detection(bbox, confidence))
        return faces_data
class LibFaceDetection(FaceDetector):
    """
    This class wraps the face detection model provided in
    `libfacedetection <https://github.com/ShiqiYu/libfacedetection>`_ :
    a recent face detection library (2021) that can take advantage of GPU
    acceleration and is able to detect the smallest faces.
    It may be slow when used with high resolution images.

    For more details, please refer to :
    Peng, H., & Yu, S. (2021). A systematic iou-related method: Beyond
    simplified regression for better localization. IEEE Transactions on Image
    Processing, 30, 5032-5044.
    """
    # output_type = DetectionEyes
    #output_type = Detection

    def __init__(self, minconf=.98, min_size_px=30, min_size_prct=0, padd_prct=0):
        """
        Fetch the ONNX model through ``get_remote`` and create the inference
        session.

        See :class:`FaceDetector` constructor for the meaning of the
        arguments.
        """
        super().__init__(minconf, min_size_px, min_size_prct, padd_prct)
        model_src = get_remote('libfacedetection-yunet.onnx')
        try:
            self.model = onnxruntime.InferenceSession(model_src, providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
        except Exception:
            # the session could not be created with the CUDA provider:
            # retry on CPU only. KeyboardInterrupt and SystemExit are
            # deliberately not intercepted.
            self.model = onnxruntime.InferenceSession(model_src, providers=['CPUExecutionProvider'])
        self.nms_thresh = 0.3  # Threshold for non-max suppression
        self.keep_top_k = 750  # Keep keep_top_k for results outputing
        self.dprior = {}  # PriorBox instances indexed by frame (width, height)

    def _call_imp(self, frame):
        """
        Detect faces in a RGB image frame (height, width, 3).

        Returns a list of :class:`Detection` instances whose bounding boxes
        are expressed in pixels of the input frame.
        """
        bgr_frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        h, w, _ = frame.shape

        # convert to NN input: channels first + leading batch dimension
        blob = np.expand_dims(np.transpose(bgr_frame, (2, 0, 1)), axis=0).astype(np.float32)

        # NN inference
        loc, conf, iou = self.model.run([], {'input': blob})

        # Decode bboxes and landmarks
        # TODO: set a limit of dict length ? There may be RAM issues when
        # considering a large image collection with heterogenous sizes
        if (w, h) not in self.dprior:
            self.dprior[(w, h)] = PriorBox(input_shape=(w, h), output_shape=(w, h))
        pb = self.dprior[(w, h)]
        dets = pb.decode(loc, conf, iou, self.minconf)

        # dirty hack used for google collab compatibility
        if len(dets.shape) == 3 and dets.shape[1] == 1:
            dets = dets.reshape((dets.shape[0], dets.shape[2]))
        assert len(dets.shape) == 2, dets.shape

        if dets.shape[0] == 0:
            return []

        # NMS from OpenCV
        keep_idx = cv2.dnn.NMSBoxes(
            bboxes=dets[:, 0:4].tolist(),
            scores=dets[:, -1].tolist(),
            score_threshold=self.minconf,
            nms_threshold=self.nms_thresh,
            eta=1,
            top_k=self.keep_top_k)
        # Flatten the kept indices before indexing, so that the selection
        # is always 2D: an (N, 1) index array would add a dimension, and an
        # empty tuple would select the whole array instead of nothing.
        # NOTE(review): presumably the index layout depends on the OpenCV
        # version (origin of the former reshape workaround) - to be confirmed
        keep_idx = np.asarray(keep_idx, dtype=int).reshape(-1)
        dets = dets[keep_idx]

        assert len(dets.shape) == 2, dets.shape
        assert dets.shape[1] == 15, dets.shape

        lret = []
        for det in dets:
            score = det[-1]
            # boxes are stored as (x, y, width, height)
            x1, y1, box_w, box_h = det[:4]
            bbox = Rect(x1, y1, x1 + box_w, y1 + box_h)
            #eyes = Rect(*det[4:8])
            #lret.append(DetectionEyes(bbox, score, eyes))
            lret.append(Detection(bbox, score))
        return lret
class IdentityFaceDetector(FaceDetector):
    """
    This class does not detect faces and returns bounding boxes corresponding
    to the whole image frame.
    It should be used for processing images or videos corresponding to
    already-detected cropped faces.
    """
    #output_type = Detection

    def __init__(self):
        """
        IdentityFaceDetector Constructor does not require arguments
        """
        # no confidence threshold, no minimal face size, no padding
        super().__init__(0, 0, 0, 0)

    def _call_imp(self, frame):
        """
        Return a single :class:`Detection` covering the whole frame,
        with a NaN detection confidence.
        """
        # np.nan instead of the np.NAN alias, which was removed in NumPy 2.0
        return [Detection(Rect(0, 0, frame.shape[1], frame.shape[0]), np.nan)]
class PrecomputedDetector(FaceDetector):
    """
    Detector returning bounding boxes provided at construction time instead
    of analyzing image frames.

    Each call consumes the next element of the provided list. An element is
    either a single bounding box given as a tuple, or a collection of
    bounding boxes. Once the list is exhausted, no detection is returned.
    """
    #output_type = Detection

    def __init__(self, lbbox=None):
        """
        Args:
            lbbox (iterable, optional): one element per frame to be processed.
                A private copy is stored, the argument is left untouched.
                Defaults to an empty list.
        """
        super().__init__(0, 0, 0, 0)
        self.lbbox = [] if lbbox is None else list(lbbox)

    def _call_imp(self, frame):
        """
        Return the detections stored for the next frame (frame content is
        ignored). Detection confidence is set to None.
        """
        if not self.lbbox:
            return []
        ret = self.lbbox.pop(0)
        # a single bounding box is provided as a tuple
        if isinstance(ret, tuple):
            ret = [ret]
        return [Detection(Rect(*e), None) for e in ret]


def facedetection_cmdline(parser):
    '''
    Update command line parser with face detection related arguments

    Parameters
    ----------
    parser : argparse.ArgumentParser
        command line parser to be updated
    '''
    da = parser.add_argument_group('optional arguments related to face detection')

    da.add_argument('--face_detector', default='LibFaceDetection',
                    choices=['LibFaceDetection', 'OcvCnnFacedetector'],
                    help='''face detection module to be used:
                    LibFaceDetection can take advantage of GPU acceleration and has a higher recall.
                    OcvCnnFacedetector is embedded in OpenCV. It is faster for large resolutions since it first resize input frames to 300*300. It may miss small faces''')

    da.add_argument('--face_detection_confidence', type=float,
                    help='''minimal confidence threshold to be used for face detection.
                    Default values are 0.98 for LibFaceDetection and 0.65 for OcvCnnFacedetector''')

    da.add_argument('--min_face_size_px', default=30, type=int, dest='size_px',
                    help='''minimal absolute size in pixels of the faces to be considered for the analysis.
                    Optimal classification results are obtained for sizes above 75 pixels.''')

    da.add_argument('--min_face_size_percent', default=0, type=float, dest='size_prct',
                    help='''minimal relative size (percentage between 0 and 1) of the faces to be considered for the analysis with respect to image frames minimal dimension (generally height for videos)''')

    da.add_argument('--face_detection_padding', default=None, type=float, dest='face_detection_padding',
                    help='''Black padding percentage to be applied to image frames before face detection.
                    0.15 Padding may help detecting large faces occupying the whole image with OcvCnnFacedetector.
                    Default padding values are 0.15 for OcvCnnFacedetector and 0 for LibFaceDetection''')


def facedetection_factory(args):
    '''
    Instantiate a face detection object from parsed command line arguments

    Parameters
    ----------
    args : Namespace
        Namespace containing fields face_detector, face_detection_confidence,
        size_px, size_prct and face_detection_padding
        (as defined by facedetection_cmdline)

    Returns
    -------
    instance of class FaceDetector

    Raises
    ------
    NotImplementedError
        if args.face_detector is not a supported face detector name
    '''
    dargs = {'min_size_px': args.size_px, 'min_size_prct': args.size_prct}

    # padding and confidence are forwarded only when explicitly provided,
    # so that each detector keeps its own default values.
    # 'is not None' is required for an explicit 0 to be taken into account
    if args.face_detection_padding is not None:
        dargs['padd_prct'] = args.face_detection_padding
    if args.face_detection_confidence is not None:
        dargs['minconf'] = args.face_detection_confidence

    if args.face_detector == 'LibFaceDetection':
        detector = LibFaceDetection(**dargs)
    elif args.face_detector == 'OcvCnnFacedetector':
        detector = OcvCnnFacedetector(**dargs)
    else:
        raise NotImplementedError(args.face_detector)
    return detector