通过深度学习实现安全帽佩戴的检测

作者：gaoshine

原文链接：https://www.jianshu.com/p/2101f5e5e577

查看更多的专业文章，请移步至“人工智能LeadAI”公众号，查看更多的课程信息和产品信息，请移步至全新打造的官网：www.leadai.org.

正文共6736个字，5张图，预计阅读时间17分钟。

前一段时间做企业的智能安全项目，我们在面对一些问题时，大胆采用深度学习的方法，解决传统算法和统计学算法不好实现的问题。今天就和大家分享一下，如何用深度学习算法解决“通过视频监控检测工人是否佩戴安全帽”的问题。

通过深度学习实现安全帽佩戴的检测

安全帽检测

通过深度学习实现安全帽佩戴的检测

企业安全智能监控画面

概述

对于图像识别，采用传统的算法（opencv的一些算法），判断形状、颜色等等，我们在实验室和办公场所做测试，效果还不错，很容易识别出来。一旦到了工业现场，图像完全不行，连人和车都识别不出来。在不同光线下不论采用什么颜色空间（RGB、HSV等）都无法分离出合理的色彩，更不要提判断和检测了。有感于实际的现场环境，决定放弃传统的算法，拿起深度学习的工具，来搞定这个项目。

数据准备

高大上的人工智能背后，一定是苦逼的数据准备，深度学习的模型，需要成千上万的训练和测试数据，这些数据的采集和整理，还有数据的清洗都是体力活啊。

当然，我还是没傻到一张张去拍照片。我通过现场的摄像头做了视频采集，然后拿到录像，写了一段代码从录像中找到人，再把人的上半部分处理一下，变成标准格式的图片。这样子，2-3天的录像就可以产生几十万张图片了，看来训练集的问题解决了。

# -*- coding:utf-8 -*-
# Convert a video recording into individual JPEG images.
from time import gmtime, strftime

import cv2

videoFile = '/media/kingstar/kingstardata/safety_eyes/nohatdata/7.mp4'

cap = cv2.VideoCapture(videoFile)
# Capture property ids 3 and 4 are the frame width and height
# (CAP_PROP_FRAME_WIDTH / CAP_PROP_FRAME_HEIGHT in the OpenCV docs).
cap.set(3, 640)
cap.set(4, 480)

try:
    while True:
        ret, frame = cap.read()
        # read() signals failure once the recording is exhausted; stop
        # before an invalid frame reaches imshow()/imwrite().
        if not ret or frame is None or frame.size == 0:
            break

        cv2.imshow('my', frame)

        # NOTE(review): the file name has one-second resolution and is taken
        # from the wall clock, so frames processed within the same second
        # overwrite each other -- confirm this sampling is intended.
        f = strftime("%Y%m%d%H%M%S.jpg", gmtime())
        cv2.imwrite('output/' + f, frame)

        # Pressing "q" in the preview window aborts the conversion.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # The original referenced cap.release without calling it, so the
    # capture was never released.
    cap.release()
    cv2.destroyAllWindows()

采用SSD的算法（用于物体检测的深度学习主流算法之一）检测出图片中的人。

# -*-coding: utf-8-*-
# Run a pre-trained SSD300 detector over a directory of images and save a
# crop of every detected person.
import os

from keras.preprocessing import image
from keras.applications.imagenet_utils import preprocess_input
from scipy.misc import imread  # no longer used below; import kept from the original
import numpy as np
from ssd import SSD300
from ssd_utils import BBoxUtility
import matplotlib.pyplot as plt
import cv2
from os import listdir

voc_classes = ['Aeroplane', 'Bicycle', 'Bird', 'Boat', 'Bottle',
               'Bus', 'Car', 'Cat', 'Chair', 'Cow', 'Diningtable',
               'Dog', 'Horse', 'Motorbike', 'Person', 'Pottedplant',
               'Sheep', 'Sofa', 'Train', 'Tvmonitor']
# Label ids are mapped to names with `label - 1` below, so id 0 is not one of
# the named classes (presumably the background class -- confirm against ssd.py).
NUM_CLASSES = len(voc_classes) + 1

input_shape = (300, 300, 3)
model = SSD300(input_shape, num_classes=NUM_CLASSES)
model.load_weights('weights_SSD300.hdf5', by_name=True)
bbox_util = BBoxUtility(NUM_CLASSES)

# Detections scoring below this confidence are discarded.
CONF_THRESHOLD = 0.5


def _file_stem(path):
    """Return the file name of *path* without its directory and extension."""
    return os.path.splitext(os.path.basename(path))[0]


def _clip_box(xmin, ymin, xmax, ymax, width, height):
    """Clamp a pixel box to the bounds of a width x height image."""
    xmin = min(max(xmin, 0), width)
    xmax = min(max(xmax, 0), width)
    ymin = min(max(ymin, 0), height)
    ymax = min(max(ymax, 0), height)
    return xmin, ymin, xmax, ymax


def _save_crop(img, box, out_path, tag):
    """Write the region *box* of *img* to *out_path*; log the path under *tag*."""
    height, width = img.shape[:2]
    xmin, ymin, xmax, ymax = _clip_box(box[0], box[1], box[2], box[3],
                                       width, height)
    # A negative coordinate would be read as a "from the end" slice index and
    # a degenerate box yields an empty array, so both are handled up front.
    if xmax <= xmin or ymax <= ymin:
        print('skip empty box for %s' % out_path)
        return
    ok = cv2.imwrite(out_path, img[ymin:ymax, xmin:xmax])
    print('%s %s' % (tag, out_path))
    if not ok:
        # imwrite reports failure through its return value only.
        print('could not write %s' % out_path)


def ssd(img_path):
    """Detect objects in one image file and save a crop per detected person.

    Crops are written to ``<oPath>/Person5/<stem>.<n>.jpg`` where ``oPath`` is
    the module-level output directory set in the ``__main__`` section and
    ``n`` is the 1-based index of the detection among those that pass the
    confidence threshold.

    Args:
        img_path: path of the image file to process.
    """
    img0 = cv2.imread(img_path)
    if img0 is None:
        # cv2.imread returns None for anything it cannot decode.
        print('skip unreadable image: %s' % img_path)
        return
    # Scale boxes with the shape of the very image the crops are cut from.
    height, width = img0.shape[:2]

    net_input = image.img_to_array(
        image.load_img(img_path, target_size=(300, 300)))
    inputs = preprocess_input(np.array([net_input]))
    preds = model.predict(inputs, batch_size=1, verbose=1)
    results = bbox_util.detection_out(preds)

    # Columns used below: label, confidence, xmin, ymin, xmax, ymax.
    detections = np.asarray(results[0])
    if detections.ndim != 2 or detections.shape[0] == 0:
        return  # nothing detected in this image

    confident = detections[detections[:, 1] >= CONF_THRESHOLD]
    fileStr0 = _file_stem(img_path)

    for i0, row in enumerate(confident):
        label_id, score, rel_xmin, rel_ymin, rel_xmax, rel_ymax = row[:6]
        # Box coordinates are multiplied by the image size, i.e. they are
        # treated as fractions of the width/height.
        xmin = int(round(rel_xmin * width))
        ymin = int(round(rel_ymin * height))
        xmax = int(round(rel_xmax * width))
        ymax = int(round(rel_ymax * height))

        label_name = voc_classes[int(label_id) - 1]
        print('%s %s %d %d %d %d' % (label_name, score, xmin, ymin, xmax, ymax))

        box = (xmin, ymin, xmax, ymax)
        if label_name == 'Person':
            fileStr = '%s/Person5/%s.%d.jpg' % (oPath, fileStr0, i0 + 1)
            _save_crop(img0, box, fileStr, 'Person0')
        # NOTE(review): 'Car1' / 'Motorbike1' match no entry of voc_classes, so
        # this branch never runs -- looks deliberately disabled; confirm.
        if label_name == 'Car1' or label_name == 'Motorbike1':
            fileStr = '%s/Car/%s.%d.jpg' % (oPath, fileStr0, i0 + 1)
            _save_crop(img0, box, fileStr, 'Car0')


if __name__ == "__main__":
    img_path = 'test02.jpg'
    mPath = '/media/kingstar/kingstardata/safety_eyes/baidu5'
    oPath = '/media/kingstar/kingstardata/safety_eyes/out'
    trainFileList = listdir(mPath)
    m = len(trainFileList)
    print(m)
    for i, fileNameStr in enumerate(trainFileList):
        fileStr = _file_stem(fileNameStr)
        print('%d %s %s' % (i, fileNameStr, fileStr))
        fileNameStr = '%s/%s' % (mPath, fileNameStr)
        print('step:%d/%d' % (i, m))
        ssd(fileNameStr)

这样就可以建立自己的训练集：

train和test （带不带帽子的标注需要人工去做… 这个还是很苦逼）

通过深度学习实现安全帽佩戴的检测

训练集

搭建模型

考虑到标准的图片只有128*128，特征不是很多，就动手搭建一个不算深的深度学习模型，采用卷积神经网络处理图形特征，搞过cnn的同学会觉得so easy。

# Small CNN for a yes/no decision: three convolution + max-pooling stages
# followed by a dense head that ends in a single sigmoid unit.
# NOTE(review): Convolution2D(n, 3, 3) is the Keras 1.x call form (filters,
# kernel rows, kernel cols); newer Keras releases read the positional
# arguments differently -- confirm the installed version before running.
model = Sequential()

model.add(Convolution2D(32, 3, 3, input_shape=(img_width, img_height, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Convolution2D(64, 3, 3))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Convolution2D(64, 3, 3))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

# Classifier head.
model.add(Flatten())
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(1))
model.add(Activation('sigmoid'))

model.compile(loss='binary_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

这是个只有三层的卷积神经网络，我们就拿这个模型进行训练和学习吧。

训练神经网络

深度学习的训练是极其需要强大的算力的，多亏我们的模型较小，另外我们还DIY了一台深度学习服务器，有了强大的GPU做运算。

经过了一晚上的训练，终于出了结果，数据上还不错，识别率竟然到了95%以上。

具体代码如下：

# -*-coding: utf-8-*-
# Train a small CNN as a binary image classifier on two directory trees
# (train / test) and save the trained model.
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Convolution2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense

# dimensions of our images.
img_width, img_height = 128, 128

train_data_dir = '/media/kingstar/kingstardata/data/train'
validation_data_dir = '/media/kingstar/kingstardata/data/test'
nb_train_samples = 4000
nb_validation_samples = 800
nb_epoch = 60

# Three convolution + max-pooling stages followed by a dense head that ends
# in a single sigmoid unit.
# NOTE(review): Convolution2D(n, 3, 3) and the samples_per_epoch / nb_epoch /
# nb_val_samples arguments below are Keras 1.x spellings -- confirm the
# installed version before running.
model = Sequential()

model.add(Convolution2D(32, 3, 3, input_shape=(img_width, img_height, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Convolution2D(64, 3, 3))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Convolution2D(64, 3, 3))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Flatten())
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(1))
model.add(Activation('sigmoid'))

model.compile(loss='binary_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

# Training images are rescaled and randomly sheared / zoomed / flipped;
# validation images are only rescaled.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True)

test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    train_data_dir,
    target_size=(img_width, img_height),
    batch_size=32,
    class_mode='binary')

validation_generator = test_datagen.flow_from_directory(
    validation_data_dir,
    target_size=(img_width, img_height),
    batch_size=32,
    class_mode='binary')

model.fit_generator(
    train_generator,
    samples_per_epoch=nb_train_samples,
    nb_epoch=nb_epoch,
    validation_data=validation_generator,
    nb_val_samples=nb_validation_samples)

# File name (including its spelling) kept exactly as in the original so that
# anything loading the saved model by this name keeps working.
model.save('trianHat12801.h5')

通过深度学习实现安全帽佩戴的检测

现场

结论

通过简单的cnn模型和一个小规模的数据集的训练，基本上达到了目标。

不过在实际测试中的识别率还是比较低，感觉还是无法直接用于生产环境。

没关系，下一步我们会采用成熟的模型vgg或resnet，在这个模型后端做修改，进行调优和训练，另外提高训练集的数量和进一步做数据清洗，以达到可以直接在生产环境上使用的目标。

来源：人工智能LeadAI

声明：本站部分文章及图片源自用户投稿，如本站任何资料有侵权，请您尽早联系jinwei@zod.com.cn进行处理，非常感谢！