一:关于Facenet
Facenet 是Florian Schroff等人2015年提出的一种人脸识别的模型,论文地址
FaceNet: A Unified Embedding for Face Recognition and Clustering
Facenet 的基本流程是:首先在给出的图片中检测出人脸区域,然后计算每张人脸的特征向量(embedding)。输入一张图片,就可以得到图片中所有人脸的特征,每个人脸特征可以用一个128维的向量表示。要做人脸识别,一个简单的思路是:计算待检测样本与数据库中已录入的人脸特征之间的距离,距离越小,待检测样本就越有可能是这个人。为了避免因数据库中没有事先录入该人脸信息而导致误识别,我们可以设置一个距离阈值:只有当最小距离低于该阈值时,才认为识别成功。
二:开发pepper机器人
Pepper 机器人只支持 Python 2.7,而我们平时使用的是 Python 3.x;模型涉及各种依赖,两个版本之间的兼容性难以处理。博主采取的方式是:机器人采集照片后通过网络传给服务器,服务器上部署人脸识别模型,处理完成后把识别结果返回给机器人,再由机器人做相应的处理。
三 :代码
1机器人采集照片,并传送给服务器
import naoqi
from naoqi import ALProxy
import socket
import time
# (host, port) that send() connects to when uploading a photo.
address = ('192.168.100.22', 2567)
# NAOqi service proxies, both created against 192.168.100.108:9559.
photoCaptureProxy = ALProxy("ALPhotoCapture", "192.168.100.108", 9559)
tts=ALProxy("ALTextToSpeech", "192.168.100.108", 9559)
# Capture settings applied once at import time.
# NOTE(review): the meaning of resolution code 2 is not visible here --
# confirm against the ALPhotoCapture documentation.
photoCaptureProxy.setResolution(2)
photoCaptureProxy.setPictureFormat("jpg")
def takephoto():
    """Capture photos forever and hand each capture to send().

    Every iteration takes one picture into /home/nao/recordings/cameras/
    under the base name "image", passes the value returned by
    takePictures() to send(), logs it, and then sleeps for one second.
    The function never returns.
    """
    save_dir = "/home/nao/recordings/cameras/"
    base_name = "image"
    while True:
        captured = photoCaptureProxy.takePictures(1, save_dir, base_name)
        send(captured)
        print('即将发送{}'.format(captured))
        time.sleep(1)
def getfaceInfor():
    """Placeholder with no behaviour yet; always returns None."""
    return None
def send(photos):
    """Upload each captured photo to the server and greet whoever it names.

    Protocol, one TCP connection per photo:
      1. send the header "<byte length>|<photo path>"
      2. wait for "ok", then send the raw file bytes
      3. wait for "copy", then send "infor"
      4. read the reply; anything other than "no" is spoken via sayHello()

    Args:
        photos: value returned by ALPhotoCapture.takePictures(); only
            photos[0] is iterated (presumably the list of saved file
            paths -- confirm against the NAOqi documentation).

    Socket errors propagate to the caller, but the socket is always closed.
    """
    for photo in photos[0]:
        print('sending {}'.format(photo))
        data = file_deal(photo)
        if not data:
            # file_deal() already reported the problem; there is nothing
            # to transmit, so do not open a connection for this photo.
            continue
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            sock.connect(address)
            # The header carries the photo path (the original formatted the
            # unrelated name `file` here).
            sock.sendall('{}|{}'.format(len(data), photo).encode())
            reply = sock.recv(1024)
            if 'ok' == reply.decode():
                # sendall() retries until every byte is written; a plain
                # send() may transmit only part of the buffer.
                sock.sendall(data)
                reply = sock.recv(1024)
                if 'copy' == reply.decode():
                    print('{} send successfully'.format(photo))
                    sock.sendall(b'infor')
                    person_name = sock.recv(1024)
                    if not isinstance(person_name, str):
                        # Python 3 returns bytes from recv(); on Python 2
                        # the value is already a str and is left untouched.
                        person_name = person_name.decode('utf-8')
                    # An empty reply means the server closed the connection.
                    if person_name and 'no' != person_name:
                        sayHello(person_name)
        finally:
            sock.close()
def sayHello(person_name):
    """Speak a greeting for *person_name* through the text-to-speech proxy."""
    greeting = '你好{}'.format(person_name)
    tts.say(greeting)
def file_deal(file_path):
    """Return the raw bytes of the file at *file_path*.

    Best effort: if the file cannot be opened or read, a message is printed
    and b'' is returned instead of raising.
    """
    try:
        # The context manager closes the handle even when read() fails.
        with open(file_path, 'rb') as handle:
            return handle.read()
    except (IOError, OSError):
        # IOError covers Python 2.7; on Python 3 it is an alias of OSError.
        print('error{}'.format(file_path))
        return b''
# Script entry point: start the capture/upload loop.
if __name__ == '__main__':
    takephoto()
2,服务器端代码
# Address and TCP port that recognition() binds its listening socket to.
LOCAL_IP = '192.168.100.22'
PORT = 2567
def init_sourceData(path):
    """Return the paths of the regular files directly inside *path*.

    Sub-directories are skipped and nothing is searched recursively. The
    result is sorted so repeated runs process the files in the same order.

    Best effort: if *path* cannot be listed (missing, not a directory, no
    permission) the error is printed and an empty list is returned.
    """
    try:
        entries = sorted(os.listdir(path))
    except OSError as e:
        # OSError also covers FileNotFoundError, NotADirectoryError and
        # PermissionError.
        print(e)
        return []
    candidates = (os.path.join(path, entry) for entry in entries)
    return [candidate for candidate in candidates if os.path.isfile(candidate)]
def generate_dataBase(image_paths):
    """Build people_infor.h5 with one face embedding per input image.

    For every path in *image_paths* the image is loaded, MTCNN detects
    faces, the first detected box is cropped (widened by margin/2 on each
    side and clipped to the image), resized to image_size x image_size,
    prewhitened and fed to the FaceNet model '20170512-110547'. The
    resulting embedding is stored under the file name without extension.

    Args:
        image_paths: list of image file paths. Paths in which no face is
            detected are removed from this list in place.

    Returns:
        None. If the HDF5 file cannot be created, the error is printed and
        the function returns before any model is loaded.
    """
    minsize = 20  # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # three steps's threshold
    factor = 0.709  # scale factor
    margin = 44
    image_size = 160

    try:
        data_h5 = h5py.File('people_infor.h5', 'w')
    except OSError as e:
        print(e)
        print("生成h5文件失败")
        return

    mtcnn_sess = None
    try:
        # The detector gets a graph and session of its own; the returned
        # pnet/rnet/onet callables are created against mtcnn_sess.
        with tf.Graph().as_default():
            print("开始加载图片对齐模型")
            gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=1.0)
            mtcnn_sess = tf.Session(
                config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
            with mtcnn_sess.as_default():
                pnet, rnet, onet = align.detect_face.create_mtcnn(mtcnn_sess, None)

        print("开始加载图片识别模型")
        # FaceNet is loaded into a fresh graph, the same pattern
        # recognition() uses, so it cannot clash with other loaded models.
        with tf.Graph().as_default():
            with tf.Session() as sessD:
                # Load the model
                facenet.load_model('20170512-110547')
                images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
                embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
                phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")

                # Iterate over a snapshot: removing from the list that is
                # being iterated would silently skip the following image.
                for image in list(image_paths):
                    img = misc.imread(os.path.expanduser(image), mode='RGB')
                    img_size = np.asarray(img.shape)[0:2]
                    bounding_boxes, _ = align.detect_face.detect_face(
                        img, minsize, pnet, rnet, onet, threshold, factor)
                    if len(bounding_boxes) < 1:
                        image_paths.remove(image)
                        print("没有发现人脸", image)
                        continue

                    # Only the first detected box is used.
                    det = np.squeeze(bounding_boxes[0, 0:4])
                    bb = np.zeros(4, dtype=np.int32)
                    bb[0] = np.maximum(det[0] - margin / 2, 0)
                    bb[1] = np.maximum(det[1] - margin / 2, 0)
                    bb[2] = np.minimum(det[2] + margin / 2, img_size[1])
                    bb[3] = np.minimum(det[3] + margin / 2, img_size[0])
                    cropped = img[bb[1]:bb[3], bb[0]:bb[2], :]
                    aligned = misc.imresize(cropped, (image_size, image_size), interp='bilinear')
                    prewhitened = facenet.prewhiten(aligned)
                    images = np.stack([prewhitened])

                    # Run forward pass to calculate embeddings
                    feed_dict = {images_placeholder: images, phase_train_placeholder: False}
                    emb = sessD.run(embeddings, feed_dict=feed_dict)

                    # Key: file name without directory and extension.
                    people_name = os.path.splitext(os.path.basename(image))[0]
                    data_h5[people_name] = emb[0, :]
    finally:
        if mtcnn_sess is not None:
            mtcnn_sess.close()
        data_h5.close()
def _recv_exact(conn, total, chunk_size=1024):
    """Read up to *total* bytes from *conn*; stop early if the peer closes."""
    chunks = []
    received = 0
    while received < total:
        data = conn.recv(min(chunk_size, total - received))
        if not data:
            # recv() returning b'' means the peer closed the connection;
            # without this check the loop would never terminate.
            break
        chunks.append(data)
        received += len(data)
    return b''.join(chunks)


def _handle_client(sc, sess, embeddings, images_placeholder, phase_train_placeholder):
    """Serve one upload: receive the image, identify the face, reply."""
    infor = sc.recv(1024)
    # Header format: "<byte length>|<file name>". partition() tolerates an
    # empty header and a '|' inside the file name.
    length, _, file_name = infor.decode().partition('|')
    if not (length and file_name):
        return
    total = int(length)  # ValueError on a malformed header; caller handles it
    print('length {},filename {}'.format(length, file_name))
    sc.sendall(b'ok')

    payload = _recv_exact(sc, total)
    print('应该接受{},实际接受{}'.format(length, len(payload)))
    if not payload or len(payload) < total:
        # Nothing received, or the upload was cut short: do not try to
        # decode a truncated image.
        return
    print('acturally length:{}'.format(len(payload)))

    image_path = 'image/' + str(random.randint(1, 10000)) + '.jpg'
    with open(image_path, 'wb') as newfile:
        newfile.write(payload)

    images = hfut_face_align.align_image(image_path)
    person_infor = 'no'
    if len(images) > 0:
        result_emb = hfut_face.calculat_embing(images, sess, embeddings, images_placeholder,
                                               phase_train_placeholder)
        if result_emb['face'] != 0:
            name = hfut_face.validate(result_emb['emb'])
            # Never reply with an empty string: sending zero bytes would
            # leave the client blocked in recv(). 'no' is the value the
            # client treats as "nobody recognised".
            person_infor = name or 'no'
            print('识别结果:%s' % name)
        else:
            print('识别失败,请重试')

    sc.sendall(b'copy')
    reply = sc.recv(32).decode()
    if 'infor' == reply:
        sc.sendall(person_infor.encode())


def recognition():
    """Run the recognition server; loops forever.

    Binds a TCP socket to (LOCAL_IP, PORT), loads the FaceNet model
    '20170512-110547' and the stored face database, then serves one client
    connection at a time. A failure while serving one client is printed and
    the server moves on to the next connection.
    """
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)  # socket.AF_INET ipv4 socket.SOCK_STREAM tcp
    sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    try:
        sock.bind((LOCAL_IP, PORT))
        print("开始加载图片识别模型")
        with tf.Graph().as_default():
            with tf.Session() as sess:
                facenet.load_model('20170512-110547')
                print("==========================")
                images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
                embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
                phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")
                hfut_face.load_database()
                print('人脸识别模型加载完成!')
                sock.listen(3)
                print('开始监听端口:')
                while True:
                    sc, sc_name = sock.accept()
                    print('收到{}请求'.format(sc_name))
                    try:
                        _handle_client(sc, sess, embeddings, images_placeholder,
                                       phase_train_placeholder)
                    except (socket.error, ValueError) as e:
                        # Broken connection, undecodable header, bad length
                        # or unreadable image file: drop this client only.
                        print(e)
                    finally:
                        sc.close()
    finally:
        sock.close()
def prepare():
    """Rebuild the face database from the files in the 'imageData' directory."""
    generate_dataBase(init_sourceData('imageData'))
# Script entry point: start the recognition server. The commented-out
# prepare() call rebuilds the database and is left disabled here.
if __name__ == '__main__':
    # prepare() # build the database
    recognition()
hfut_facenet.py
# Module-level settings.
# NOTE(review): generate_dataBase() re-declares minsize/threshold/factor/
# margin/image_size/controller locally, and no other function visible in
# this module reads these copies -- confirm whether they are still needed.
minsize = 20 # minimum size of face
threshold = [0.6, 0.7, 0.7] # three steps's threshold
factor = 0.709 # scale factor
margin = 44
image_size = 160
controller = None
# Maps a person's name to the stored embedding; filled by load_database()
# and read by validate().
people_source = {}
def calculat_embing(images, sess, embeddings, images_placeholder, phase_train_placeholder):
    """Run *embeddings* in *sess* for *images* and wrap the result.

    The feed binds *images* to *images_placeholder* and False to
    *phase_train_placeholder*.

    Returns:
        dict with 'face' set to 1 and 'emb' holding whatever sess.run()
        returned for *embeddings*.
    """
    print("图片识别模型完成!")
    inputs = {images_placeholder: images, phase_train_placeholder: False}
    vectors = sess.run(embeddings, feed_dict=inputs)
    return {'face': 1, 'emb': vectors}
def validate(emb, max_distance=99, database=None):
    """Return the name of the stored person whose embedding is closest to *emb*.

    The distance is the Euclidean norm of ``emb - stored`` (NumPy
    broadcasting applies). A name is accepted only when its distance is
    strictly below *max_distance*.

    Args:
        emb: query embedding.
        max_distance: rejection threshold. The default of 99 keeps the
            previous hard-coded cutoff; pass a smaller value to reject
            faces that are not in the database.
        database: mapping of name -> stored embedding. Defaults to the
            module-level ``people_source``.

    Returns:
        The best-matching name, or "" when the database is empty or no
        entry is closer than *max_distance*.
    """
    if database is None:
        database = people_source
    best_name = ""
    best_dist = max_distance
    for name in database:
        dist = np.sqrt(np.sum(np.square(np.subtract(emb, database[name]))))
        if dist < best_dist:
            best_dist = dist
            best_name = name
    return best_name
def init_sourceData(path):
    """Return the paths of the regular files directly inside *path*.

    Sub-directories are skipped and nothing is searched recursively. The
    result is sorted so repeated runs process the files in the same order.

    Best effort: if *path* cannot be listed (missing, not a directory, no
    permission) the error is printed and an empty list is returned.
    """
    try:
        entries = sorted(os.listdir(path))
    except OSError as e:
        # OSError also covers FileNotFoundError, NotADirectoryError and
        # PermissionError.
        print(e)
        return []
    candidates = (os.path.join(path, entry) for entry in entries)
    return [candidate for candidate in candidates if os.path.isfile(candidate)]
def generate_dataBase(image_paths):
    """Build people_infor.h5 with one face embedding per input image.

    For every path in *image_paths* the image is loaded, MTCNN detects
    faces, the first detected box is cropped (widened by margin/2 on each
    side and clipped to the image), resized to image_size x image_size,
    prewhitened and fed to the FaceNet model '20170512-110547'. The
    resulting embedding is stored under the file name without extension.

    Args:
        image_paths: list of image file paths. Paths in which no face is
            detected are removed from this list in place.

    Returns:
        None. If the HDF5 file cannot be created, the error is printed and
        the function returns before any model is loaded.
    """
    minsize = 20  # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # three steps's threshold
    factor = 0.709  # scale factor
    margin = 44
    image_size = 160

    try:
        data_h5 = h5py.File('people_infor.h5', 'w')
    except OSError as e:
        print(e)
        print("生成h5文件失败")
        return

    mtcnn_sess = None
    try:
        # The detector gets a graph and session of its own; the returned
        # pnet/rnet/onet callables are created against mtcnn_sess.
        with tf.Graph().as_default():
            print("开始加载图片对齐模型")
            gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=1.0)
            mtcnn_sess = tf.Session(
                config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
            with mtcnn_sess.as_default():
                pnet, rnet, onet = align.detect_face.create_mtcnn(mtcnn_sess, None)

        print("开始加载图片识别模型")
        # FaceNet is loaded into a fresh graph so it cannot clash with any
        # other model already present in the default graph.
        with tf.Graph().as_default():
            with tf.Session() as sessD:
                # Load the model
                facenet.load_model('20170512-110547')
                images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
                embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
                phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")

                # Iterate over a snapshot: removing from the list that is
                # being iterated would silently skip the following image.
                for image in list(image_paths):
                    img = misc.imread(os.path.expanduser(image), mode='RGB')
                    img_size = np.asarray(img.shape)[0:2]
                    bounding_boxes, _ = align.detect_face.detect_face(
                        img, minsize, pnet, rnet, onet, threshold, factor)
                    if len(bounding_boxes) < 1:
                        image_paths.remove(image)
                        print("没有发现人脸", image)
                        continue

                    # Only the first detected box is used.
                    det = np.squeeze(bounding_boxes[0, 0:4])
                    bb = np.zeros(4, dtype=np.int32)
                    bb[0] = np.maximum(det[0] - margin / 2, 0)
                    bb[1] = np.maximum(det[1] - margin / 2, 0)
                    bb[2] = np.minimum(det[2] + margin / 2, img_size[1])
                    bb[3] = np.minimum(det[3] + margin / 2, img_size[0])
                    cropped = img[bb[1]:bb[3], bb[0]:bb[2], :]
                    aligned = misc.imresize(cropped, (image_size, image_size), interp='bilinear')
                    prewhitened = facenet.prewhiten(aligned)
                    images = np.stack([prewhitened])

                    # Run forward pass to calculate embeddings
                    feed_dict = {images_placeholder: images, phase_train_placeholder: False}
                    emb = sessD.run(embeddings, feed_dict=feed_dict)

                    # Key: file name without directory and extension.
                    people_name = os.path.splitext(os.path.basename(image))[0]
                    data_h5[people_name] = emb[0, :]
    finally:
        if mtcnn_sess is not None:
            mtcnn_sess.close()
        data_h5.close()
def load_database():
    """Load every embedding from people_infor.h5 into ``people_source``.

    Each dataset is read fully into memory and stored under its name, and
    the file is closed afterwards. Entries already in ``people_source`` are
    kept unless a dataset of the same name overwrites them.

    Raises:
        OSError: if people_infor.h5 cannot be opened.
    """
    with h5py.File('people_infor.h5', 'r') as h5file:
        for name in h5file.keys():
            # [()] reads the whole dataset into an in-memory array, so the
            # value stays usable after the file is closed.
            people_source[name] = h5file[name][()]
    print("加载人脸数据完成!")
hfut_face_align.py #对齐模型
# Detection and cropping settings read by align_image().
minsize = 20  # minimum size of face
threshold = [0.6, 0.7, 0.7]  # three steps's threshold
factor = 0.709  # scale factor
margin = 44
image_size = 160
# NOTE(review): controller, sessD and people_source are not read by any code
# visible in this module; kept in case other modules reference them.
controller = None
sessD = None
people_source = {}

# Load the MTCNN detector once at import time, in a graph of its own.
# as_default() only takes effect when entered as a context manager, so both
# the graph and the session are entered with `with`.
print("开始加载图片对齐模型")
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=1.0)
with tf.Graph().as_default():
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
    with sess.as_default():
        pnet, rnet, onet = align.detect_face.create_mtcnn(sess, None)
print("图片对齐模型加载完成!")
def align_image(image):
    """Detect a face in the image file *image* and return it preprocessed.

    The first box returned by the detector is widened by margin/2 on every
    side, clipped to the image bounds, cropped, resized to
    image_size x image_size with bilinear interpolation and prewhitened.

    Returns:
        An array stacking that single prewhitened face, or an empty list
        when no face is detected.
    """
    pixels = misc.imread(os.path.expanduser(image), mode='RGB')
    height_width = np.asarray(pixels.shape)[0:2]
    boxes, _ = align.detect_face.detect_face(pixels, minsize, pnet, rnet, onet, threshold, factor)
    if len(boxes) < 1:
        print("没有发现人脸", image)
        return []

    first_box = np.squeeze(boxes[0, 0:4])
    half_margin = margin / 2
    # Corners are (left, top, right, bottom), truncated to int32 on assignment.
    corners = np.zeros(4, dtype=np.int32)
    corners[0] = np.maximum(first_box[0] - half_margin, 0)
    corners[1] = np.maximum(first_box[1] - half_margin, 0)
    corners[2] = np.minimum(first_box[2] + half_margin, height_width[1])
    corners[3] = np.minimum(first_box[3] + half_margin, height_width[0])

    face = pixels[corners[1]:corners[3], corners[0]:corners[2], :]
    resized = misc.imresize(face, (image_size, image_size), interp='bilinear')
    return np.stack([facenet.prewhiten(resized)])
目前人脸识别还不是太完善,希望和你一起交流讨论。
转载自原文链接, 如需删除请联系管理员。
原文链接:基于python开发pepper机器人的人脸识别--使用facenet,转载请注明来源!