恩培-计算机视觉

 找回密码
 立即注册
搜索
查看: 1475|回复: 1

ReID行人重识别跨境头搜人

[复制链接]

157

主题

103

回帖

3872

积分

管理员

Rank: 9Rank: 9Rank: 9

积分
3872
发表于 2023-10-29 18:59:21 | 显示全部楼层 |阅读模式

ReID行人重识别跨境头搜人

项目 内容
更新人 @恩培
更新时间 2023-10-29
标题 ReID行人重识别跨境头搜人
方法 ReID、metric learning、faiss vector store
相关付费内容 六、项目六:Human-ReID 跨镜头智能寻人系统

[toc]

1. 视频/截图

https://www.bilibili.com/video/BV1be41197ZD/?vd_source=39b1662212679b11469d17d3bee8df4e

2. 核心代码

# 将多个视频搜索结果合并成一个视频
# step:
# 1. 获取query特征
# 2. 加载多个视频的gallery特征,文件名列表
# 3. 分别在多个视频中搜索query
# 4. 将搜索结果合并成一个视频

import os
import cv2
import numpy as np
import glob
import matplotlib.pyplot as plt
import tqdm
import torch
import pickle
# 使用faiss向量库
import faiss
from PIL import Image
# 提取特征
from model.REID import getImgFeat
# args
import argparse


class GridDemo:
    def __init__(self, query_path, threshold=0.5) :
        # 获取query特征
        self.query_feat = getImgFeat(query_path).numpy()
        self.query_img_data = cv2.imread(query_path)
        # 获取多个视频的gallery特征、文件名列表
        indexs_list = []
        imgs_list = []
        # 只搜索4个视频
        self.query_video_num = 6
        for i in tqdm.tqdm(range(1, 1+self.query_video_num), desc="读取多个视频的gallery特征、文件名列表..."):
            imgs, index = self.loadVideoFeatAndFilenames(i)
            indexs_list.append(index)
            imgs_list.append(imgs)
        # 搜索阈值
        self.threshold = threshold
        # 搜索结果
        self.results_img_list = []
        self.results_dist_list = []
        # 搜索
        for i in tqdm.tqdm(range(self.query_video_num),desc="搜索多个视频中的query..."):
            img_list, D = self.queryVideo(indexs_list[i], imgs_list[i])
            self.results_img_list.append(img_list)
            self.results_dist_list.append(D)

        # 视频cap对象列表
        self.cap_list = []
        for i in tqdm.tqdm(range(1, 1+self.query_video_num), desc="读取视频cap对象..."):
            cap = cv2.VideoCapture('./videos/cam{}.mp4'.format(i))
            self.cap_list.append(cap)
        # 缩放比例
        self.scale = 3

    # 在单个视频中搜索query,返回根据frame_id排序的图片列表、置信度(距离)
    def queryVideo(self, index, imgs):
        # 搜索
        lims, D, I = index.range_search(self.query_feat, self.threshold) 
        # 根据索引I找到对应的图片
        # img_list: key为图片索引,value为图片路径
        img_list = {}
        for idx, val in enumerate(I):
            img_list[idx] = imgs[val]

        # 文件根据frame_id排序
        img_list = sorted(img_list.items(), key=lambda x: int(x[1].split("_")[1]))

        # 根据img_list的key,重新对D和I进行排序
        keys = []
        for i in img_list:
            keys.append(i[0])

        # 根据key重新排序,使用的是numpy的fancy indexing,即D[[1,2,3]],表示取D中索引为1,2,3的元素
        D_new = D[keys]
        # I_new = I[keys]

        return img_list, D_new



    # 获取单个视频的文件名列表、特征
    def loadVideoFeatAndFilenames(self, video_index):
        # 读取文件名列表
        with open('./weights/cam{}_human_features.pickle'.format(video_index), 'rb') as f:
            data = pickle.load(f)
            imgs = data['imgs']
        # 读取faiss索引
        index = faiss.read_index('./weights/cam{}.index'.format(video_index))

        return imgs, index

    # 获取缩放后画面的ltrb
    def getResizedLtrb(self, l, t, r, b):
        # 缩放后的ltrb
        l_new = int(l / self.scale)
        t_new = int(t / self.scale)
        r_new = int(l_new + (r - l) / self.scale)
        b_new = int(t_new + (b - t) / self.scale)
        return l_new, t_new, r_new, b_new
    # 获取视频某一帧
    # 参数:video_index: 视频索引、offset_id: 偏移量
    def getVideoFrame(self, video_index, offset_id):
        # 检查偏移量是否越界
        if offset_id >= len(self.results_img_list[video_index]):
            print("camera {} offset_id {} out of range".format(video_index+1, offset_id))
            return None, None, None
        # 获取offset_id对应的frame、图片索引
        id, frame = self.results_img_list[video_index][offset_id]
        frame_id = int(frame.split("_")[1]) # 原始画面的帧号
        # ltrb
        l = int(frame.split("_")[2])
        t = int(frame.split("_")[3])
        r = int(frame.split("_")[4])
        b = int(frame.split("_")[5].split(".")[0])
        # 缩放后的ltrb
        l, t, r, b = self.getResizedLtrb(l, t, r, b)
        # jump to frame
        self.cap_list[video_index].set(cv2.CAP_PROP_POS_FRAMES, frame_id)
        # 读取
        ret, video_frame = self.cap_list[video_index].read()
        # 缩放为原来的1/self.scale
        video_frame = cv2.resize(video_frame, (video_frame.shape[1]//self.scale, video_frame.shape[0]//self.scale))
        # draw rectangle
        cv2.rectangle(video_frame, (l, t), (r, b), (0, 255, 0), 2)
        # draw similarity, x100再保留两位小数
        similarity = round(self.results_dist_list[video_index][id]*100, 2)
        cv2.putText(video_frame, str(similarity) + "%", ( l, t-10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
        # draw camera id
        cv2.putText(video_frame, "camera: " + str(video_index+1), (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
        # draw frame id
        cv2.putText(video_frame, "frame: " + str(frame_id), (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)

        # img_frame
        img_frame = cv2.imread(frame)
        return video_frame , img_frame, similarity




    # 将搜索结果合并成一个视频
    def run(self):
        # 读取视频,跳转到指定帧
        offset_id = 0
        # resize
        resize_w = 720
        resize_h = 400
        # 当前黑屏的窗口,状态为True
        black_window_num = {}

        # writer
        writer = cv2.VideoWriter(
            "./output/grid.mp4", cv2.VideoWriter_fourcc(*"mp4v"), 25, (resize_w*2, 150+resize_h*3)
        )

        while True:
            # 创建一个宽度为resize_w*2,高度为150的黑色画布
            canvas = np.zeros((150, resize_w * 2, 3), dtype=np.uint8)
            # 缩放query图片为高度120, 宽度按比例缩放
            query_ratio = self.query_img_data.shape[1] / self.query_img_data.shape[0]
            query_img = cv2.resize(self.query_img_data,  (int(100*query_ratio), 100))
            # 将query图片贴到画布左侧中心
            canvas[15:15+query_img.shape[0], 100:100+query_img.shape[1]] = query_img
            # 下方加query图片的文字说明
            cv2.putText(canvas, "query", (90, 140), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 255), 1)
            # 读取6个视频的frame
            frame_list = []
            for i in range(self.query_video_num):
                video_frame , img_frame, similarity = self.getVideoFrame(i, offset_id)
                if video_frame is None:
                    # 用黑色填充 720 x 480
                    video_frame = np.zeros((resize_h, resize_w, 3), dtype=np.uint8)
                    # 在黑色背景正中间绘制文字:camera: i+1 end
                    cv2.putText(video_frame, "camera: " + str(i+1) + " end", (int(resize_w/2)-100, int(resize_h/2)), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)
                    black_window_num[i] = True
                    # 如果6个窗口都是黑的,则退出
                    if len(black_window_num) == self.query_video_num:
                        return

                else:
                    # 统一缩放为720x480
                    video_frame = cv2.resize(video_frame, (resize_w, resize_h))
                    # 将img_frame按照camera_id贴到画布右侧
                    # 缩放为高度120, 宽度按比例缩放
                    img_ratio = img_frame.shape[1] / img_frame.shape[0]
                    img_frame = cv2.resize(img_frame,  (int(100*img_ratio), 100))
                    # 将img_frame贴到画布右侧中心
                    l = 300 + 200*i
                    t = 15
                    r = l + img_frame.shape[1]
                    b = t + img_frame.shape[0]
                    canvas[t:b, l:r] = img_frame
                    # 下方显示相似度
                    cv2.putText(canvas, str(similarity) + "%", (l-10, b+30), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 1)

                frame_list.append(video_frame)
            # 拼接为3row x 2col
            frame1 = np.hstack((frame_list[0], frame_list[1]))
            frame2 = np.hstack((frame_list[2], frame_list[3]))
            frame3 = np.hstack((frame_list[4], frame_list[5]))
            frame_v = np.vstack((canvas, frame1, frame2, frame3))

            # 写入
            writer.write(frame_v)
            # 显示,缩放
            cv2.imshow("frame", frame_v)

            offset_id += 1
            if cv2.waitKey(10) & 0xFF == ord('q'):
                break






# 运行
# query
test_img = './images/cam3_10068_1537_370_1608_474.jpg'
# test_img = './images/cam1_227_1284_335_1573_1127.jpg'
# test_img = './images/cam1_9302_848_180_1009_739.jpg'
parser = argparse.ArgumentParser(description='grid_video')
parser.add_argument('--query_path', type=str, default=test_img, help='query image path')
parser.add_argument('--threshold', type=float, default=0.5, help='threshold')
args = parser.parse_args()
grid_demo = GridDemo(query_path = args.query_path , threshold=args.threshold)
grid_demo.run()

回复

使用道具 举报

0

主题

1

回帖

14

积分

新手上路

Rank: 1

积分
14
发表于 2023-11-9 10:04:44 | 显示全部楼层
回复

使用道具 举报

您需要登录后才可以回帖 登录 | 立即注册

本版积分规则

恩培-计算机视觉

GMT+8, 2024-7-20 21:16 , Processed in 0.074207 second(s), 23 queries .

Powered by Discuz! X3.4

© 2001-2023 Discuz! Team.

快速回复 返回顶部 返回列表