# 本地部署NanoOWL
# 1、安装依赖
(1)安装 torch
wget https://developer.download.nvidia.cn/compute/redist/jp/v512/pytorch/torch-2.1.0a0+41361538.nv23.06-cp38-cp38-linux_aarch64.whl
pip3 install torch-2.1.0a0+41361538.nv23.06-cp38-cp38-linux_aarch64.whl
# 或
pip3 install torch
(2)安装 tensorrt
pip3 install tensorrt
(3)安装 typing-extensions
# pip3 uninstall typing_extensions # 要安装4.4版本以上,
# 如果提示卸载失败,输入 pip3 show typing-extensions 找到库目录直接删除
# sudo rm -rf /usr/lib/python3/dist-packages/typing_extensions-3.7.4.1.egg-info
pip3 install typing_extensions==4.7.1 -i https://mirror.sjtu.edu.cn/pypi/web/simple
(4)安装 torch2trt
git clone https://github.com/NVIDIA-AI-IOT/torch2trt
cd torch2trt
python3 setup.py install
pip3 install transformers timm accelerate onnx aiohttp -i https://mirror.sjtu.edu.cn/pypi/web/simple
(5)安装 nanoowl
git clone https://github.com/NVIDIA-AI-IOT/nanoowl
cd nanoowl
python3 setup.py develop
ls /dev/video* # 查看视频设备
(6)安装 clip
pip3 install ftfy regex tqdm -i https://mirror.sjtu.edu.cn/pypi/web/simple
pip3 install git+https://github.com/openai/CLIP.git
# 2、为 OWL-ViT 视觉编码器构建 TensorRT 引擎
python3 -m nanoowl.build_image_encoder_engine data/owl_image_encoder_patch32.engine
注意:我在源码中未找到 data/owl_image_encoder_patch32.engine 文件,之后通过安装 nanoowl 的 docker 镜像才找到该文件,并将其复制到 data 目录。
# 3、模型下载和配置
mkdir owlvit-base-patch32
git clone https://huggingface.co/google/owlvit-base-patch32
修改模型地址:nanoowl/owl_predictor.py
第 157 行
self.model = OwlViTForObjectDetection.from_pretrained(model_name).to(self.device).eval()
self.processor = OwlViTProcessor.from_pretrained(model_name)
# 修改为
self.model = OwlViTForObjectDetection.from_pretrained('./owlvit-base-patch32/').to(self.device).eval()
self.processor = OwlViTProcessor.from_pretrained('./owlvit-base-patch32/')
# 4、运行示例
cd examples/tree_demo
python3 tree_demo.py ../../data/owl_image_encoder_patch32.engine
# 重启相机服务
sudo systemctl restart nvargus-daemon
# 5、打开实时相机
# 4.1 CSI 相机
def gstreamer_pipeline(
    sensor_id=0,
    width=1280,
    height=720,
    framerate=30,
    flip_method=0,
):
    """Build a GStreamer pipeline string for a Jetson CSI camera.

    Parameters
    ----------
    sensor_id : int
        CSI sensor index passed to ``nvarguscamerasrc``.
    width, height : int
        Resolution in pixels, used both for the NVMM capture caps and
        for the converted BGR output caps.
    framerate : int
        Capture frame rate in frames per second.
    flip_method : int
        ``nvvidconv`` flip/rotate mode (0 = no rotation).

    Returns
    -------
    str
        A pipeline description suitable for
        ``cv2.VideoCapture(pipeline, cv2.CAP_GSTREAMER)``.
    """
    return (
        # Fix: the original first segment ended with "!" and no trailing
        # space, fusing it with the caps string that follows; the
        # canonical Jetson sample pipeline separates elements with " ! ".
        "nvarguscamerasrc sensor-id=%d ! "
        "video/x-raw(memory:NVMM), width=(int)%d, height=(int)%d, framerate=(fraction)%d/1 ! "
        "nvvidconv flip-method=%d ! "
        "video/x-raw, width=(int)%d, height=(int)%d, format=(string)BGRx ! "
        "videoconvert ! "
        "video/x-raw, format=(string)BGR ! appsink"
        % (
            sensor_id,
            width,
            height,
            framerate,
            flip_method,
            # width/height appear twice: once in the capture caps above,
            # once in the post-conversion caps.
            width,
            height,
        )
    )
# Open the CSI camera through the GStreamer pipeline built above.
# cv2.CAP_GSTREAMER selects OpenCV's GStreamer backend (NOTE: requires an
# OpenCV build with GStreamer support, as shipped on Jetson images).
camera = cv2.VideoCapture(gstreamer_pipeline(sensor_id=0),cv2.CAP_GSTREAMER)
# 4.2 USB 相机
camera = cv2.VideoCapture(0) # 0-9
# 6、参考
教程 - NanoOWL
jetson-containers
transformers