1、设备jetson agx xavier



4、conda虚拟环境 python=3.6



Nvidia jetson xavier agx 安装pytorch1.9.0 Gpu版_Ponnyao的博客-CSDN博客_xavier安装pytorch


  1. conda activate pytorch #我的虚拟环境名字是pytorch
  2. pip3 install pycuda


  1. #查看tensorrt路径
  2. sudo find / -name "tensorrt*"
  3. #进入虚拟环境的此路径
  4. cd /home/nvidia/archiconda/envs/pytorch/lib/python3.6/site-packages
  5. #设置软连接
  6. ln -s /usr/lib/python3.6/dist-packages/tensorrt
  7. #上一步不行的话用这个
  8. ln -s /usr/lib/python3.6/dist-packages/tensorrt/tensorrt.so




以 yolov5 v6.0 为例

  1. mkdir yolov5_tensorrt
  2. cd yolov5_tensorrt
  3. git clone -b v6.0 https://github.com/ultralytics/yolov5.git
  4. git clone https://github.com/wang-xinyu/tensorrtx.git


下载 yolov5s.pt 权重文件后,放到 yolov5_tensorrt/yolov5 文件夹下



  1. cp yolov5_tensorrt/tensorrtx/yolov5/gen_wts.py yolov5_tensorrt/yolov5
  2. cd yolov5_tensorrt/yolov5
  3. python3 gen_wts.py -w yolov5s.pt -o yolov5s.wts


  1. cd yolov5_tensorrt/tensorrtx/yolov5/
  2. mkdir build
  3. cd build
  4. cp yolov5_tensorrt/yolov5/yolov5s.wts yolov5_tensorrt/tensorrtx/yolov5/build
  5. cmake ..
  6. make
  7. sudo ./yolov5 -s yolov5s.wts yolov5s.engine s





  """
  An example that uses TensorRT's Python api to make inferences.
  """
  import argparse
  import ctypes
  import os
  import random
  import shutil
  import sys
  import threading
  import time

  import cv2
  import numpy as np
  # NOTE(review): pycuda.autoinit is never referenced by name in this script;
  # presumably it is imported for its CUDA-initialisation side effect - confirm.
  import pycuda.autoinit
  import pycuda.driver as cuda
  import tensorrt as trt
  import torch
  import torchvision

  # Detections whose score is not above this value are dropped in post_process.
  CONF_THRESH = 0.5
  # IoU threshold handed to torchvision.ops.nms in post_process.
  IOU_THRESHOLD = 0.4
  def get_img_path_batches(batch_size, img_dir):
      """
      description: Walk img_dir recursively and group every file path found
                   into consecutive batches.
      param:
          batch_size: int, maximum number of paths per batch (must be >= 1)
          img_dir:    str, root directory handed to os.walk
      return:
          a list of lists of paths; every batch holds batch_size paths except
          possibly the last one. Empty list when no file is found.
      raise:
          ValueError: if batch_size is smaller than 1
      """
      # A non-positive batch size can never be filled, so the grouping below
      # would be meaningless; fail loudly instead.
      if batch_size < 1:
          raise ValueError("batch_size must be >= 1, got {}".format(batch_size))

      paths = [
          os.path.join(root, name)
          for root, _dirs, files in os.walk(img_dir)
          for name in files
      ]
      return [paths[i:i + batch_size] for i in range(0, len(paths), batch_size)]
  def plot_one_box(x, img, color=None, label=None, line_thickness=None):
      """
      description: Plots one bounding box on image img,
                   this function comes from YoLov5 project.
      param:
          x:      a box likes [x1,y1,x2,y2]
          img:    a opencv image object, drawn on in place
          color:  color to draw rectangle, such as (0,255,0);
                  a random color is picked when omitted
          label:  str, optional text drawn on a filled box above the top-left corner
          line_thickness: int, derived from the image size when omitted
      return:
          no return
      """
      tl = (
          line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1
      )  # line/font thickness
      color = color or [random.randint(0, 255) for _ in range(3)]
      c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3]))
      cv2.rectangle(img, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA)
      if label:
          tf = max(tl - 1, 1)  # font thickness
          t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
          # Re-use c2 as the far corner of the label background box.
          c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
          cv2.rectangle(img, c1, c2, color, -1, cv2.LINE_AA)  # filled
          cv2.putText(
              img,
              label,
              (c1[0], c1[1] - 2),
              0,
              tl / 3,
              [225, 255, 255],
              thickness=tf,
              lineType=cv2.LINE_AA,
          )
  class YoLov5TRT(object):
      """
      description: A YOLOv5 class that wraps TensorRT ops, preprocess and postprocess ops.
      """

      def __init__(self, engine_file_path):
          """
          description: Deserialize a TensorRT engine and allocate one page-locked
                       host buffer plus one device buffer for every engine binding.
          param:
              engine_file_path: str, path of the serialized engine file
          """
          # Create a Context on this device,
          self.ctx = cuda.Device(0).make_context()
          stream = cuda.Stream()
          TRT_LOGGER = trt.Logger(trt.Logger.INFO)
          runtime = trt.Runtime(TRT_LOGGER)

          # Deserialize the engine from file
          with open(engine_file_path, "rb") as f:
              engine = runtime.deserialize_cuda_engine(f.read())
          context = engine.create_execution_context()

          host_inputs = []
          cuda_inputs = []
          host_outputs = []
          cuda_outputs = []
          bindings = []

          for binding in engine:
              shape = engine.get_binding_shape(binding)
              print('bingding:', binding, shape)
              size = trt.volume(shape) * engine.max_batch_size
              dtype = trt.nptype(engine.get_binding_dtype(binding))
              # Allocate host and device buffers
              host_mem = cuda.pagelocked_empty(size, dtype)
              cuda_mem = cuda.mem_alloc(host_mem.nbytes)
              # Append the device buffer to device bindings.
              bindings.append(int(cuda_mem))
              # Append to the appropriate list.
              if engine.binding_is_input(binding):
                  # The last two dims of the input binding give the network input size.
                  self.input_w = shape[-1]
                  self.input_h = shape[-2]
                  host_inputs.append(host_mem)
                  cuda_inputs.append(cuda_mem)
              else:
                  host_outputs.append(host_mem)
                  cuda_outputs.append(cuda_mem)

          # Store
          self.stream = stream
          self.context = context
          self.engine = engine
          self.host_inputs = host_inputs
          self.cuda_inputs = cuda_inputs
          self.host_outputs = host_outputs
          self.cuda_outputs = cuda_outputs
          self.bindings = bindings
          self.batch_size = engine.max_batch_size

      def infer(self, input_image_path):
          """
          description: Run one inference and draw the detections on the image.
          param:
              input_image_path: despite its name this is the image itself
                                (preprocess_image reads .shape from it), not a path
          return:
              image_raw: the input image with boxes and labels drawn on it
              elapsed:   seconds measured with time.time() around the
                         host-to-device copy, execution, device-to-host copy
                         and stream synchronize
          note:
              Label text is looked up in the module-level `categories` list,
              which this file defines only inside its __main__ section.
          """
          # Make self the active context, pushing it on top of the context stack.
          self.ctx.push()
          try:
              self.input_image_path = input_image_path
              # Restore
              stream = self.stream
              context = self.context
              host_inputs = self.host_inputs
              cuda_inputs = self.cuda_inputs
              host_outputs = self.host_outputs
              cuda_outputs = self.cuda_outputs
              bindings = self.bindings

              # Do image preprocess
              batch_input_image = np.empty(
                  shape=[self.batch_size, 3, self.input_h, self.input_w]
              )
              input_image, image_raw, origin_h, origin_w = self.preprocess_image(
                  input_image_path
              )
              # The single preprocessed image is broadcast into every batch slot.
              np.copyto(batch_input_image, input_image)
              batch_input_image = np.ascontiguousarray(batch_input_image)

              # Copy input image to host buffer
              np.copyto(host_inputs[0], batch_input_image.ravel())
              start = time.time()
              # Transfer input data to the GPU.
              cuda.memcpy_htod_async(cuda_inputs[0], host_inputs[0], stream)
              # Run inference.
              context.execute_async(
                  batch_size=self.batch_size,
                  bindings=bindings,
                  stream_handle=stream.handle,
              )
              # Transfer predictions back from the GPU.
              cuda.memcpy_dtoh_async(host_outputs[0], cuda_outputs[0], stream)
              # Synchronize the stream
              stream.synchronize()
              end = time.time()
          finally:
              # Remove any context from the top of the context stack, deactivating it.
              # Done in `finally` so a failure above cannot leave the context pushed.
              self.ctx.pop()

          # Here we use the first row of output in that batch_size = 1
          output = host_outputs[0]
          # Do postprocess
          result_boxes, result_scores, result_classid = self.post_process(
              output, origin_h, origin_w
          )
          # Draw rectangles and labels on the original image
          for box, score, cls_id in zip(result_boxes, result_scores, result_classid):
              plot_one_box(
                  box,
                  image_raw,
                  label="{}:{:.2f}".format(categories[int(cls_id)], score),
              )
          return image_raw, end - start

      def destroy(self):
          """
          description: Pop the CUDA context created in __init__.
          """
          # Remove any context from the top of the context stack, deactivating it.
          self.ctx.pop()

      def get_raw_image(self, image_path_batch):
          """
          description: Read an image from image path
          param:
              image_path_batch: iterable of image paths
          return:
              a generator yielding cv2.imread(path) for every path
          """
          for img_path in image_path_batch:
              yield cv2.imread(img_path)

      def get_raw_image_zeros(self, image_path_batch=None):
          """
          description: Ready data for warmup
          param:
              image_path_batch: unused
          return:
              a generator yielding batch_size all-zero uint8 images of shape
              [input_h, input_w, 3]
          """
          for _ in range(self.batch_size):
              yield np.zeros([self.input_h, self.input_w, 3], dtype=np.uint8)

      def preprocess_image(self, input_image_path):
          """
          description: Convert BGR image to RGB,
                       resize and pad it to target size, normalize to [0,1],
                       transform to NCHW format.
          param:
              input_image_path: the image to process (an array exposing .shape
                                as (h, w, c)); the name is historical, it is
                                not a path
          return:
              image:  the processed image
              image_raw: the original image
              h: original height
              w: original width
          raise:
              ValueError: if the image is None (e.g. a failed capture read)
          """
          image_raw = input_image_path
          if image_raw is None:
              raise ValueError("preprocess_image received None instead of an image")
          h, w, c = image_raw.shape
          image = cv2.cvtColor(image_raw, cv2.COLOR_BGR2RGB)
          # Calculate width and height and paddings
          r_w = self.input_w / w
          r_h = self.input_h / h
          if r_h > r_w:
              # Width is the limiting side: fill it and pad top/bottom.
              tw = self.input_w
              th = int(r_w * h)
              tx1 = tx2 = 0
              ty1 = int((self.input_h - th) / 2)
              ty2 = self.input_h - th - ty1
          else:
              # Height is the limiting side: fill it and pad left/right.
              tw = int(r_h * w)
              th = self.input_h
              tx1 = int((self.input_w - tw) / 2)
              tx2 = self.input_w - tw - tx1
              ty1 = ty2 = 0
          # Resize the image with long side while maintaining ratio
          image = cv2.resize(image, (tw, th))
          # Pad the short side with (128,128,128)
          image = cv2.copyMakeBorder(
              image, ty1, ty2, tx1, tx2, cv2.BORDER_CONSTANT, (128, 128, 128)
          )
          image = image.astype(np.float32)
          # Normalize to [0,1]
          image /= 255.0
          # HWC to CHW format:
          image = np.transpose(image, [2, 0, 1])
          # CHW to NCHW format
          image = np.expand_dims(image, axis=0)
          # Convert the image to row-major order, also known as "C order":
          image = np.ascontiguousarray(image)
          return image, image_raw, h, w

      def xywh2xyxy(self, origin_h, origin_w, x):
          """
          description: Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2]
                       where xy1=top-left, xy2=bottom-right, and map them from
                       the padded network input back to original image coordinates.
          param:
              origin_h: height of original image
              origin_w: width of original image
              x: A boxes tensor, each row is a box [center_x, center_y, w, h]
          return:
              y: A boxes tensor, each row is a box [x1, y1, x2, y2]
          """
          y = torch.zeros_like(x) if isinstance(x, torch.Tensor) else np.zeros_like(x)
          r_w = self.input_w / origin_w
          r_h = self.input_h / origin_h
          if r_h > r_w:
              # Mirrors preprocess_image: padding was added on top/bottom.
              pad = (self.input_h - r_w * origin_h) / 2
              y[:, 0] = x[:, 0] - x[:, 2] / 2
              y[:, 2] = x[:, 0] + x[:, 2] / 2
              y[:, 1] = x[:, 1] - x[:, 3] / 2 - pad
              y[:, 3] = x[:, 1] + x[:, 3] / 2 - pad
              y /= r_w
          else:
              # Mirrors preprocess_image: padding was added on left/right.
              pad = (self.input_w - r_h * origin_w) / 2
              y[:, 0] = x[:, 0] - x[:, 2] / 2 - pad
              y[:, 2] = x[:, 0] + x[:, 2] / 2 - pad
              y[:, 1] = x[:, 1] - x[:, 3] / 2
              y[:, 3] = x[:, 1] + x[:, 3] / 2
              y /= r_h
          return y

      def post_process(self, output, origin_h, origin_w):
          """
          description: postprocess the prediction
          param:
              output:     A tensor likes [num_boxes,cx,cy,w,h,conf,cls_id, cx,cy,w,h,conf,cls_id, ...]
              origin_h:   height of original image
              origin_w:   width of original image
          return:
              result_boxes: final boxes, a boxes tensor, each row is a box [x1, y1, x2, y2]
              result_scores: final scores, a tensor, each element is the score corresponding to box
              result_classid: final classid, a tensor, each element is the classid corresponding to box
          """
          # Get the num of boxes detected
          num = int(output[0])
          # Reshape to a two dimensional ndarray
          pred = np.reshape(output[1:], (-1, 6))[:num, :]
          # to a torch Tensor
          pred = torch.Tensor(pred).cuda()
          # Get the boxes
          boxes = pred[:, :4]
          # Get the scores
          scores = pred[:, 4]
          # Get the classid
          classid = pred[:, 5]
          # Choose those boxes that score > CONF_THRESH
          si = scores > CONF_THRESH
          boxes = boxes[si, :]
          scores = scores[si]
          classid = classid[si]
          # Transform bbox from [center_x, center_y, w, h] to [x1, y1, x2, y2]
          boxes = self.xywh2xyxy(origin_h, origin_w, boxes)
          # Do nms
          indices = torchvision.ops.nms(boxes, scores, iou_threshold=IOU_THRESHOLD).cpu()
          result_boxes = boxes[indices, :].cpu()
          result_scores = scores[indices].cpu()
          result_classid = classid[indices].cpu()
          return result_boxes, result_scores, result_classid
  class inferThread(threading.Thread):
      """
      description: Thin holder around a YOLOv5 wrapper object. It subclasses
                   threading.Thread but defines no run(); infer() executes
                   synchronously in the caller's thread.
      """

      def __init__(self, yolov5_wrapper):
          """
          param:
              yolov5_wrapper: object exposing infer(frame) -> (image, seconds)
          """
          threading.Thread.__init__(self)
          self.yolov5_wrapper = yolov5_wrapper

      def infer(self, frame):
          """
          description: Forward one frame to the wrapper's infer().
          param:
              frame: the image handed unchanged to yolov5_wrapper.infer
          return:
              the (image, elapsed_time) pair returned by the wrapper
          """
          batch_image_raw, use_time = self.yolov5_wrapper.infer(frame)
          return batch_image_raw, use_time
  class warmUpThread(threading.Thread):
      """
      description: Thread that runs one inference on an all-zero image to
                   warm the engine up.
      """

      def __init__(self, yolov5_wrapper):
          """
          param:
              yolov5_wrapper: object exposing get_raw_image_zeros() and infer(image)
          """
          threading.Thread.__init__(self)
          self.yolov5_wrapper = yolov5_wrapper

      def run(self):
          """
          description: Infer on one zero image and print its shape and the elapsed time.
          """
          # infer() takes a single image, so take one image out of the generator
          # instead of handing over the generator object itself.
          zero_image = next(self.yolov5_wrapper.get_raw_image_zeros())
          image_raw, use_time = self.yolov5_wrapper.infer(zero_image)
          print('warm_up->{}, time->{:.2f}ms'.format(image_raw.shape, use_time * 1000))
  if __name__ == "__main__":
      # Parse command line options.
      parser = argparse.ArgumentParser()
      parser.add_argument('--engine', nargs='+', type=str, default="build/yolov5s.engine", help='.engine path(s)')
      parser.add_argument('--save', type=int, default=0, help='save?')
      opt = parser.parse_args()

      # load custom plugins
      PLUGIN_LIBRARY = "build/libmyplugins.so"
      # nargs='+' yields a list when --engine is given on the command line but
      # the plain default string otherwise; the engine loader opens one path,
      # so use the first entry.
      engine_file_path = opt.engine[0] if isinstance(opt.engine, (list, tuple)) else opt.engine
      ctypes.CDLL(PLUGIN_LIBRARY)

      # load coco labels
      # Kept at module level: YoLov5TRT.infer reads this name as a global.
      categories = ["person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
                    "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
                    "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
                    "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
                    "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
                    "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
                    "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
                    "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
                    "hair drier", "toothbrush"]

      # a YoLov5TRT instance
      yolov5_wrapper = YoLov5TRT(engine_file_path)
      cap = cv2.VideoCapture(0)
      try:
          # inferThread defines no run(), so it is used as a plain object and
          # infer() is called synchronously for every frame.
          thread1 = inferThread(yolov5_wrapper)
          while True:
              ok, frame = cap.read()
              if not ok:
                  # No frame delivered (camera missing or stream ended).
                  break
              img, t = thread1.infer(frame)
              cv2.imshow("result", img)
              if cv2.waitKey(1) & 0xFF == ord('q'):  # 1 millisecond
                  break
      finally:
          # destroy the instance
          cap.release()
          cv2.destroyAllWindows()
          yolov5_wrapper.destroy()


