From 5baaf91b5b26e4201ba2ce529f833c62474a6fd7 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Sun, 28 May 2023 20:07:17 +0200 Subject: [PATCH] Add ProfileModel `min_time=60`s argument (#2881) --- ultralytics/yolo/utils/benchmarks.py | 43 ++++++++++++++++++++++------ 1 file changed, 34 insertions(+), 9 deletions(-) diff --git a/ultralytics/yolo/utils/benchmarks.py b/ultralytics/yolo/utils/benchmarks.py index b87fd2b..19010bc 100644 --- a/ultralytics/yolo/utils/benchmarks.py +++ b/ultralytics/yolo/utils/benchmarks.py @@ -156,17 +156,26 @@ class ProfileModels: Attributes: paths (list): Paths of the models to profile. num_timed_runs (int): Number of timed runs for the profiling. Default is 100. - num_warmup_runs (int): Number of warmup runs before profiling. Default is 3. + num_warmup_runs (int): Number of warmup runs before profiling. Default is 10. + min_time (float): Minimum number of seconds to profile for. Default is 60. imgsz (int): Image size used in the models. Default is 640. Methods: profile(): Profiles the models and prints the result. """ - def __init__(self, paths: list, num_timed_runs=100, num_warmup_runs=10, imgsz=640, trt=True, device=None): + def __init__(self, + paths: list, + num_timed_runs=100, + num_warmup_runs=10, + min_time=60, + imgsz=640, + trt=True, + device=None): self.paths = paths self.num_timed_runs = num_timed_runs self.num_warmup_runs = num_warmup_runs + self.min_time = min_time self.imgsz = imgsz self.trt = trt # run TensorRT profiling self.device = device or torch.device(0 if torch.cuda.is_available() else 'cpu') @@ -236,15 +245,24 @@ class ProfileModels: if not self.trt or not Path(engine_file).is_file(): return 0.0, 0.0 - # Warmup runs + # Model and input model = YOLO(engine_file) input_data = np.random.rand(self.imgsz, self.imgsz, 3).astype(np.float32) # must be FP32 - for _ in range(self.num_warmup_runs): - model(input_data, verbose=False) + + # Warmup runs + elapsed = 0.0 + for _ in range(3): + start_time = time.time() + for _ in range(self.num_warmup_runs): + model(input_data, verbose=False) + elapsed = time.time() - start_time + + # Compute number of runs as higher of min_time or num_timed_runs + num_runs = max(round(self.min_time / elapsed * self.num_warmup_runs), self.num_timed_runs * 50) # Timed runs run_times = [] - for _ in tqdm(range(self.num_timed_runs * 50), desc=engine_file): + for _ in tqdm(range(num_runs), desc=engine_file): results = model(input_data, verbose=False) run_times.append(results[0].speed['inference']) # Convert to milliseconds @@ -283,12 +301,19 @@ class ProfileModels: output_name = sess.get_outputs()[0].name # Warmup runs - for _ in range(self.num_warmup_runs): - sess.run([output_name], {input_name: input_data}) + elapsed = 0.0 + for _ in range(3): + start_time = time.time() + for _ in range(self.num_warmup_runs): + sess.run([output_name], {input_name: input_data}) + elapsed = time.time() - start_time + + # Compute number of runs as higher of min_time or num_timed_runs + num_runs = max(round(self.min_time / elapsed * self.num_warmup_runs), self.num_timed_runs) # Timed runs run_times = [] - for _ in tqdm(range(self.num_timed_runs), desc=onnx_file): + for _ in tqdm(range(num_runs), desc=onnx_file): start_time = time.time() sess.run([output_name], {input_name: input_data}) run_times.append((time.time() - start_time) * 1000) # Convert to milliseconds