diff --git a/projects/python/perception/object_detection_3d/demos/voxel_object_detection_3d/demo.py b/projects/python/perception/object_detection_3d/demos/voxel_object_detection_3d/demo.py
index 2ce25d77335212873bd8b072af2c21232a459262..43ba1ee12c23c56d1f7f6f0c406904ce029e2b6e 100644
--- a/projects/python/perception/object_detection_3d/demos/voxel_object_detection_3d/demo.py
+++ b/projects/python/perception/object_detection_3d/demos/voxel_object_detection_3d/demo.py
@@ -338,6 +338,8 @@ def voxel_object_detection_3d(config_path, model_name=None):
 
             with lock:
                 output_frame = frame.copy()
+
+            time.sleep(2)
 
     except FileExistsError as e:
         print(e)
diff --git a/projects/python/perception/object_detection_3d/demos/voxel_object_detection_3d/draw_point_clouds.py b/projects/python/perception/object_detection_3d/demos/voxel_object_detection_3d/draw_point_clouds.py
index 2f204926d0d0b1e0df64b60b2687bdd14236489c..5dd0c83e056f5c5e5c1f649ea9fb8894189c4f8a 100644
--- a/projects/python/perception/object_detection_3d/demos/voxel_object_detection_3d/draw_point_clouds.py
+++ b/projects/python/perception/object_detection_3d/demos/voxel_object_detection_3d/draw_point_clouds.py
@@ -123,7 +123,7 @@ def draw_point_cloud_bev(
 
     if id is not None:
         if font is None:
-            font = ImageFont.truetype("./fonts/arial.ttf", 40)
+            font = ImageFont.truetype("projects/python/perception/object_detection_3d/demos/voxel_object_detection_3d/fonts/arial.ttf", 40)
 
         pil_draw.text((y_bev, x_bev), str(id), font=font, align="center")
diff --git a/projects/python/perception/object_detection_3d/demos/voxel_object_detection_3d/models/.gitignore b/projects/python/perception/object_detection_3d/demos/voxel_object_detection_3d/models/.gitignore
deleted file mode 100644
index f59ec20aabf5842d237244ece8c81ab184faeac1..0000000000000000000000000000000000000000
--- a/projects/python/perception/object_detection_3d/demos/voxel_object_detection_3d/models/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-*
\ No newline at end of file
diff --git a/run/eval_tracking_3d.py b/run/eval_tracking_3d.py
index 24e21a037175da6814354765da3511da73fb9a67..e5603ae09d77f1488931118ca70c9b88a2760208 100644
--- a/run/eval_tracking_3d.py
+++ b/run/eval_tracking_3d.py
@@ -76,6 +76,8 @@ def save_detection_inference(
     config="xyres_16.proto",
     eval_suffix="classic",
     return_uncertainty=True,
+    # format="kitti",
+    format="ab3dmot",
 ):
 
     config = os.path.join(config_roots[model_type], config,)
@@ -101,7 +103,7 @@ def save_detection_inference(
         os.makedirs(
             os.path.join(
                 model_path,
-                "tracking_inference_detections",
+                "tracking_inference_detections" + ("" if format == "kitti" else ("_" + format)),
                 eval_suffix,
                 "samples_" + str(samples),
            ),
@@ -111,7 +113,7 @@ def save_detection_inference(
        with open(
            os.path.join(
                model_path,
-                "tracking_inference_detections",
+                "tracking_inference_detections" + ("" if format == "kitti" else ("_" + format)),
                eval_suffix,
                "samples_" + str(samples),
                track_id + ".txt",
            ),
@@ -124,7 +126,7 @@ def save_detection_inference(
                output = learner.infer(input, samples=samples)
 
                result = "\n".join(
-                    box.to_kitti_tracking_string(frame) for box in output
+                    box.to_kitti_tracking_string(frame) if format == "kitti" else box.to_ab3dmot_string(frame) for box in output
                )
 
                if len(output) > 0:
@@ -156,6 +158,9 @@ def test_tracking(
 
     tracking_learner = ObjectTracking3DAb3dmotLearner()
 
+    is_abdet = False
+    is_input_det = True
+
     for samples in samples_list:
         print("samples =", samples)
 
@@ -169,6 +174,21 @@ def test_tracking(
             dataset_tracking_path + "/training/label_02/",
             "detection",
         )
+        if is_abdet:
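+            # Assumption (not verifiable from this diff alone): this branch
+            # loads the per-sequence detection files that save_detection_inference
+            # writes in AB3DMOT format; "run/models/ab3dmotdet" is a local
+            # output directory rather than a repository path.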
+            dataset = KittiTrackingDatasetIterator(
+                "run/models/ab3dmotdet",
+                # dataset_tracking_path + "/training/label_02/",
+                dataset_tracking_path + "/training/label_02/",
+                "tracking",
+                "detection" if is_input_det else "tracking",
+            )
+            # dataset = KittiTrackingDatasetIterator(
+            #     # "run/models/ab3dmotdet",
+            #     dataset_tracking_path + "/training/label_02/",
+            #     dataset_tracking_path + "/training/label_02/",
+            #     "tracking",
+            #     # "tracking",
+            # )
 
         with open(
             os.path.join(
                 model_path,
@@ -176,19 +196,36 @@ def test_tracking(
             ),
             "w",
         ) as f:
-            results = tracking_learner.eval(
-                dataset,
-                os.path.join(
-                    model_path,
-                    "log_tracking_eval_" + eval_suffix + "_samples_" + str(samples),
-                ),
-            )
+
+            if is_input_det:
+
+                results = tracking_learner.eval(
+                    dataset,
+                    os.path.join(
+                        model_path,
+                        "log_tracking_eval_" + eval_suffix + "_samples_" + str(samples),
+                    ),
+                    verbose=True,
+                )
+            else:
+                results = tracking_learner.dataset_eval(
+                    dataset,
+                    os.path.join(
+                        model_path,
+                        "log_tracking_eval_" + eval_suffix + "_samples_" + str(samples),
+                    ),
+                    verbose=True,
+                )
 
             for key, item in results.items():
                 f.write(key)
-                f.write(item)
+                f.write(str(item))
                 f.write("\n\n")
 
+                print(key)
+                print(item)
+                print("\n\n")
+
 
 def test_model(
     model_type,
diff --git a/src/opendr/engine/target.py b/src/opendr/engine/target.py
index 43f91746eddb3634e0c5278e282c113362b403c7..c7f7af8f4fb846fcb45677f0fa5559bc5e69be15 100644
--- a/src/opendr/engine/target.py
+++ b/src/opendr/engine/target.py
@@ -690,6 +690,27 @@ class BoundingBox3D(Target):
 
         return result
 
+    def to_ab3dmot_string(self, frame):
+
+        name_to_id = {
+            "Pedestrian": 1,
+            "Car": 2,
+            "Cyclist": 3,
+        }
+
+        result = ",".join(map(str, [
+            frame,
+            name_to_id[self.name],
+            *([0, 0, 0, 0] if self.bbox2d is None else self.bbox2d),
+            float(self.confidence),
+            *self.dimensions[[1, 2, 0]],  # w h l -> h w l ?
+            *self.location,  # x y z
+            self.rotation_y,
+            self.alpha,
+        ]))
+
+        return result
+
     @property
     def name(self):
         return self.data["name"]
@@ -1035,6 +1056,52 @@ class TrackingAnnotation3DList(Target):
     def __str__(self):
         return str(self.kitti(True))
 
+    def d(self):
+
+        result = ""
+
+        for a in self.boxes:
+
+            if a.name != "Car":
+                continue
+
+            result += " ".join(map(str, [
+                *np.array(a.dimensions)[[1, 2, 0]],
+                "|",
+                *a.location,
+                "|",
+                a.rotation_y,
+                a.id,
+                float(a.confidence),
+            ]))
+
+            result += "\n"
+
+        print(result)
+
+    def d1(self):
+
+        result = ""
+
+        for a in self.boxes:
+
+            if a.name != "Car":
+                continue
+
+            result += " ".join(map(str, [
+                a.id,
+                "hwl",
+                *np.array(a.dimensions)[[1, 2, 0]],  # w h l -> h w l ?
+                "xyz",
+                *a.location,  # x y z
+                a.rotation_y,
+                float(a.confidence),
+            ]))
+
+            result += "\n"
+
+        print(result)
+
 
 class Heatmap(Target):
     """
@@ -1190,7 +1257,7 @@ class UncertaintyBoundingBox3D(BoundingBox3D):
             float(self.variance_occluded),
             float(self.variance_alpha),
             *([0, 0, 0, 0] if self.variance_bbox2d is None else self.variance_bbox2d),  # x y w h
-            *self.variance_dimensions,  # w h l
+            *self.variance_dimensions[[1, 2, 0]],  # w h l
             *self.variance_location,  # x y z
             self.variance_rotation_y,
             float(self.variance_confidence),
@@ -1198,6 +1265,21 @@ class UncertaintyBoundingBox3D(BoundingBox3D):
 
         return result
 
+    def to_ab3dmot_string(self, frame):
+
+        result = super().to_ab3dmot_string(frame)
+
+        # Append the variance fields after the detection fields from the base class
+        result = result + "," + ",".join(map(str, [
+            *([0, 0, 0, 0] if self.variance_bbox2d is None else self.variance_bbox2d),
+            float(self.variance_confidence),
+            *self.variance_dimensions[[1, 2, 0]],  # w h l -> h w l ?
+            *self.variance_location,  # x y z
+            self.variance_rotation_y,
+            self.variance_alpha,
+        ]))
+
+        return result
+
     @property
     def name(self):
         return self.data["name"]
diff --git a/src/opendr/perception/object_detection_3d/voxel_object_detection_3d/second_detector/data/preprocess.py b/src/opendr/perception/object_detection_3d/voxel_object_detection_3d/second_detector/data/preprocess.py
index b66bd8325ee6dd69f1b33d7320b80dcde4c942dd..e4829211eda573c17260c5700c4b0db2b60928a2 100644
--- a/src/opendr/perception/object_detection_3d/voxel_object_detection_3d/second_detector/data/preprocess.py
+++ b/src/opendr/perception/object_detection_3d/voxel_object_detection_3d/second_detector/data/preprocess.py
@@ -359,9 +359,9 @@ def _read_and_prep_v9(info, root_path, num_point_features, prep_func):
 
 def _prep_v9(points, calib, prep_func, annos=None):
 
-    rect = calib["R0_rect"].astype(np.float32)
-    Trv2c = calib["Tr_velo_to_cam"].astype(np.float32)
-    P2 = calib["P2"].astype(np.float32)
+    rect = calib["R0_rect"].astype(np.float64)
+    Trv2c = calib["Tr_velo_to_cam"].astype(np.float64)
+    P2 = calib["P2"].astype(np.float64)
 
     input_dict = {
         "points": points,
diff --git a/src/opendr/perception/object_detection_3d/voxel_object_detection_3d/second_detector/pytorch/core/box_torch_ops.py b/src/opendr/perception/object_detection_3d/voxel_object_detection_3d/second_detector/pytorch/core/box_torch_ops.py
index c36bea8317ec07eb6e7d3ce7512304ac52113759..49ae77aa9b87581f51a9d5b0590a3bc3f37e70c8 100644
--- a/src/opendr/perception/object_detection_3d/voxel_object_detection_3d/second_detector/pytorch/core/box_torch_ops.py
+++ b/src/opendr/perception/object_detection_3d/voxel_object_detection_3d/second_detector/pytorch/core/box_torch_ops.py
@@ -481,6 +481,7 @@ def uncertainty_box_camera_to_lidar(data, r_rect, velo2cam):
 
 
 def box_lidar_to_camera(data, r_rect, velo2cam):
+    data = data.type(torch.float64)
     xyz_lidar = data[..., 0:3]
     w, l, h = data[..., 3:4], data[..., 4:5], data[..., 5:6]
     r = data[..., 6:7]
diff --git a/src/opendr/perception/object_detection_3d/voxel_object_detection_3d/second_detector/run.py b/src/opendr/perception/object_detection_3d/voxel_object_detection_3d/second_detector/run.py
index e0ed58b3938e047b676bf82f0f91e510257d92f0..3043a478738c236ea12ecfe73ece0a0ad4a84240 100644
--- a/src/opendr/perception/object_detection_3d/voxel_object_detection_3d/second_detector/run.py
+++ b/src/opendr/perception/object_detection_3d/voxel_object_detection_3d/second_detector/run.py
@@ -39,15 +39,19 @@ def example_convert_to_torch(example,
         "reg_targets",
         "reg_weights",
         "bev_map",
+        "gt_boxes",
+    ]
+    float64_names = [
         "rect",
         "Trv2c",
         "P2",
-        "gt_boxes",
     ]
 
     for k, v in example.items():
         if k in float_names:
             example_torch[k] = torch.as_tensor(v, dtype=dtype, device=device)
+        elif k in float64_names:
+            example_torch[k] = torch.as_tensor(v, dtype=torch.float64, device=device)
         elif k in ["coordinates", "labels", "num_points"]:
             example_torch[k] = torch.as_tensor(v,
                                                dtype=torch.int32,
diff --git a/src/opendr/perception/object_detection_3d/voxel_object_detection_3d/second_detector/torchplus_tanet/tools.py b/src/opendr/perception/object_detection_3d/voxel_object_detection_3d/second_detector/torchplus_tanet/tools.py
index 657c2b43325b7d7796e2979081d08220c8857ec9..59e6fed16ff175f98b1ccc8bbdc2a250749ef00d 100644
--- a/src/opendr/perception/object_detection_3d/voxel_object_detection_3d/second_detector/torchplus_tanet/tools.py
+++ b/src/opendr/perception/object_detection_3d/voxel_object_detection_3d/second_detector/torchplus_tanet/tools.py
@@ -44,7 +44,7 @@ def torch_to_np_dtype(ttype):
     type_map = {
         torch.float16: np.dtype(np.float16),
         torch.float32: np.dtype(np.float32),
-        torch.float16: np.dtype(np.float64),
+        torch.float64: np.dtype(np.float64),
         torch.int32: np.dtype(np.int32),
         torch.int64: np.dtype(np.int64),
         torch.uint8: np.dtype(np.uint8),
diff --git a/src/opendr/perception/object_detection_3d/voxel_object_detection_3d/voxel_object_detection_3d_learner.py b/src/opendr/perception/object_detection_3d/voxel_object_detection_3d/voxel_object_detection_3d_learner.py
index 65c8f33be1440f2c2ce0eb062958a3602fdfa543..621a024899bccebbb356866176e89ea36e04dc7d 100644
--- a/src/opendr/perception/object_detection_3d/voxel_object_detection_3d/voxel_object_detection_3d_learner.py
+++ b/src/opendr/perception/object_detection_3d/voxel_object_detection_3d/voxel_object_detection_3d_learner.py
@@ -32,6 +32,7 @@ from opendr.perception.object_detection_3d.voxel_object_detection_3d.second_dete
 )
 from opendr.perception.object_detection_3d.voxel_object_detection_3d.second_detector.run import (
     compute_lidar_kitti_output,
+    comput_kitti_output,
     evaluate,
     example_convert_to_torch,
     train,
@@ -434,7 +435,7 @@ class VoxelObjectDetection3DLearner(Learner):
 
         return result
 
-    def infer(self, point_clouds, samples=1):
+    def infer(self, point_clouds, samples=1, image_shape=(370, 1224)):
         if self.model is None:
             raise ValueError("No model loaded or created")
 
@@ -486,11 +487,23 @@ class VoxelObjectDetection3DLearner(Learner):
         self.model.eval()
 
         input_data = None
+        is_camera_output = None
 
         def map_single(point_cloud):
+
+            nonlocal is_camera_output
+
             if isinstance(point_clouds, PointCloudWithCalibration):
+                if is_camera_output is None:
+                    is_camera_output = True
+                elif not is_camera_output:
+                    raise ValueError("Either all or no inputs should have calibration")
                 return self.calib_infer_point_cloud_mapper(point_cloud)
             elif isinstance(point_clouds, PointCloud):
+                if is_camera_output is None:
+                    is_camera_output = False
+                elif is_camera_output:
+                    raise ValueError("Either all or no inputs should have calibration")
                 return self.infer_point_cloud_mapper(point_cloud.data)
             else:
                 raise ValueError("PointCloud or PointCloudWithCalibration expected")
@@ -506,16 +519,27 @@ class VoxelObjectDetection3DLearner(Learner):
                 "point_clouds should be a PointCloud or a list of PointClouds"
             )
 
+        example = example_convert_to_torch(input_data, self.float_dtype, device=self.device)
+
         output = self.model(
-            example_convert_to_torch(input_data, self.float_dtype, device=self.device), samples=samples, return_uncertainty=self.return_uncertainty,
+            example, samples=samples, return_uncertainty=self.return_uncertainty,
         )
 
         if self.model_config.rpn.module_class_name in ["PSA", "VPSA", "RefineDet"]:
             output = output[-1]
 
-        annotations = compute_lidar_kitti_output(
-            output, self.center_limit_range, self.class_names, None, return_uncertainty=self.return_uncertainty
-        )
+        if is_camera_output:
+            batch_image_shape = example["image_shape"] if "image_shape" in example else (
+                [image_shape] * len(example["P2"])
+            )
+
+            annotations = comput_kitti_output(
+                output, batch_image_shape, self.model_config.lidar_input or False, self.center_limit_range, self.class_names, None, return_uncertainty=self.return_uncertainty
+            )
+        else:
+            annotations = compute_lidar_kitti_output(
+                output, self.center_limit_range, self.class_names, None, return_uncertainty=self.return_uncertainty
+            )
 
         if self.return_uncertainty:
             result = [UncertaintyBoundingBox3DList.from_kitti(anno) for anno in annotations]
diff --git a/src/opendr/perception/object_tracking_3d/ab3dmot/algorithm/ab3dmot.py b/src/opendr/perception/object_tracking_3d/ab3dmot/algorithm/ab3dmot.py
index a90e9704152f23f0727a985d8e914407cd63a129..a1885b9c5db47dd5531ad14e7b9dd5dbb9c86754 100644
--- a/src/opendr/perception/object_tracking_3d/ab3dmot/algorithm/ab3dmot.py
+++ b/src/opendr/perception/object_tracking_3d/ab3dmot/algorithm/ab3dmot.py
@@ -104,6 +104,7 @@ class AB3DMOT():
             tracked_boxes.append(tracklet.tracking_bounding_box_3d(self.frame))
 
         result = TrackingAnnotation3DList(tracked_boxes)
+        result.d()
 
         return result
 
     def reset(self):
diff --git a/src/opendr/perception/object_tracking_3d/ab3dmot/algorithm/core.py b/src/opendr/perception/object_tracking_3d/ab3dmot/algorithm/core.py
index 3b382d5bb68cd5e89488b0ce458e9eda31e9f043..a8ae0a334d9193703ed3f3da3d26e7958cc63d96 100644
--- a/src/opendr/perception/object_tracking_3d/ab3dmot/algorithm/core.py
+++ b/src/opendr/perception/object_tracking_3d/ab3dmot/algorithm/core.py
@@ -91,7 +91,11 @@ def iou3D(corners1, corners2):  # [8, 3] -> [8, 3] -> ([], [])
     rect2 = [(corners2[i, 0], corners2[i, 2]) for i in range(3, -1, -1)]
     area1 = polygon_area(np.array(rect1)[:, 0], np.array(rect1)[:, 1])
     area2 = polygon_area(np.array(rect2)[:, 0], np.array(rect2)[:, 1])
-    _, inter_area = convex_hull_intersection(rect1, rect2)
+
+    if np.allclose(corners1, corners2):
+        inter_area = area1
+    else:
+        _, inter_area = convex_hull_intersection(rect1, rect2)
     iou_2d = inter_area / (area1 + area2 - inter_area)
     y_max = min(corners1[0, 1], corners2[0, 1])
     y_min = max(corners1[4, 1], corners2[4, 1])
diff --git a/src/opendr/perception/object_tracking_3d/ab3dmot/algorithm/evaluate.py b/src/opendr/perception/object_tracking_3d/ab3dmot/algorithm/evaluate.py
index 201c14f0dbf95d6f7a5b5989128d7433173973df..3710c72f13052407072b1dac1e82c2e89a2ef2ea 100644
--- a/src/opendr/perception/object_tracking_3d/ab3dmot/algorithm/evaluate.py
+++ b/src/opendr/perception/object_tracking_3d/ab3dmot/algorithm/evaluate.py
@@ -27,7 +27,11 @@ from opendr.perception.object_detection_3d.voxel_object_detection_3d.second_dete
 )
 from opendr.perception.object_tracking_3d.ab3dmot.logger import Logger
 from numba.cuda.cudadrv.error import CudaSupportError
-from opendr.perception.object_tracking_3d.ab3dmot.algorithm.core import convert_3dbox_to_8corner, iou3D
+from opendr.perception.object_tracking_3d.ab3dmot.algorithm.core import (
+    convert_3dbox_to_8corner,
+    iou3D,
+)
+
 
 class TrackingData:
     """
@@ -134,14 +138,10 @@
         self.n_gts = (
            []
        )  # number of ground truth detections minus ignored false negatives and true positives PER SEQUENCE
-        self.n_igts = (
-            []
-        )  # number of ground ignored truth detections PER SEQUENCE
+        self.n_igts = []  # number of ground ignored truth detections PER SEQUENCE
         self.n_gt_trajectories = 0
         self.n_gt_seq = []
-        self.n_tr = (
-            0  # number of tracker detections minus ignored tracker detections
-        )
+        self.n_tr = 0  # number of tracker detections minus ignored tracker detections
        self.n_trs = (
            []
        )  # number of tracker detections minus ignored tracker detections PER SEQUENCE
@@ -164,15 +164,11 @@
         self.total_cost = 0
         self.itp = 0  # number of ignored true positives
         self.itps = []  # number of ignored true positives PER SEQUENCE
-        self.tp = (
-            0  # number of true positives including ignored true positives!
-        )
+        self.tp = 0  # number of true positives including ignored true positives!
         self.tps = (
            []
        )  # number of true positives including ignored true positives PER SEQUENCE
-        self.fn = (
-            0  # number of false negatives WITHOUT ignored false negatives
-        )
+        self.fn = 0  # number of false negatives WITHOUT ignored false negatives
        self.fns = (
            []
        )  # number of false negatives WITHOUT ignored false negatives PER SEQUENCE
@@ -199,9 +195,7 @@
         self.max_occlusion = (
             max_occlusion  # maximum occlusion of an object for evaluation
         )
-        self.min_height = (
-            min_height  # minimum height of an object for evaluation
-        )
+        self.min_height = min_height  # minimum height of an object for evaluation
         self.n_sample_points = 500
 
         # this should be enough to hold all groundtruth trajectories
@@ -215,9 +209,7 @@
         """
 
         try:
-            self._load_data(
-                ground_truths, cls=self.cls, loading_groundtruth=True
-            )
+            self._load_data(ground_truths, cls=self.cls, loading_groundtruth=True)
         except IOError:
             return False
         return True
@@ -237,7 +229,11 @@
         return True
 
     def _load_data(
-        self, input_sequences: List[TrackingAnnotation3DList], cls, min_score=-1000, loading_groundtruth=False
+        self,
+        input_sequences: List[TrackingAnnotation3DList],
+        cls,
+        min_score=-1000,
+        loading_groundtruth=False,
     ):
         """
         Generic loader for ground truth and tracking data.
@@ -281,19 +277,25 @@
                 # KITTI tracking benchmark data format:
                 # (frame,tracklet_id,objectType,truncation,occlusion,alpha,x1,y1,x2,y2,h,w,l,X,Y,Z,ry)
 
-                if not any([s for s in classes if s == trackingAnnotation3D.name.lower()]):
+                if not any(
+                    [s for s in classes if s == trackingAnnotation3D.name.lower()]
+                ):
                     continue
 
                 # get fields from table
                 t_data.frame = int(trackingAnnotation3D.frame)
                 t_data.track_id = int(trackingAnnotation3D.id)
-                t_data.obj_type = trackingAnnotation3D.name.lower()  # object type [car, pedestrian, cyclist, ...]
+                t_data.obj_type = (
+                    trackingAnnotation3D.name.lower()
+                )  # object type [car, pedestrian, cyclist, ...]
                 t_data.truncation = int(
                     trackingAnnotation3D.truncated
                 )  # truncation [-1,0,1,2]
                 t_data.occlusion = int(
                     trackingAnnotation3D.occluded
                 )  # occlusion [-1,0,1,2]
-                t_data.obs_angle = float(trackingAnnotation3D.alpha)  # observation angle [rad]
+                t_data.obs_angle = float(
+                    trackingAnnotation3D.alpha
+                )  # observation angle [rad]
                 t_data.x1 = float(trackingAnnotation3D.bbox2d[0])  # left [px]
                 t_data.y1 = float(trackingAnnotation3D.bbox2d[1])  # top [px]
                 t_data.x2 = float(trackingAnnotation3D.bbox2d[2])  # right [px]
@@ -320,7 +322,8 @@
                     if id_frame in id_frame_cache and not loading_groundtruth:
                         raise ValueError(
                             "track ids are not unique for sequence %d: frame %d"
-                            % (seq, t_data.frame) + (
+                            % (seq, t_data.frame)
+                            + (
                                 " track id %d occured at least twice for this frame"
                                 % t_data.track_id
                             )
                         )
                     id_frame_cache.append(id_frame)
                     f_data[t_data.frame].append(copy.copy(t_data))
 
-                    if (
-                        t_data.track_id not in ids and
-                        t_data.obj_type != "dontcare"
-                    ):
+                    if t_data.track_id not in ids and t_data.obj_type != "dontcare":
                         ids.append(t_data.track_id)
                         n_trajectories += 1
                         n_in_seq += 1
 
                     # check if uploaded data provides information for 2D and 3D evaluation
                     if (
-                        not loading_groundtruth and
-                        eval_2d is True and
-                        (
-                            t_data.x1 == -1 or
-                            t_data.x2 == -1 or
-                            t_data.y1 == -1 or
-                            t_data.y2 == -1
+                        not loading_groundtruth
+                        and eval_2d is True
+                        and (
+                            t_data.x1 == -1
+                            or t_data.x2 == -1
+                            or t_data.y1 == -1
+                            or t_data.y2 == -1
                         )
                     ):
                         eval_2d = False
                     if (
-                        not loading_groundtruth and
-                        eval_3d is True and
-                        (
-                            t_data.X == -1000 or
-                            t_data.Y == -1000 or
-                            t_data.Z == -1000
-                        )
+                        not loading_groundtruth
+                        and eval_3d is True
+                        and (t_data.X == -1000 or t_data.Y == -1000 or t_data.Z == -1000)
                     ):
                         eval_3d = False
@@ -396,14 +392,18 @@
 
     def box_overlap_3d(self, a: TrackingData, b: TrackingData, criterion="union"):
 
-        a_corners = convert_3dbox_to_8corner(np.array([a.X, a.Y, a.Z, a.yaw, a.h, a.w, a.length]))
-        b_corners = convert_3dbox_to_8corner(np.array([b.X, b.Y, b.Z, b.yaw, b.h, b.w, b.length]))
+        a_corners = convert_3dbox_to_8corner(
+            np.array([a.X, a.Y, a.Z, a.yaw, a.h, a.w, a.length])
+        )
+        b_corners = convert_3dbox_to_8corner(
+            np.array([b.X, b.Y, b.Z, b.yaw, b.h, b.w, b.length])
+        )
 
         result = iou3D(a_corners, b_corners)[0]
 
         return result
 
-    def compute(self):
+    def compute(self, threshold=-10000):
         """
             Computes the metrics defined in
                 - Stiefelhagen 2008: Evaluating Multiple Object Tracking Performance: The CLEAR MOT Metrics
@@ -415,18 +415,56 @@
         # construct Munkres object for Hungarian Method association
         hm = Munkres()
         max_cost = 1e9
+        self.scores = []
 
         # go through all frames and associate ground truth and tracker results
         # groundtruth and tracker contain lists for every single frame containing lists of KITTI format detections
         fr, ids = 0, 0
         for seq_idx in range(len(self.groundtruth)):
             self.log(
-                Logger.LOG_WHEN_NORMAL, "Computing metrics " + self.cls +
-                "[" + str(seq_idx + 1) + "/" + str(len(self.groundtruth)) + "]", end='\r'
+                Logger.LOG_WHEN_NORMAL,
+                "Computing metrics "
+                + self.cls
+                + "["
+                + str(seq_idx + 1)
+                + "/"
+                + str(len(self.groundtruth))
+                + "]",
+                end="\r",
             )
             seq_gt = self.groundtruth[seq_idx]
             seq_dc = self.dcareas[seq_idx]  # don't care areas
-            seq_tracker = self.tracker[seq_idx]
+            seq_tracker_unfiltered = self.tracker[seq_idx]
+
+            tracker_id_score = {}
+            for frame in range(len(seq_tracker_unfiltered)):
+                tracks = seq_tracker_unfiltered[frame]
+                for index in range(len(tracks)):
+                    track = tracks[index]
+                    id = track.track_id
+                    score = track.score
+
+                    if id not in tracker_id_score.keys():
+                        tracker_id_score[id] = []
+                    tracker_id_score[id].append(score)
+
+            to_delete_id = []
+            for track_id, score_list in tracker_id_score.items():
+                average_score = sum(score_list) / float(len(score_list))
+                if average_score < threshold:
+                    to_delete_id.append(track_id)
+
+            seq_tracker = []
+            for frame in range(len(seq_tracker_unfiltered)):
+                seq_tracker_frame = list()
+                tracks = seq_tracker_unfiltered[frame]
+                for index in range(len(tracks)):
+                    track = tracks[index]
+                    id = track.track_id
+                    if id not in to_delete_id:
+                        seq_tracker_frame.append(track)
+                seq_tracker.append(seq_tracker_frame)
+
             seq_trajectories = defaultdict(list)
             seq_ignored = defaultdict(list)
@@ -493,9 +531,7 @@
                 tmptp = 0
                 tmpfp = 0
                 tmpfn = 0
-                tmpc = (
-                    0  # this will sum up the overlaps for all true positives
-                )
+                tmpc = 0  # this will sum up the overlaps for all true positives
                 tmpcs = [0] * len(
                     g
                 )  # this will save the overlaps for all true positives
@@ -520,6 +556,8 @@
                         # true positives are only valid associations
                         self.tp += 1
                         tmptp += 1
+
+                        self.scores.append(t[col].score)
                     else:
                         g[row].tracker = -1
                         self.fn += 1
@@ -541,12 +579,11 @@
                     tt_height = abs(tt.y1 - tt.y2)
 
                     if (
-                        (self.cls == "car" and tt.obj_type == "van") or
-                        (
-                            self.cls == "pedestrian" and
-                            tt.obj_type == "person_sitting"
-                        ) or
-                        tt_height <= self.min_height
+                        (self.cls == "car" and tt.obj_type == "van")
+                        or (
+                            self.cls == "pedestrian" and tt.obj_type == "person_sitting"
+                        )
+                        or tt_height <= self.min_height
                     ) and not tt.valid:
                         nignoredtracker += 1
                         tt.ignored = True
@@ -563,9 +600,7 @@
                 # check for ignored FN/TP (truncation or neighboring object class)
                 ignoredfn = 0  # the number of ignored false negatives
                 nignoredtp = 0  # the number of ignored true positives
-                nignoredpairs = (
-                    0  # the number of ignored pairs, i.e. a true positive
-                )
+                nignoredpairs = 0  # the number of ignored pairs, i.e. a true positive
                 # which is ignored but where the associated tracker
                 # detection has already been ignored
 
@@ -573,12 +608,12 @@
                 for gg in g:
                     if gg.tracker < 0:
                         if (
-                            gg.occlusion > self.max_occlusion or
-                            gg.truncation > self.max_truncation or
-                            (self.cls == "car" and gg.obj_type == "van") or
-                            (
-                                self.cls == "pedestrian" and
-                                gg.obj_type == "person_sitting"
+                            gg.occlusion > self.max_occlusion
+                            or gg.truncation > self.max_truncation
+                            or (self.cls == "car" and gg.obj_type == "van")
+                            or (
+                                self.cls == "pedestrian"
+                                and gg.obj_type == "person_sitting"
                             )
                         ):
                             seq_ignored[gg.track_id][-1] = True
@@ -587,12 +622,12 @@
 
                     elif gg.tracker >= 0:
                         if (
-                            gg.occlusion > self.max_occlusion or
-                            gg.truncation > self.max_truncation or
-                            (self.cls == "car" and gg.obj_type == "van") or
-                            (
-                                self.cls == "pedestrian" and
-                                gg.obj_type == "person_sitting"
+                            gg.occlusion > self.max_occlusion
+                            or gg.truncation > self.max_truncation
+                            or (self.cls == "car" and gg.obj_type == "van")
+                            or (
+                                self.cls == "pedestrian"
+                                and gg.obj_type == "person_sitting"
                            )
                        ):
 
@@ -641,20 +676,8 @@
 
                 # false positives = tracker bboxes - associated tracker bboxes
                 # mismatches (mme_t)
-                tmpfp += (
-                    len(t) -
-                    tmptp -
-                    nignoredtracker -
-                    nignoredtp +
-                    nignoredpairs
-                )
-                self.fp += (
-                    len(t) -
-                    tmptp -
-                    nignoredtracker -
-                    nignoredtp +
-                    nignoredpairs
-                )
+                tmpfp += len(t) - tmptp - nignoredtracker - nignoredtp + nignoredpairs
+                self.fp += len(t) - tmptp - nignoredtracker - nignoredtp + nignoredpairs
                 # tmpfp = len(t) - tmptp - nignoredtp # == len(t) - (tp - ignoredtp) - ignoredtp
                 # self.fp += len(t) - tmptp - nignoredtp
@@ -667,14 +690,27 @@
                 seqigt += ignoredfn + nignoredtp
                 seqitr += nignoredtracker
 
-                print(
-                    "seqtp =", seqtp,
-                    " seqitp =", seqitp,
-                    " seqfn =", seqfn,
-                    " seqifn =", seqifn,
-                    " seqfp =", seqfp,
-                    " seqigt =", seqigt,
-                    " seqitr =", seqitr,
+                self.log(
+                    Logger.LOG_WHEN_VERBOSE,
+                    str(seq_idx + 1),
+                    "/",
+                    str(len(self.groundtruth)),
+                    "seqtp =",
+                    seqtp,
+                    " seqitp =",
+                    seqitp,
+                    " seqfn =",
+                    seqfn,
+                    " seqifn =",
+                    seqifn,
+                    " seqfp =",
+                    seqfp,
+                    " seqigt =",
+                    seqigt,
+                    " seqitr =",
+                    seqitr,
+                    "\t\t\t",
+                    end="\r",
                 )
 
                 # sanity checks
@@ -699,20 +735,12 @@
                 if tmpfp < 0:
                     raise NameError("Something went wrong! FP is negative")
                 if tmptp + tmpfn is not len(g) - ignoredfn - nignoredtp:
-                    raise NameError(
-                        "Something went wrong! nGroundtruth is not TP+FN"
-                    )
+                    raise NameError("Something went wrong! nGroundtruth is not TP+FN")
                 if (
-                    tmptp +
-                    tmpfp +
-                    nignoredtp +
-                    nignoredtracker -
-                    nignoredpairs
+                    tmptp + tmpfp + nignoredtp + nignoredtracker - nignoredpairs
                     is not len(t)
                 ):
-                    raise NameError(
-                        "Something went wrong! nTracker is not TP+FP"
-                    )
+                    raise NameError("Something went wrong! nTracker is not TP+FP")
 
                 # check for id switches or fragmentations
                 for i, tt in enumerate(this_ids[0]):
@@ -761,9 +789,7 @@
                 continue
             tmpMT, tmpML, tmpPT, tmpId_switches, tmpFragments = [0] * 5
             n_ignored_tr = 0
-            for g, ign_g in zip(
-                seq_trajectories.values(), seq_ignored.values()
-            ):
+            for g, ign_g in zip(seq_trajectories.values(), seq_ignored.values()):
                 # all frames of this gt trajectory are ignored
                 if all(ign_g):
                     n_ignored_tr += 1
@@ -785,19 +811,19 @@
                         continue
                     lgt += 1
                     if (
-                        last_id != g[f] and
-                        last_id != -1 and
-                        g[f] != -1 and
-                        g[f - 1] != -1
+                        last_id != g[f]
+                        and last_id != -1
+                        and g[f] != -1
+                        and g[f - 1] != -1
                     ):
                         tmpId_switches += 1
                         self.id_switches += 1
                     if (
-                        f < len(g) - 1 and
-                        g[f - 1] != g[f] and
-                        last_id != -1 and
-                        g[f] != -1 and
-                        g[f + 1] != -1
+                        f < len(g) - 1
+                        and g[f - 1] != g[f]
+                        and last_id != -1
+                        and g[f] != -1
+                        and g[f + 1] != -1
                     ):
                         tmpFragments += 1
                         self.fragments += 1
@@ -806,11 +832,11 @@
                     last_id = g[f]
                 # handle last frame; tracked state is handled in for loop (g[f]!=-1)
                 if (
-                    len(g) > 1 and
-                    g[f - 1] != g[f] and
-                    last_id != -1 and
-                    g[f] != -1 and
-                    not ign_g[f]
+                    len(g) > 1
+                    and g[f - 1] != g[f]
+                    and last_id != -1
+                    and g[f] != -1
+                    and not ign_g[f]
                 ):
                     tmpFragments += 1
                     self.fragments += 1
@@ -847,9 +873,7 @@
             self.F1 = 0.0
         else:
             self.F1 = (
-                2.0 *
-                (self.precision * self.recall) /
-                (self.precision + self.recall)
+                2.0 * (self.precision * self.recall) / (self.precision + self.recall)
             )
         if sum(self.n_frames) == 0:
             self.FAR = "n/a"
@@ -861,9 +885,7 @@
             self.MOTA = -float("inf")
             self.MODA = -float("inf")
         else:
-            self.MOTA = 1 - (self.fn + self.fp + self.id_switches) / float(
-                self.n_gt
-            )
+            self.MOTA = 1 - (self.fn + self.fp + self.id_switches) / float(self.n_gt)
             self.MODA = 1 - (self.fn + self.fp) / float(self.n_gt)
         if self.tp == 0:
             self.MOTP = float("inf")
@@ -871,9 +893,9 @@
             self.MOTP = self.total_cost / float(self.tp)
         if self.n_gt != 0:
             if self.id_switches == 0:
-                self.MOTAL = 1 - (
-                    self.fn + self.fp + self.id_switches
-                ) / float(self.n_gt)
+                self.MOTAL = 1 - (self.fn + self.fp + self.id_switches) / float(
+                    self.n_gt
+                )
             else:
                 self.MOTAL = 1 - (
                     self.fn + self.fp + math.log10(self.id_switches)
@@ -884,46 +906,47 @@
             self.MODP = "n/a"
         else:
             self.MODP = sum(self.MODP_t) / float(sum(self.n_frames))
+
+        self.num_gt = self.tp + self.fn
         return True
 
+    def print_entry(self, key, val, width=(70, 10)):
+
+        s_out = key.ljust(width[0])
+        if type(val) == int:
+            s = "%%%dd" % width[1]
+            s_out += s % val
+        elif type(val) == float:
+            s = "%%%d.4f" % (width[1])
+            s_out += s % val
+        else:
+            s_out += ("%s" % val).rjust(width[1])
+        return s_out
+
     def create_summary(self):
-        """
-            Generate and mail a summary of the results.
-            If mailpy.py is present, the summary is instead printed.
-        """
 
         summary = ""
 
         summary += "tracking evaluation summary".center(80, "=") + "\n"
         summary += (
-            self.print_entry(
-                "Multiple Object Tracking Accuracy (MOTA)", self.MOTA
-            ) +
-            "\n"
+            self.print_entry("Multiple Object Tracking Accuracy (MOTA)", self.MOTA)
+            + "\n"
         )
         summary += (
-            self.print_entry(
-                "Multiple Object Tracking Precision (MOTP)", self.MOTP
-            ) +
-            "\n"
+            self.print_entry("Multiple Object Tracking Precision (MOTP)", self.MOTP)
+            + "\n"
        )
        summary += (
-            self.print_entry(
-                "Multiple Object Tracking Accuracy (MOTAL)", self.MOTAL
-            ) +
-            "\n"
+            self.print_entry("Multiple Object Tracking Accuracy (MOTAL)", self.MOTAL)
+            + "\n"
        )
        summary += (
-            self.print_entry(
-                "Multiple Object Detection Accuracy (MODA)", self.MODA
-            ) +
-            "\n"
+            self.print_entry("Multiple Object Detection Accuracy (MODA)", self.MODA)
+            + "\n"
        )
        summary += (
-            self.print_entry(
-                "Multiple Object Detection Precision (MODP)", self.MODP
-            ) +
-            "\n"
+            self.print_entry("Multiple Object Detection Precision (MODP)", self.MODP)
+            + "\n"
        )
        summary += "\n"
        summary += self.print_entry("Recall", self.recall) + "\n"
@@ -945,48 +968,122 @@
         summary += self.print_entry("Fragmentations", self.fragments) + "\n"
         summary += "\n"
         summary += (
-            self.print_entry(
-                "Ground Truth Objects (Total)", self.n_gt + self.n_igt
-            ) +
-            "\n"
+            self.print_entry("Ground Truth Objects (Total)", self.n_gt + self.n_igt)
+            + "\n"
         )
+        summary += self.print_entry("Ignored Ground Truth Objects", self.n_igt) + "\n"
         summary += (
-            self.print_entry("Ignored Ground Truth Objects", self.n_igt) + "\n"
-        )
-        summary += (
-            self.print_entry(
-                "Ground Truth Trajectories", self.n_gt_trajectories
-            ) +
-            "\n"
+            self.print_entry("Ground Truth Trajectories", self.n_gt_trajectories) + "\n"
         )
         summary += "\n"
         summary += self.print_entry("Tracker Objects (Total)", self.n_tr) + "\n"
+        summary += self.print_entry("Ignored Tracker Objects", self.n_itr) + "\n"
         summary += (
-            self.print_entry("Ignored Tracker Objects", self.n_itr) + "\n"
+            self.print_entry("Tracker Trajectories", self.n_tr_trajectories) + "\n"
         )
-        summary += (
-            self.print_entry("Tracker Trajectories", self.n_tr_trajectories) +
-            "\n"
-        )
+        summary += "=" * 80
+
+        return summary
+
+    def reset(self):
+        self.n_gt = 0
+        self.n_igt = 0
+        self.n_tr = 0
+        self.n_itr = 0
+        self.n_igttr = 0
+
+        self.MOTA = 0
+        self.MOTP = 0
+        self.MOTAL = 0
+        self.MODA = 0
+        self.MODP = 0
+        self.MODP_t = []
+
+        self.recall = 0
+        self.precision = 0
+        self.F1 = 0
+        self.FAR = 0
+
+        self.total_cost = 0
+        self.itp = 0
+        self.tp = 0
+        self.fn = 0
+        self.ifn = 0
+        self.fp = 0
+
+        self.n_gts = []
+        self.n_igts = []
+        self.n_trs = []
+        self.n_itrs = []
+
+        self.itps = []
+        self.tps = []
+        self.fns = []
+        self.ifns = []
+        self.fps = []
+
+        self.fragments = 0
+        self.id_switches = 0
+        self.MT = 0
+        self.PT = 0
+        self.ML = 0
+
+        self.gt_trajectories = [[] for _ in range(self.n_sequences)]
+        self.ign_trajectories = [[] for _ in range(self.n_sequences)]
+
+    def log_summary_short(self, threshold):
+        summary = ""
+
+        summary += ("evaluation with confidence threshold %f" % threshold).center(
+            80, "="
+        ) + "\n"
+        summary += " MOTA MOTP MODA MODP MT ML IDS FRAG F1 Prec Recall FAR TP FP FN\n"
+
+        summary += "{:.4f} {:.4f} {:.4f} {:.4f} {:.4f} {:.4f} {:5d} {:5d} {:.4f} {:.4f} {:.4f} {:.4f} {:5d} {:5d} {:5d}\n".format(
+            self.MOTA,
+            self.MOTP,
+            self.MODA,
+            self.MODP,
+            self.MT,
+            self.ML,
+            self.id_switches,
+            self.fragments,
+            self.F1,
+            self.precision,
+            self.recall,
+            self.FAR,
+            self.tp,
+            self.fp,
+            self.fn,
         )
         summary += "=" * 80
 
+        self.log(Logger.LOG_WHEN_NORMAL, summary)
+
         return summary
 
-    def print_entry(self, key, val, width=(70, 10)):
-        """
-            Pretty print an entry in a table fashion.
-        """
-        s_out = key.ljust(width[0])
-        if type(val) == int:
-            s = "%%%dd" % width[1]
-            s_out += s % val
-        elif type(val) == float:
-            s = "%%%df" % (width[1])
-            s_out += s % val
+
+def create_thresholds(scores, num_gt, num_sample_pts=11):
+    # Pick score thresholds at (roughly) evenly spaced recall points,
+    # mirroring the threshold sampling used by the KITTI detection evaluation.
+    scores = np.array(scores)
+    scores.sort()
+    scores = scores[::-1]
+    current_recall = 0
+    thresholds = []
+    for i, score in enumerate(scores):
+        left_recall = (i + 1) / float(num_gt)
+        if i < (len(scores) - 1):
+            right_recall = (i + 2) / float(num_gt)
         else:
-            s_out += ("%s" % val).rjust(width[1])
-        return s_out
+            right_recall = left_recall
+        if ((right_recall - current_recall) < (current_recall - left_recall)) and (
+            i < (len(scores) - 1)
+        ):
+            continue
+
+        thresholds.append(score)
+        current_recall += 1 / (num_sample_pts - 1.0)
+
+    return thresholds
 
 
 def evaluate(
@@ -1005,7 +1102,8 @@ def evaluate(
 
     for c in classes:
         evaluator = TrackingEvaluator(
-            cls=c, n_sequences=len(ground_truths),
+            cls=c,
+            n_sequences=len(ground_truths),
             n_frames=[len(a) for a in ground_truths],
             log=log,
         )
@@ -1018,8 +1116,23 @@ def evaluate(
             raise ValueError(
                 "The predictions data does not provide results for every sequence."
            )
-        return results
+
+        evaluator.compute()
+        best_mota, best_threshold = 0, -10000
+        thresholds = create_thresholds(evaluator.scores, evaluator.num_gt)
+
+        for threshold in thresholds:
+            evaluator.reset()
+            evaluator.compute(threshold)
+            mota = evaluator.MOTA
+            if mota > best_mota:
+                best_threshold = threshold
+                best_mota = mota
+
+            evaluator.log_summary_short(threshold)
+
+        evaluator.reset()
+        evaluator.compute(best_threshold)
         results[c] = evaluator.create_summary()
diff --git a/src/opendr/perception/object_tracking_3d/ab3dmot/algorithm/kalman_tracker_3d.py b/src/opendr/perception/object_tracking_3d/ab3dmot/algorithm/kalman_tracker_3d.py
index 502b4bf94cc6c7977bbd61a857502a7213ce37a5..0610cd123c381099f6469635718c854e92c7aef5 100644
--- a/src/opendr/perception/object_tracking_3d/ab3dmot/algorithm/kalman_tracker_3d.py
+++ b/src/opendr/perception/object_tracking_3d/ab3dmot/algorithm/kalman_tracker_3d.py
@@ -42,25 +42,25 @@ class KalmanTracker3D():
         self.updates = 0
 
         if state_transition_matrix is None:
-            state_transition_matrix = np.eye(state_dimensions, dtype=np.float32)
+            state_transition_matrix = np.eye(state_dimensions, dtype=np.int64)
             state_transition_matrix[0, -3] = 1
             state_transition_matrix[1, -2] = 1
             state_transition_matrix[2, -1] = 1
 
         if measurement_function_matrix is None:
             measurement_function_matrix = np.eye(
-                measurement_dimensions, state_dimensions, dtype=np.float32
+                measurement_dimensions, state_dimensions, dtype=np.int64
             )
 
         if covariance_matrix is None:
             covariance_matrix = np.eye(
-                state_dimensions, state_dimensions, dtype=np.float32
+                state_dimensions, state_dimensions, dtype=np.float64
             ) * 10
             covariance_matrix[7:, 7:] *= 1000
 
         if process_uncertainty_matrix is None:
             process_uncertainty_matrix = np.eye(
-                state_dimensions, state_dimensions, dtype=np.float32
+                state_dimensions, state_dimensions, dtype=np.float64
             )
             process_uncertainty_matrix[7:, 7:] *= 0.01
diff --git a/src/opendr/perception/object_tracking_3d/ab3dmot/object_tracking_3d_ab3dmot_learner.py b/src/opendr/perception/object_tracking_3d/ab3dmot/object_tracking_3d_ab3dmot_learner.py
index 7403f44b8514b0667375bace52569e62f96eb401..0f15595a61885117aa8aebe135e7e708b59b17bb 100644
--- a/src/opendr/perception/object_tracking_3d/ab3dmot/object_tracking_3d_ab3dmot_learner.py
+++ b/src/opendr/perception/object_tracking_3d/ab3dmot/object_tracking_3d_ab3dmot_learner.py
@@ -99,22 +99,60 @@ class ObjectTracking3DAb3dmotLearner(Learner):
         inputs = []
         ground_truths = []
 
+        # for i in [1]:
         for i in range(count):
             self.reset()
             input, ground_truth = dataset[i]
-            predictions.append(self.infer(input))
+            predictions.append(self.infer(input, gt=ground_truth))
             ground_truths.append(ground_truth)
             inputs.append(input)
 
             logger.log(Logger.LOG_WHEN_NORMAL, "Computing tracklets [" + str(i + 1) + "/" + str(count) + "]", end='\r')
 
+        result = evaluate_kitti_tracking(predictions, ground_truths, log=logger.log)
+
+        logger.close()
+
+        return result
+
+    def dataset_eval(
+        self,
+        dataset,
+        logging_path=None,
+        silent=False,
+        verbose=False,
+        count=None
+    ):
+
+        logger = Logger(silent, verbose, logging_path)
+
+        if not isinstance(dataset, DatasetIterator):
+            raise ValueError("dataset should be a DatasetIterator")
+
+        if count is None:
+            count = len(dataset)
+
+        predictions = []
+        inputs = []
+        ground_truths = []
+
+        for i in [1]:
+            # for i in range(count):
+            self.reset()
+            input, ground_truth = dataset[i]
+            predictions.append(input)
+            ground_truths.append(ground_truth)
+
+            logger.log(Logger.LOG_WHEN_NORMAL, "Computing tracklets [" + str(i + 1) + "/" + str(count) + "]", end='\r')
+
         result = evaluate_kitti_tracking(predictions, ground_truths, log=logger.log)
 
         logger.close()
 
         return result
 
-    def infer(self, bounding_boxes_3d_list):
+    def infer(self, bounding_boxes_3d_list, gt=None):
         if self.model is None:
             raise ValueError("No model created")
 
@@ -132,7 +170,8 @@ class ObjectTracking3DAb3dmotLearner(Learner):
 
         results = []
 
-        for box_list in bounding_boxes_3d_list:
+        for i, box_list in enumerate(bounding_boxes_3d_list):
+
             t0 = time.time()
 
@@ -140,8 +179,13 @@ class ObjectTracking3DAb3dmotLearner(Learner):
             results.append(result)
 
             t0 = time.time() - t0
+
+            print("frame", i)
+            gt[i].d()
+
             self.infers_count += 1
             self.infers_time += t0
+
         if is_single_input:
             results = results[0]
diff --git a/src/opendr/perception/object_tracking_3d/datasets/kitti_tracking.py b/src/opendr/perception/object_tracking_3d/datasets/kitti_tracking.py
index c69f0a5b3765c6eb2bb9893934e51669427540c1..b2ea3c86e8570ff6063b88a6444e207969c7df54 100644
--- a/src/opendr/perception/object_tracking_3d/datasets/kitti_tracking.py
+++ b/src/opendr/perception/object_tracking_3d/datasets/kitti_tracking.py
@@ -39,13 +39,15 @@ from opendr.perception.object_detection_3d.datasets.kitti import parse_calib
 class KittiTrackingDatasetIterator(DatasetIterator):
     def __init__(
         self, inputs_path, ground_truths_path,
-        inputs_format="detection"  # detection, tracking
+        inputs_format="detection",  # detection, tracking
+        inputs_return_format="detection",
     ):
         super().__init__()
 
         self.inputs_path = inputs_path
         self.ground_truths_path = ground_truths_path
         self.inputs_format = inputs_format
+        self.inputs_return_format = inputs_return_format
 
         self.inputs_files = sorted(os.listdir(self.inputs_path))
         self.ground_truths_files = sorted(os.listdir(self.ground_truths_path))
@@ -138,7 +140,7 @@ class KittiTrackingDatasetIterator(DatasetIterator):
         ):
 
             input, _ = load_tracking_file(
-                os.path.join(self.inputs_path, input_file), self.inputs_format, "detection", remove_dontcare=True
+                os.path.join(self.inputs_path, input_file), self.inputs_format, self.inputs_return_format, remove_dontcare=True
             )
             ground_truth, _ = load_tracking_file(
                 os.path.join(self.ground_truths_path, ground_truth_file),
                "tracking", "tracking"