diff --git a/docs/modes/predict.md b/docs/modes/predict.md index 51cb921..3deee7c 100644 --- a/docs/modes/predict.md +++ b/docs/modes/predict.md @@ -138,7 +138,8 @@ The `Results` object contains the following components: - `Results.boxes`: `Boxes` object with properties and methods for manipulating bounding boxes - `Results.masks`: `Masks` object for indexing masks or getting segment coordinates -- `Results.probs`: `torch.Tensor` containing class probabilities or logits +- `Results.keypoints`: `Keypoints` object with properties and methods for manipulating predicted keypoints. +- `Results.probs`: `Probs` object containing class probabilities. - `Results.orig_img`: Original image loaded in memory - `Results.path`: `Path` containing the path to the input image @@ -178,8 +179,8 @@ operations are cached, meaning they're only calculated once per object, and thos boxes.xywh # box with xywh format, (N, 4) boxes.xyxyn # box with xyxy format but normalized, (N, 4) boxes.xywhn # box with xywh format but normalized, (N, 4) - boxes.conf # confidence score, (N, 1) - boxes.cls # cls, (N, 1) + boxes.conf # confidence score, (N, ) + boxes.cls # cls, (N, ) boxes.data # raw bboxes tensor, (N, 6) or boxes.boxes ``` @@ -197,15 +198,35 @@ operations are cached, meaning they're only calculated once per object, and thos masks.data # raw masks tensor, (N, H, W) or masks.masks ``` +### Keypoints + +`Keypoints` object can be used to index, manipulate and normalize coordinates. The keypoint conversion operation is cached. + +!!! example "Keypoints" + + ```python + results = model(inputs) + keypoints = results[0].keypoints # Keypoints object + keypoints.xy # x, y keypoints (pixels), (num_dets, num_kpts, 2/3), the last dimension can be 2 or 3, depends on the model. + keypoints.xyn # x, y keypoints (normalized), (num_dets, num_kpts, 2/3) + keypoints.conf # confidence score of each keypoint, (num_dets, num_kpts), if the last dimension is 3. 
+ keypoints.data # raw keypoints tensor, (num_dets, num_kpts, 2/3) + ``` + ### probs -`probs` attribute of `Results` class is a `Tensor` containing class probabilities of a classification operation. +`Probs` object can be used to index and to get the top1 and top5 indices and scores of classification. !!! example "Probs" ```python results = model(inputs) - results[0].probs # cls prob, (num_class, ) + probs = results[0].probs # cls prob, (num_class, ) + probs.top5 # The top5 indices of classification, List[Int] * 5. + probs.top1 # The top1 index of classification, a value with Int type. + probs.top5conf # The top5 scores of classification, a tensor with shape (5, ). + probs.top1conf # The top1 score of classification, a value with torch.tensor type. + probs.data # raw probs tensor, (num_class, ) ``` Class reference documentation for `Results` module and its components can be found [here](../reference/yolo/engine/results.md) @@ -213,7 +234,7 @@ Class reference documentation for `Results` module and its components can be fou ## Plotting results You can use `plot()` function of `Result` object to plot results on in image object. It plots all components(boxes, -masks, classification logits, etc.) found in the results object +masks, classification probabilities, etc.) found in the results object !!! 
example "Plotting" diff --git a/tests/test_python.py b/tests/test_python.py index 9616d6f..53e2789 100644 --- a/tests/test_python.py +++ b/tests/test_python.py @@ -220,9 +220,10 @@ def _test_results_api(res): res.plot(pil=True) res.plot(conf=True, boxes=False) res.plot() + print(res) print(res.path) for k in res.keys: - print(getattr(res, k).data) + print(getattr(res, k)) def test_results(): diff --git a/ultralytics/yolo/engine/results.py b/ultralytics/yolo/engine/results.py index 8d1e981..bde922f 100644 --- a/ultralytics/yolo/engine/results.py +++ b/ultralytics/yolo/engine/results.py @@ -71,9 +71,9 @@ class Results(SimpleClass): orig_img (numpy.ndarray): The original image as a numpy array. path (str): The path to the image file. names (dict): A dictionary of class names. - boxes (List[List[float]], optional): A list of bounding box coordinates for each detection. - masks (numpy.ndarray, optional): A 3D numpy array of detection masks, where each mask is a binary image. - probs (numpy.ndarray, optional): A 2D numpy array of detection probabilities for each class. + boxes (torch.tensor, optional): A 2D tensor of bounding box coordinates for each detection. + masks (torch.tensor, optional): A 3D tensor of detection masks, where each mask is a binary image. + probs (torch.tensor, optional): A 1D tensor of probabilities of each class for classification task. keypoints (List[List[float]], optional): A list of detected keypoints for each object. @@ -82,10 +82,10 @@ class Results(SimpleClass): orig_shape (tuple): The original image shape in (height, width) format. boxes (Boxes, optional): A Boxes object containing the detection bounding boxes. masks (Masks, optional): A Masks object containing the detection masks. - probs (numpy.ndarray, optional): A 2D numpy array of detection probabilities for each class. + probs (Probs, optional): A Probs object containing probabilities of each class for classification task. names (dict): A dictionary of class names. 
path (str): The path to the image file. - keypoints (List[List[float]], optional): A list of detected keypoints for each object. + keypoints (Keypoints, optional): A Keypoints object containing detected keypoints for each object. speed (dict): A dictionary of preprocess, inference and postprocess speeds in milliseconds per image. _keys (tuple): A tuple of attribute names for non-empty attributes. """ @@ -552,7 +552,7 @@ class Keypoints(BaseTensor): @property @lru_cache(maxsize=1) def conf(self): - return self.data[..., 3] if self.has_visible else None + return self.data[..., 2] if self.has_visible else None class Probs(BaseTensor):