ultralytics 8.0.90
actions and docs improvements (#2326)
Co-authored-by: calmisential <xinyu_std@163.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: triple Mu <gpu@163.com>
Co-authored-by: Laughing <61612323+Laughing-q@users.noreply.github.com>
Co-authored-by: Ayush Chaurasia <ayush.chaurarsia@gmail.com>
Co-authored-by: Laughing-q <1185102784@qq.com>
Co-authored-by: ran xiao <ben.xiao@me.com>
Co-authored-by: rxiao <ran.xiao@silverpond.com.au>
@@ -6,6 +6,18 @@ from ultralytics.yolo.utils.torch_utils import select_device
 def auto_annotate(data, det_model='yolov8x.pt', sam_model='sam_b.pt', device='', output_dir=None):
+    """
+    Automatically annotates images using a YOLO object detection model and a SAM segmentation model.
+
+    Args:
+        data (str): Path to a folder containing images to be annotated.
+        det_model (str, optional): Pre-trained YOLO detection model. Defaults to 'yolov8x.pt'.
+        sam_model (str, optional): Pre-trained SAM segmentation model. Defaults to 'sam_b.pt'.
+        device (str, optional): Device to run the models on. Defaults to an empty string (CPU or GPU, if available).
+        output_dir (str, None, optional): Directory to save the annotated results.
+            Defaults to a 'labels' folder in the same directory as 'data'.
+
+    """
     device = select_device(device)
     det_model = YOLO(det_model)
     sam_model = build_sam(sam_model)
@@ -33,7 +45,7 @@ def auto_annotate(data, det_model='yolov8x.pt', sam_model='sam_b.pt', device='', output_dir=None):
         result.update(masks=masks.squeeze(1))
         segments = result.masks.xyn  # noqa

-        with open(str(Path(output_dir) / Path(result.path).stem) + '.txt', 'w') as f:
+        with open(f'{str(Path(output_dir) / Path(result.path).stem)}.txt', 'w') as f:
             for i in range(len(segments)):
                 s = segments[i]
                 if len(s) == 0:
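Note: a minimal usage sketch of the function documented above (hedged: the import path is assumed from this commit's `ultralytics.yolo` layout, and 'photos' is a placeholder folder):

    from ultralytics.yolo.data.annotator import auto_annotate  # import path assumed, not confirmed by this diff

    # Detect objects with YOLOv8, segment them with SAM; labels default to a 'photos/labels' folder
    auto_annotate(data='photos', det_model='yolov8x.pt', sam_model='sam_b.pt', device='')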
@@ -141,11 +141,8 @@ def load_inference_source(source=None, imgsz=640, vid_stride=1):

     Args:
         source (str, Path, Tensor, PIL.Image, np.ndarray): The input source for inference.
-        transforms (callable, optional): Custom transformations to be applied to the input source.
         imgsz (int, optional): The size of the image for inference. Default is 640.
         vid_stride (int, optional): The frame interval for video sources. Default is 1.
-        stride (int, optional): The model stride. Default is 32.
-        auto (bool, optional): Automatically apply pre-processing. Default is True.

     Returns:
         dataset (Dataset): A dataset object for the specified input source.
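Note: with `transforms`, `stride`, and `auto` dropped from the docstring (matching the trimmed signature in the hunk header), a call now passes only the three remaining arguments; a hedged sketch with a placeholder source:

    # 'bus.jpg' is a placeholder; any str/Path/Tensor/PIL.Image/np.ndarray source works per the Args above
    dataset = load_inference_source(source='bus.jpg', imgsz=640, vid_stride=1)
    for batch in dataset:
        ...  # iterate batches from the returned dataset object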
@@ -72,9 +72,6 @@ class LoadStreams:
         # Check for common shapes
         self.bs = self.__len__()

-        if not self.rect:
-            LOGGER.warning('WARNING ⚠️ Stream shapes differ. For optimal performance supply similarly-shaped streams.')
-
     def update(self, i, cap, stream):
         """Read stream `i` frames in daemon thread."""
         n, f = 0, self.frames[i]  # frame number, frame array
@@ -116,6 +116,9 @@ class BasePredictor:
         """
         if not isinstance(im, torch.Tensor):
             auto = all(x.shape == im[0].shape for x in im) and self.model.pt
+            if not auto:
+                LOGGER.warning(
+                    'WARNING ⚠️ Source shapes differ. For optimal performance supply similarly-shaped sources.')
             im = np.stack([LetterBox(self.imgsz, auto=auto, stride=self.model.stride)(image=x) for x in im])
             im = im[..., ::-1].transpose((0, 3, 1, 2))  # BGR to RGB, BHWC to BCHW, (n, 3, h, w)
             im = np.ascontiguousarray(im)  # contiguous
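Note: the new warning fires only when `auto` is False, i.e. when the input shapes differ (or the model is not a PyTorch checkpoint); a standalone sketch of the shape check with synthetic arrays:

    import numpy as np

    ims = [np.zeros((480, 640, 3), np.uint8), np.zeros((720, 1280, 3), np.uint8)]
    same_shape = all(x.shape == ims[0].shape for x in ims)  # False: 480x640 vs 720x1280
    # same_shape (and model.pt) decide `auto`; when False, letterboxing pads to a fixed size instead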
@@ -217,7 +220,8 @@ class BasePredictor:
             self.run_callbacks('on_predict_batch_start')
             self.batch = batch
             path, im0s, vid_cap, s = batch
-            visualize = increment_path(self.save_dir / Path(path).stem, mkdir=True) if self.args.visualize else False
+            visualize = increment_path(self.save_dir / Path(path[0]).stem,
+                                       mkdir=True) if self.args.visualize and (not self.source_type.tensor) else False

             # Preprocess
             with self.dt[0]:
@@ -298,7 +302,7 @@ class BasePredictor:
         cv2.namedWindow(str(p), cv2.WINDOW_NORMAL | cv2.WINDOW_KEEPRATIO)  # allow window resize (Linux)
         cv2.resizeWindow(str(p), im0.shape[1], im0.shape[0])
         cv2.imshow(str(p), im0)
-        cv2.waitKey(500 if self.batch[4].startswith('image') else 1)  # 1 millisecond
+        cv2.waitKey(500 if self.batch[3].startswith('image') else 1)  # 1 millisecond

     def save_preds(self, vid_cap, idx, save_path):
         """Save video predictions as mp4 at specified path."""
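Note: the index fix follows from the four-element batch unpacked in the previous hunk (`path, im0s, vid_cap, s = batch`), so the log string `s` sits at index 3; a tiny illustration with placeholder values:

    batch = ('bus.jpg', None, None, 'image 1/1 bus.jpg')  # (path, im0s, vid_cap, s), values hypothetical
    assert batch[3].startswith('image')  # index 4 would raise IndexError on a 4-tuple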
@@ -205,7 +205,7 @@ class FocalLoss(nn.Module):
             return loss.mean()
         elif self.reduction == 'sum':
             return loss.sum()
-        else:  # 'none'
+        else:  # 'None'
             return loss


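Note: only the comment changes in this hunk; the reduction branch itself is the standard idiom, shown standalone below (a sketch, not the full FocalLoss module):

    import torch

    def reduce_loss(loss: torch.Tensor, reduction: str = 'mean') -> torch.Tensor:
        if reduction == 'mean':
            return loss.mean()
        elif reduction == 'sum':
            return loss.sum()
        else:  # no reduction: keep the per-element loss
            return loss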
@@ -148,7 +148,7 @@ def non_max_suppression(
     Perform non-maximum suppression (NMS) on a set of boxes, with support for masks and multiple labels per box.

     Arguments:
-        prediction (torch.Tensor): A tensor of shape (batch_size, num_boxes, num_classes + 4 + num_masks)
+        prediction (torch.Tensor): A tensor of shape (batch_size, num_classes + 4 + num_masks, num_boxes)
             containing the predicted boxes, classes, and masks. The tensor should be in the format
             output by a model, such as YOLO.
         conf_thres (float): The confidence threshold below which boxes will be filtered out.
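Note: the corrected docstring puts the channel dimension before the box dimension; predictions stored as (batch, num_boxes, channels) need a transpose before NMS. A hedged sketch with illustrative YOLOv8 shapes (80 classes + 4 box + 32 mask coefficients, 8400 candidate boxes):

    import torch

    pred = torch.rand(1, 8400, 116)  # (batch_size, num_boxes, num_classes + 4 + num_masks)
    pred = pred.transpose(1, 2)      # (batch_size, num_classes + 4 + num_masks, num_boxes), as documented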
@@ -469,3 +469,39 @@ def output_to_target(output, max_det=300):
         targets.append(torch.cat((j, cls, xyxy2xywh(box), conf), 1))
     targets = torch.cat(targets, 0).numpy()
     return targets[:, 0], targets[:, 1], targets[:, 2:]
+
+
+def feature_visualization(x, module_type, stage, n=32, save_dir=Path('runs/detect/exp')):
+    """
+    Visualize feature maps of a given model module during inference.
+
+    Args:
+        x (torch.Tensor): Features to be visualized.
+        module_type (str): Module type.
+        stage (int): Module stage within the model.
+        n (int, optional): Maximum number of feature maps to plot. Defaults to 32.
+        save_dir (Path, optional): Directory to save results. Defaults to Path('runs/detect/exp').
+
+    Returns:
+        None: This function does not return any value; it saves the visualization to the specified directory.
+    """
+    for m in ['Detect', 'Pose', 'Segment']:
+        if m in module_type:
+            return
+    batch, channels, height, width = x.shape  # batch, channels, height, width
+    if height > 1 and width > 1:
+        f = save_dir / f"stage{stage}_{module_type.split('.')[-1]}_features.png"  # filename
+
+        blocks = torch.chunk(x[0].cpu(), channels, dim=0)  # select batch index 0, block by channels
+        n = min(n, channels)  # number of plots
+        fig, ax = plt.subplots(math.ceil(n / 8), 8, tight_layout=True)  # 8 rows x n/8 cols
+        ax = ax.ravel()
+        plt.subplots_adjust(wspace=0.05, hspace=0.05)
+        for i in range(n):
+            ax[i].imshow(blocks[i].squeeze())  # cmap='gray'
+            ax[i].axis('off')
+
+        LOGGER.info(f'Saving {f}... ({n}/{channels})')
+        plt.savefig(f, dpi=300, bbox_inches='tight')
+        plt.close()
+        np.save(str(f.with_suffix('.npy')), x[0].cpu().numpy())  # npy save
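Note: a hedged usage sketch of the new helper with a synthetic feature map (the module_type and stage values are illustrative):

    import torch
    from pathlib import Path

    save_dir = Path('runs/detect/exp')
    save_dir.mkdir(parents=True, exist_ok=True)  # the function writes into save_dir but does not create it
    x = torch.rand(1, 64, 80, 80)  # synthetic (batch, channels, height, width) features
    feature_visualization(x, module_type='model.2.C2f', stage=2, n=32, save_dir=save_dir)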
@@ -27,7 +27,7 @@ class DetectionTrainer(BaseTrainer):
         Args:
             img_path (str): Path to the folder containing images.
             mode (str): `train` mode or `val` mode, users are able to customize different augmentations for each mode.
-            batch_size (int, optional): Size of batches, this is for `rect`. Defaults to None.
+            batch (int, optional): Size of batches, this is for `rect`. Defaults to None.
         """
         gs = max(int(de_parallel(self.model).stride.max() if self.model else 0), 32)
         return build_yolo_dataset(self.args, img_path, batch, self.data, mode=mode, rect=mode == 'val', stride=gs)
@@ -177,7 +177,7 @@ class DetectionValidator(BaseValidator):
         Args:
             img_path (str): Path to the folder containing images.
             mode (str): `train` mode or `val` mode, users are able to customize different augmentations for each mode.
-            batch_size (int, optional): Size of batches, this is for `rect`. Defaults to None.
+            batch (int, optional): Size of batches, this is for `rect`. Defaults to None.
         """
         gs = max(int(de_parallel(self.model).stride if self.model else 0), 32)
         return build_yolo_dataset(self.args, img_path, batch, self.data, mode=mode, stride=gs)
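Note: with both docstrings now matching the actual `batch` parameter, a call reads as below (hedged: constructing the trainer is elided and the dataset path is a placeholder):

    # 'trainer' is an already-constructed DetectionTrainer; the same pattern applies to DetectionValidator
    dataset = trainer.build_dataset(img_path='coco128/images/train2017', mode='train', batch=16)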