ultralytics 8.0.90 actions and docs improvements (#2326)

Co-authored-by: calmisential <xinyu_std@163.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: triple Mu <gpu@163.com>
Co-authored-by: Laughing <61612323+Laughing-q@users.noreply.github.com>
Co-authored-by: Ayush Chaurasia <ayush.chaurarsia@gmail.com>
Co-authored-by: Laughing-q <1185102784@qq.com>
Co-authored-by: ran xiao <ben.xiao@me.com>
Co-authored-by: rxiao <ran.xiao@silverpond.com.au>
Author: Glenn Jocher
Date: 2023-04-29 20:16:56 +02:00
Committed via: GitHub
Parent: 243fc4b1fe
Commit: 44c7c3514d
39 changed files with 783 additions and 143 deletions

View File

@@ -6,6 +6,18 @@ from ultralytics.yolo.utils.torch_utils import select_device
def auto_annotate(data, det_model='yolov8x.pt', sam_model='sam_b.pt', device='', output_dir=None):
"""
Automatically annotates images using a YOLO object detection model and a SAM segmentation model.
Args:
data (str): Path to a folder containing images to be annotated.
det_model (str, optional): Pre-trained YOLO detection model. Defaults to 'yolov8x.pt'.
sam_model (str, optional): Pre-trained SAM segmentation model. Defaults to 'sam_b.pt'.
device (str, optional): Device to run the models on. Defaults to an empty string (CPU or GPU, if available).
output_dir (str, None, optional): Directory to save the annotated results.
Defaults to a 'labels' folder in the same directory as 'data'.
"""
device = select_device(device)
det_model = YOLO(det_model)
sam_model = build_sam(sam_model)
@@ -33,7 +45,7 @@ def auto_annotate(data, det_model='yolov8x.pt', sam_model='sam_b.pt', device='',
result.update(masks=masks.squeeze(1))
segments = result.masks.xyn # noqa
with open(str(Path(output_dir) / Path(result.path).stem) + '.txt', 'w') as f:
with open(f'{str(Path(output_dir) / Path(result.path).stem)}.txt', 'w') as f:
for i in range(len(segments)):
s = segments[i]
if len(s) == 0:
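For reference, a minimal usage sketch of the signature documented above (the import path is an assumption based on where auto_annotate is defined in this release and may differ):

    from ultralytics.yolo.data.annotator import auto_annotate

    # Detect objects with YOLOv8x, refine each detection into a polygon mask with SAM,
    # and write YOLO-format segment labels, one .txt file per image.
    auto_annotate(data='path/to/images',   # folder of images to annotate
                  det_model='yolov8x.pt',  # pre-trained YOLO detector
                  sam_model='sam_b.pt',    # pre-trained SAM segmenter
                  device='',               # '' lets select_device() pick CPU or GPU
                  output_dir=None)         # None -> a 'labels' folder in the same directory as 'data'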

View File

@@ -141,11 +141,8 @@ def load_inference_source(source=None, imgsz=640, vid_stride=1):
Args:
source (str, Path, Tensor, PIL.Image, np.ndarray): The input source for inference.
transforms (callable, optional): Custom transformations to be applied to the input source.
imgsz (int, optional): The size of the image for inference. Default is 640.
vid_stride (int, optional): The frame interval for video sources. Default is 1.
stride (int, optional): The model stride. Default is 32.
auto (bool, optional): Automatically apply pre-processing. Default is True.
Returns:
dataset (Dataset): A dataset object for the specified input source.
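A hedged call sketch for the slimmed-down signature, where only source, imgsz and vid_stride remain (the import location is assumed for this release):

    from ultralytics.yolo.data.build import load_inference_source

    # Build a dataset object for a video source, sampling every 2nd frame at 640 px.
    dataset = load_inference_source(source='video.mp4', imgsz=640, vid_stride=2)
    for batch in dataset:
        ...  # the predictor consumes batches of (paths, images, video capture, description string)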

View File

@@ -72,9 +72,6 @@ class LoadStreams:
# Check for common shapes
self.bs = self.__len__()
if not self.rect:
LOGGER.warning('WARNING ⚠️ Stream shapes differ. For optimal performance supply similarly-shaped streams.')
def update(self, i, cap, stream):
"""Read stream `i` frames in daemon thread."""
n, f = 0, self.frames[i] # frame number, frame array

View File

@@ -116,6 +116,9 @@ class BasePredictor:
"""
if not isinstance(im, torch.Tensor):
auto = all(x.shape == im[0].shape for x in im) and self.model.pt
if not auto:
LOGGER.warning(
'WARNING ⚠️ Source shapes differ. For optimal performance supply similarly-shaped sources.')
im = np.stack([LetterBox(self.imgsz, auto=auto, stride=self.model.stride)(image=x) for x in im])
im = im[..., ::-1].transpose((0, 3, 1, 2)) # BGR to RGB, BHWC to BCHW, (n, 3, h, w)
im = np.ascontiguousarray(im) # contiguous
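The array handling on the last two lines is self-contained NumPy; a standalone sketch of the same conversion with dummy images (shapes chosen arbitrarily):

    import numpy as np

    imgs = [np.zeros((640, 640, 3), dtype=np.uint8) for _ in range(2)]  # two letterboxed HWC BGR images
    im = np.stack(imgs)                          # (2, 640, 640, 3)  BHWC
    im = im[..., ::-1].transpose((0, 3, 1, 2))   # BGR to RGB, BHWC to BCHW -> (2, 3, 640, 640)
    im = np.ascontiguousarray(im)                # remove the negative stride introduced by [..., ::-1]
    print(im.shape)                              # (2, 3, 640, 640), ready for torch.from_numpy()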
@@ -217,7 +220,8 @@ class BasePredictor:
self.run_callbacks('on_predict_batch_start')
self.batch = batch
path, im0s, vid_cap, s = batch
visualize = increment_path(self.save_dir / Path(path).stem, mkdir=True) if self.args.visualize else False
visualize = increment_path(self.save_dir / Path(path[0]).stem,
mkdir=True) if self.args.visualize and (not self.source_type.tensor) else False
# Preprocess
with self.dt[0]:
@@ -298,7 +302,7 @@ class BasePredictor:
cv2.namedWindow(str(p), cv2.WINDOW_NORMAL | cv2.WINDOW_KEEPRATIO) # allow window resize (Linux)
cv2.resizeWindow(str(p), im0.shape[1], im0.shape[0])
cv2.imshow(str(p), im0)
cv2.waitKey(500 if self.batch[4].startswith('image') else 1) # 1 millisecond
cv2.waitKey(500 if self.batch[3].startswith('image') else 1) # 1 millisecond
def save_preds(self, vid_cap, idx, save_path):
"""Save video predictions as mp4 at specified path."""

View File

@@ -205,7 +205,7 @@ class FocalLoss(nn.Module):
return loss.mean()
elif self.reduction == 'sum':
return loss.sum()
else: # 'none'
else: # 'None'
return loss
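For context, a minimal focal-loss sketch showing how the 'mean'/'sum'/'none' reduction branches above are typically reached after the modulating factor is applied (an illustrative re-implementation, not the exact ultralytics class):

    import torch.nn as nn

    class TinyFocalLoss(nn.Module):
        # BCE-with-logits scaled by alpha and a (1 - p_t)**gamma modulating factor.
        def __init__(self, gamma=1.5, alpha=0.25, reduction='mean'):
            super().__init__()
            self.loss_fcn = nn.BCEWithLogitsLoss(reduction='none')  # keep per-element losses
            self.gamma, self.alpha, self.reduction = gamma, alpha, reduction

        def forward(self, pred, true):
            loss = self.loss_fcn(pred, true)
            pred_prob = pred.sigmoid()
            p_t = true * pred_prob + (1 - true) * (1 - pred_prob)
            alpha_factor = true * self.alpha + (1 - true) * (1 - self.alpha)
            loss *= alpha_factor * (1.0 - p_t) ** self.gamma
            if self.reduction == 'mean':
                return loss.mean()
            elif self.reduction == 'sum':
                return loss.sum()
            else:  # 'none'
                return loss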

View File

@@ -148,7 +148,7 @@ def non_max_suppression(
Perform non-maximum suppression (NMS) on a set of boxes, with support for masks and multiple labels per box.
Arguments:
prediction (torch.Tensor): A tensor of shape (batch_size, num_boxes, num_classes + 4 + num_masks)
prediction (torch.Tensor): A tensor of shape (batch_size, num_classes + 4 + num_masks, num_boxes)
containing the predicted boxes, classes, and masks. The tensor should be in the format
output by a model, such as YOLO.
conf_thres (float): The confidence threshold below which boxes will be filtered out.
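A hedged call example matching the corrected shape convention (module path assumed; random inputs, so the kept boxes are arbitrary):

    import torch
    from ultralytics.yolo.utils import ops

    # (batch_size, num_classes + 4 + num_masks, num_boxes): 1 image, 80 classes, no masks, 8400 candidates
    prediction = torch.rand(1, 84, 8400)
    results = ops.non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.7)
    print(results[0].shape)  # (num_kept, 6): x1, y1, x2, y2, confidence, class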

View File

@@ -469,3 +469,39 @@ def output_to_target(output, max_det=300):
targets.append(torch.cat((j, cls, xyxy2xywh(box), conf), 1))
targets = torch.cat(targets, 0).numpy()
return targets[:, 0], targets[:, 1], targets[:, 2:]
def feature_visualization(x, module_type, stage, n=32, save_dir=Path('runs/detect/exp')):
"""
Visualize feature maps of a given model module during inference.
Args:
x (torch.Tensor): Features to be visualized.
module_type (str): Module type.
stage (int): Module stage within the model.
n (int, optional): Maximum number of feature maps to plot. Defaults to 32.
save_dir (Path, optional): Directory to save results. Defaults to Path('runs/detect/exp').
Returns:
None: This function does not return any value; it saves the visualization to the specified directory.
"""
for m in ['Detect', 'Pose', 'Segment']:
if m in module_type:
return
batch, channels, height, width = x.shape # batch, channels, height, width
if height > 1 and width > 1:
f = save_dir / f"stage{stage}_{module_type.split('.')[-1]}_features.png" # filename
blocks = torch.chunk(x[0].cpu(), channels, dim=0) # select batch index 0, block by channels
n = min(n, channels) # number of plots
fig, ax = plt.subplots(math.ceil(n / 8), 8, tight_layout=True) # n/8 rows x 8 cols
ax = ax.ravel()
plt.subplots_adjust(wspace=0.05, hspace=0.05)
for i in range(n):
ax[i].imshow(blocks[i].squeeze()) # cmap='gray'
ax[i].axis('off')
LOGGER.info(f'Saving {f}... ({n}/{channels})')
plt.savefig(f, dpi=300, bbox_inches='tight')
plt.close()
np.save(str(f.with_suffix('.npy')), x[0].cpu().numpy()) # npy save
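A usage sketch for the new helper with a dummy feature map (the import path is assumed from the plotting utilities shown here; the save directory must already exist):

    import torch
    from pathlib import Path
    from ultralytics.yolo.utils.plotting import feature_visualization

    save_dir = Path('runs/detect/exp')
    save_dir.mkdir(parents=True, exist_ok=True)
    x = torch.rand(1, 64, 80, 80)  # batch 1, 64 channels, 80x80 feature map
    feature_visualization(x, module_type='model.4.C2f', stage=4, n=32, save_dir=save_dir)
    # writes stage4_C2f_features.png (first 32 channels) and stage4_C2f_features.npy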

View File

@@ -27,7 +27,7 @@ class DetectionTrainer(BaseTrainer):
Args:
img_path (str): Path to the folder containing images.
mode (str): `train` mode or `val` mode; users are able to customize different augmentations for each mode.
batch_size (int, optional): Size of batches, this is for `rect`. Defaults to None.
batch (int, optional): Size of batches, this is for `rect`. Defaults to None.
"""
gs = max(int(de_parallel(self.model).stride.max() if self.model else 0), 32)
return build_yolo_dataset(self.args, img_path, batch, self.data, mode=mode, rect=mode == 'val', stride=gs)

View File

@@ -177,7 +177,7 @@ class DetectionValidator(BaseValidator):
Args:
img_path (str): Path to the folder containing images.
mode (str): `train` mode or `val` mode; users are able to customize different augmentations for each mode.
batch_size (int, optional): Size of batches, this is for `rect`. Defaults to None.
batch (int, optional): Size of batches, this is for `rect`. Defaults to None.
"""
gs = max(int(de_parallel(self.model).stride if self.model else 0), 32)
return build_yolo_dataset(self.args, img_path, batch, self.data, mode=mode, stride=gs)
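Both overrides forward the renamed batch argument to build_yolo_dataset(); a hedged end-to-end example that exercises them through the public API (model and dataset names are the standard ultralytics defaults, downloaded on first use):

    from ultralytics import YOLO

    model = YOLO('yolov8n.pt')
    # Training builds a 'train'-mode dataset (with augmentations) and a rectangular 'val'-mode dataset.
    model.train(data='coco128.yaml', epochs=1, imgsz=640, batch=16)
    # Standalone validation goes through DetectionValidator.build_dataset() with the same batch size.
    metrics = model.val(data='coco128.yaml', batch=16)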