HUB setup (#108)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Glenn Jocher <glenn.jocher@ultralytics.com>
2023-01-02 00:51:14 +05:30
parent c6eb6720de
commit 2bc9a5c87e
16 changed files with 631 additions and 122 deletions
--- a/ultralytics/init.py
+++ b/ultralytics/init.py
@ -2,4 +2,4 @@ __version__ = "8.0.0.dev0"
 from ultralytics.yolo.engine.model import YOLO
-__all__ = ["__version__", "YOLO"]  # allow simpler import
+__all__ = ["__version__", "YOLO", "hub"]  # allow simpler import
--- a/ultralytics/hub/init.py
+++ b/ultralytics/hub/init.py
@ -0,0 +1,131 @@
 import os
 import shutil
 import psutil
 import requests
 from IPython import display  # to display images and clear console output
 from ultralytics.hub.auth import Auth
 from ultralytics.hub.session import HubTrainingSession
 from ultralytics.hub.utils import PREFIX, split_key
 from ultralytics.yolo.utils import LOGGER, emojis, is_colab
 from ultralytics.yolo.utils.torch_utils import select_device
 from ultralytics.yolo.v8.detect import DetectionTrainer
 def checks(verbose=True):
    """
    Run environment setup checks and log a one-line 'Setup complete' message.

    Args:
        verbose (bool): When True, clear the notebook output and append a CPU/RAM/disk summary to the log line.
    """
    if is_colab():
        # Drop the sample_data directory that Colab creates in the working directory
        shutil.rmtree('sample_data', ignore_errors=True)

    summary = ''
    if verbose:
        bytes_per_gib = 1 << 30
        ram_bytes = psutil.virtual_memory().total
        disk = shutil.disk_usage("/")
        display.clear_output()
        summary = (f'({os.cpu_count()} CPUs, {ram_bytes / bytes_per_gib:.1f} GB RAM, '
                   f'{(disk.total - disk.free) / bytes_per_gib:.1f}/{disk.total / bytes_per_gib:.1f} GB disk)')

    select_device(newline=False)
    LOGGER.info(f'Setup complete ✅ {summary}')
 def start(key=''):
    """
    Start training a model with Ultralytics HUB.

    Authenticates (prompting for a key when needed), creates a HubTrainingSession for the model ID embedded in the
    key, and launches a DetectionTrainer. Any exception raised along the way is logged as a warning, not re-raised.

    Args:
        key (str): Combined 'apikey_modelid' string. If empty, split_key() prompts for it.
    """

    def request_api_key(attempts=0):
        """Prompt the user to input their API key; returns the model ID once authentication succeeds."""
        import getpass

        max_attempts = 3
        tries = f"Attempt {attempts + 1} of {max_attempts}" if attempts > 0 else ""
        LOGGER.info(f"{PREFIX}Login. {tries}")
        input_key = getpass.getpass("Enter your Ultralytics HUB API key:\n")
        auth.api_key, model_id = split_key(input_key)
        if not auth.authenticate():
            attempts += 1
            LOGGER.warning(f"{PREFIX}Invalid API key ⚠️\n")
            if attempts < max_attempts:
                return request_api_key(attempts)
            raise ConnectionError(emojis(f"{PREFIX}Failed to authenticate ❌"))
        else:
            return model_id

    try:
        api_key, model_id = split_key(key)
        auth = Auth(api_key)  # attempts cookie login if no api key is present
        attempts = 1 if key else 0  # a key passed by the caller counts as the first attempt
        if not auth.get_state():
            if key:
                LOGGER.warning(f"{PREFIX}Invalid API key ⚠️\n")
            model_id = request_api_key(attempts)
        LOGGER.info(f"{PREFIX}Authenticated ✅")

        if not model_id:
            raise ConnectionError(emojis('Connecting with global API key is not currently supported. ❌'))

        session = HubTrainingSession(model_id=model_id, auth=auth)
        session.check_disk_space()

        # TODO: refactor, hardcoded for v8
        args = session.model.copy()
        args.pop("id")
        args.pop("status")
        args.pop("weights")
        args["data"] = "coco128.yaml"
        args["model"] = "yolov8n.yaml"
        args["batch_size"] = 16
        args["imgsz"] = 64

        trainer = DetectionTrainer(overrides=args)
        # HubTrainingSession may not define register_callbacks (HUB callbacks can be attached as integration
        # callbacks instead). Calling it unconditionally raised AttributeError, which the except below swallowed,
        # so training silently never started.
        register_callbacks = getattr(session, 'register_callbacks', None)
        if callable(register_callbacks):
            register_callbacks(trainer)
        setattr(trainer, 'hub_session', session)
        trainer.train()
    except Exception as e:
        LOGGER.warning(f"{PREFIX}{e}")
 def reset_model(key=''):
    """
    Reset a trained model to an untrained state via the HUB 'model-reset' endpoint.

    Args:
        key (str): Combined 'apikey_modelid' string, split with split_key().
    """
    api_key, model_id = split_key(key)
    payload = {"apiKey": api_key, "modelId": model_id}
    response = requests.post('https://api.ultralytics.com/model-reset', json=payload)
    if response.status_code != 200:
        LOGGER.warning(f"{PREFIX}model reset failure {response.status_code} {response.reason}")
        return
    LOGGER.info(f"{PREFIX}model reset successfully")
 def export_model(key='', format='torchscript'):
    """
    Request a HUB export of a model in the given format.

    Args:
        key (str): Combined 'apikey_modelid' string, split with split_key().
        format (str): Export format; must be one of the supported names listed below.

    Raises:
        AssertionError: If the format is unsupported or the server does not answer with HTTP 200.
    """
    api_key, model_id = split_key(key)
    supported = ('torchscript', 'onnx', 'openvino', 'engine', 'coreml', 'saved_model', 'pb', 'tflite', 'edgetpu',
                 'tfjs', 'ultralytics_tflite', 'ultralytics_coreml')
    assert format in supported, f"ERROR: Unsupported export format '{format}' passed, valid formats are {supported}"
    payload = {"apiKey": api_key, "modelId": model_id, "format": format}
    response = requests.post('https://api.ultralytics.com/export', json=payload)
    assert response.status_code == 200, \
        f"{PREFIX}{format} export failure {response.status_code} {response.reason}"
    LOGGER.info(f"{PREFIX}{format} export started ✅")
 def get_export(key='', format='torchscript'):
    """
    Fetch the export record for a model from the HUB 'get-export' endpoint.

    Args:
        key (str): Combined 'apikey_modelid' string, split with split_key().
        format (str): Export format; must be one of the supported names listed below.

    Returns:
        The decoded JSON body of the response.

    Raises:
        AssertionError: If the format is unsupported or the server does not answer with HTTP 200.
    """
    api_key, model_id = split_key(key)
    supported = ('torchscript', 'onnx', 'openvino', 'engine', 'coreml', 'saved_model', 'pb', 'tflite', 'edgetpu',
                 'tfjs', 'ultralytics_tflite', 'ultralytics_coreml')
    assert format in supported, f"ERROR: Unsupported export format '{format}' passed, valid formats are {supported}"
    payload = {"apiKey": api_key, "modelId": model_id, "format": format}
    response = requests.post('https://api.ultralytics.com/get-export', json=payload)
    assert response.status_code == 200, \
        f"{PREFIX}{format} get_export failure {response.status_code} {response.reason}"
    return response.json()
 # temp. For checking
 # SECURITY: never commit a real API key. The key is read from the ULTRALYTICS_HUB_KEY environment variable;
 # when it is unset, start() receives '' and split_key() prompts for the key interactively.
 if __name__ == "__main__":
    start(key=os.environ.get("ULTRALYTICS_HUB_KEY", ""))
--- a/ultralytics/hub/auth.py
+++ b/ultralytics/hub/auth.py
@ -0,0 +1,69 @@
 import requests
 from ultralytics.hub.config import HUB_API_ROOT
 from ultralytics.hub.utils import request_with_credentials
 from ultralytics.yolo.utils import is_colab
 # HUB settings URL; the "tab=api+keys" query suggests this is where users manage API keys (not referenced in the
 # visible code of this module) — TODO confirm
 API_KEY_PATH = "https://hub.ultralytics.com/settings?tab=api+keys"
 class Auth:
    """
    Holds HUB credentials (an API key or a browser-derived id token) and validates them against the HUB API.

    Falsy attribute values mean "not set"; authenticate() resets credentials to False when they are rejected.
    """
    id_token = api_key = model_key = False

    def __init__(self, api_key=None):
        """Authenticate with api_key when one is given, otherwise attempt browser-cookie authentication."""
        self.api_key = self._clean_api_key(api_key)
        if self.api_key:
            self.authenticate()
        else:
            self.auth_with_cookies()

    @staticmethod
    def _clean_api_key(key: str) -> str:
        """Strip model from key if present. A missing key (None/empty) yields '' instead of raising TypeError."""
        if not key:
            return ""
        return key.partition("_")[0]  # everything before the first separator, or the whole key if none

    def authenticate(self) -> bool:
        """Attempt to authenticate with server. Returns True on success; on failure resets credentials."""
        try:
            header = self.get_auth_header()
            if not header:
                raise ConnectionError("User has not authenticated locally.")
            r = requests.post(f"{HUB_API_ROOT}/v1/auth", headers=header)
            if not r.json().get('success', False):
                raise ConnectionError("Unable to authenticate.")
            return True
        except ConnectionError:
            self.id_token = self.api_key = False  # reset invalid
            return False

    def auth_with_cookies(self) -> bool:
        """
        Attempt to fetch authentication via cookies and set id_token.
        User must be logged in to HUB and running in a supported browser.

        Returns:
            bool: True only if a token was fetched AND the server accepted it.
        """
        if not is_colab():
            return False  # Currently only works with Colab
        try:
            authn = request_with_credentials(f"{HUB_API_ROOT}/v1/auth/auto")
            if not authn.get("success", False):
                raise ConnectionError("Unable to fetch browser authentication details.")
            self.id_token = authn.get("data", {}).get("idToken", None)
            # Report the real outcome: authenticate() clears the token and returns False when it is rejected
            return self.authenticate()
        except ConnectionError:
            self.id_token = False  # reset invalid
            return False

    def get_auth_header(self):
        """Return the request header for the active credential (id token takes precedence), or None."""
        if self.id_token:
            return {"authorization": f"Bearer {self.id_token}"}
        if self.api_key:
            return {"x-api-key": self.api_key}
        return None

    def get_state(self) -> bool:
        """Get the authentication state: True if an id token or an API key is currently set."""
        return bool(self.id_token or self.api_key)

    def set_api_key(self, key: str):
        """Set the API key without validating it; call authenticate() to check it against the server."""
        self.api_key = key
--- a/ultralytics/hub/config.py
+++ b/ultralytics/hub/config.py
@ -0,0 +1,12 @@
 import os
 # Global variables
 REPO_URL = "https://github.com/ultralytics/yolov5.git"
 REPO_BRANCH = "ultralytics/HUB"  # "master"
 # Deployment environment, taken from the ULTRALYTICS_ENV environment variable; defaults to "production"
 ENVIRONMENT = os.environ.get("ULTRALYTICS_ENV", "production")
 # Base URL for all HUB API requests: the public API in production, otherwise a server on localhost port 8000
 if ENVIRONMENT == 'production':
    HUB_API_ROOT = "https://api.ultralytics.com"
 else:
    HUB_API_ROOT = "http://127.0.0.1:8000"
    # NOTE: runs at import time, so importing this module prints whenever ULTRALYTICS_ENV is not "production"
    print(f'Connected to development server on {HUB_API_ROOT}')
--- a/ultralytics/hub/session.py
+++ b/ultralytics/hub/session.py
@ -0,0 +1,121 @@
 import signal
 import sys
 from pathlib import Path
 from time import sleep
 import requests
 from ultralytics import __version__
 from ultralytics.hub.config import HUB_API_ROOT
 from ultralytics.hub.utils import check_dataset_disk_space, smart_request
 from ultralytics.yolo.utils import LOGGER, is_colab, threaded
 AGENT_NAME = f'python-{__version__}-colab' if is_colab() else f'python-{__version__}-local'
 session = None
 def signal_handler(signum, frame):
    """
    Log the received signal, stop the active session's heartbeat loop, and exit the process.

    Args:
        signum (int): Number of the signal received; also used as the process exit status.
        frame: Current stack frame supplied by the signal module (only logged).
    """
    LOGGER.info(f'Signal received. {signum} {frame}')
    # Act on the module-level `session`; the previous code referenced an undefined global `hub_logger`,
    # which would raise NameError as soon as this branch was taken.
    if isinstance(session, HubTrainingSession):
        session.alive = False  # the heartbeat loop checks this flag
    sys.exit(signum)
 signal.signal(signal.SIGTERM, signal_handler)
 signal.signal(signal.SIGINT, signal_handler)
 class HubTrainingSession:
    """Ties a local training run to a HUB model: fetches the model record, uploads metrics/weights, sends heartbeats."""

    def __init__(self, model_id, auth):
        """Store request settings, fetch the model record from HUB and start the heartbeat thread."""
        # Request settings
        self.model_id = model_id
        self.api_url = f'{HUB_API_ROOT}/v1/models/{model_id}'
        self.auth_header = auth.get_auth_header()

        # Runtime state
        self.agent_id = None  # identifies which instance is communicating with server
        self.rate_limits = {'metrics': 3.0, 'ckpt': 900.0, 'heartbeat': 300.0}  # rate limits (seconds)
        self.t = {}  # rate limit timers (seconds)
        self.metrics_queue = {}  # metrics queue
        self.alive = True  # for heartbeats

        self.model = self._get_model()
        self._heartbeats()  # start heartbeats

    def __del__(self):
        """Flag the heartbeat loop to stop when the session object is destroyed."""
        self.alive = False

    def upload_metrics(self):
        """Post a snapshot of the queued metrics to the model endpoint."""
        body = {"metrics": self.metrics_queue.copy(), "type": "metrics"}
        smart_request(self.api_url, json=body, headers=self.auth_header, code=2)

    def upload_model(self, epoch, weights, is_best=False, map=0.0, final=False):
        """
        Upload a weights file to HUB, either as an epoch checkpoint ('last.pt') or as the final model ('best.pt').

        If `weights` is not an existing file the request is still sent, with None in place of the file content.
        """
        weights_path = Path(weights)
        blob = weights_path.read_bytes() if weights_path.is_file() else None
        endpoint = f'{self.api_url}/upload'

        if final:
            smart_request(endpoint,
                          data={"epoch": epoch, "type": "final", "map": map},
                          files={"best.pt": blob},
                          headers=self.auth_header,
                          retry=10,
                          timeout=3600,
                          code=4)
            return

        smart_request(endpoint,
                      data={"epoch": epoch, "type": "epoch", "isBest": bool(is_best)},
                      headers=self.auth_header,
                      files={"last.pt": blob},
                      code=3)

    def _get_model(self):
        """
        Fetch the model record by id and return its 'data' payload, or None when the response carries none.

        Raises:
            AssertionError: If the record's 'data' field is empty.
            ConnectionRefusedError: If the request fails with a requests ConnectionError.
        """
        try:
            response = smart_request(f"{HUB_API_ROOT}/v1/models/{self.model_id}",
                                     method="get",
                                     headers=self.auth_header,
                                     thread=False,
                                     code=0)
            record = response.json().get("data", None)
            if record:
                assert record['data'], 'ERROR: Dataset may still be processing. Please wait a minute and try again.'  # RF fix
                self.model_id = record["id"]
                return record
            return None
        except requests.exceptions.ConnectionError as e:
            raise ConnectionRefusedError('ERROR: The HUB server is not online. Please try again later.') from e

    def check_disk_space(self):
        """Raise MemoryError when check_dataset_disk_space() reports too little room for the model's dataset."""
        if check_dataset_disk_space(self.model['data']):
            return
        raise MemoryError("Not enough disk space")

    # NOTE: register_callbacks is intentionally not defined here; the original author's comment states it should not
    # be needed because HUB is now considered an integration and lives in the integration callbacks.
    # import ultralytics.yolo.utils.callbacks.hub as hub_callbacks
    # @staticmethod
    # def register_callbacks(trainer):
    #     for k, v in hub_callbacks.callbacks.items():
    #         trainer.add_callback(k, v)

    @threaded
    def _heartbeats(self):
        """Post a heartbeat, store the returned agent id, then sleep for the heartbeat rate limit; repeat while alive."""
        while self.alive:
            body = {"agent": AGENT_NAME, "agentId": self.agent_id}
            response = smart_request(f'{HUB_API_ROOT}/v1/agent/heartbeat/models/{self.model_id}',
                                     json=body,
                                     headers=self.auth_header,
                                     retry=0,
                                     code=5,
                                     thread=False)
            self.agent_id = response.json().get('data', {}).get('agentId', None)
            sleep(self.rate_limits['heartbeat'])
--- a/ultralytics/hub/utils.py
+++ b/ultralytics/hub/utils.py
@ -0,0 +1,139 @@
 import shutil
 import threading
 import time
 import uuid
 import requests
 from ultralytics.hub.config import HUB_API_ROOT
 from ultralytics.yolo.utils import LOGGER, RANK, SETTINGS, colorstr, emojis
 PREFIX = colorstr('Ultralytics: ')
 HELP_MSG = 'If this issue persists please visit https://github.com/ultralytics/hub/issues for assistance.'
 def check_dataset_disk_space(url='https://github.com/ultralytics/yolov5/releases/download/v1.0/coco128.zip', sf=2.0):
    """
    Check that the file at `url` fits on disk with a safety factor, i.e. require 2GB free if it is 1GB with sf=2.0.

    Args:
        url (str): URL of the dataset archive; its size is taken from the Content-Length response header.
        sf (float): Safety factor applied to the dataset size.

    Returns:
        bool: True if free space exceeds size * sf, otherwise False (after logging a warning).
    """
    gib = 1 << 30  # bytes per GiB
    # requests.head() does not follow redirects by default, so for a redirecting URL the Content-Length read here
    # would be that of the redirect response rather than of the dataset itself.
    response = requests.head(url, allow_redirects=True)
    data = int(response.headers['Content-Length']) / gib  # dataset size (GB)
    total, _, free = (x / gib for x in shutil.disk_usage("/"))  # converted from bytes to GiB
    LOGGER.info(f'{PREFIX}{data:.3f} GB dataset, {free:.1f}/{total:.1f} GB free disk space')
    if data * sf < free:
        return True  # sufficient space
    LOGGER.warning(f'{PREFIX}WARNING: Insufficient free disk space {free:.1f} GB < {data * sf:.3f} GB required, '
                   f'training cancelled ❌. Please free {data * sf - free:.1f} GB additional disk space and try again.')
    return False  # insufficient space
 def request_with_credentials(url: str) -> any:
    """
    POST to `url` from the notebook's browser context with cookies included, and return the evaluated result.

    The JavaScript stores a promise in window._hub_tmp that resolves with the response JSON and rejects on a fetch
    error or after a 5000 ms timer; the promise is then evaluated through google.colab's output.eval_js.
    """
    from google.colab import output  # noqa
    from IPython import display  # noqa

    script = """
            window._hub_tmp = new Promise((resolve, reject) => {
                const timeout = setTimeout(() => reject("Failed authenticating existing browser session"), 5000)
                fetch("%s", {
                    method: 'POST',
                    credentials: 'include'
                })
                    .then((response) => resolve(response.json()))
                    .then((json) => {
                    clearTimeout(timeout);
                    }).catch((err) => {
                    clearTimeout(timeout);
                    reject(err);
                });
            });
            """ % url
    display.display(display.Javascript(script))
    return output.eval_js("_hub_tmp")
 # Deprecated TODO: eliminate this function?
 def split_key(key: str = '') -> tuple[str, str]:
    """
    Verify and split a 'api_key[sep]model_id' string, sep is one of '.' or '_'

    The split happens at the FIRST separator, so a model ID that itself contains the separator stays intact
    instead of triggering an unpacking ValueError.

    Args:
        key (str): The model key to split. If not provided, the user will be prompted to enter it.

    Returns:
        Tuple[str, str]: A tuple containing the API key and model ID.

    Raises:
        AssertionError: If the key has no separator or either part is empty.
    """
    import getpass

    error_string = emojis(f'{PREFIX}Invalid API key ⚠️\n')  # error string
    if not key:
        key = getpass.getpass('Enter model key: ')
    sep = '_' if '_' in key else '.' if '.' in key else None  # separator
    # Raise explicitly (same exception type as before) so validation still runs under `python -O`
    if not sep:
        raise AssertionError(error_string)
    api_key, _, model_id = key.partition(sep)
    if not (api_key and model_id):
        raise AssertionError(error_string)
    return api_key, model_id
 def smart_request(*args, retry=3, timeout=30, thread=True, code=-1, method="post", **kwargs):
    """
    Makes an HTTP request using the 'requests' library, with exponential backoff retries up to a specified timeout.

    Args:
        *args: Positional arguments to be passed to the requests function specified in method.
        retry (int, optional): Number of retries to attempt before giving up. Default is 3.
        timeout (int, optional): Timeout in seconds after which the function will give up retrying. Default is 30.
        thread (bool, optional): Whether to execute the request in a separate daemon thread. Default is True.
        code (int, optional): An identifier for the request, used for logging purposes. Default is -1.
        method (str, optional): The HTTP method to use for the request. Choices are 'post' and 'get'. Default is 'post'.
        **kwargs: Keyword arguments to be passed to the requests function specified in method.

    Returns:
        requests.Response: The HTTP response object. If the request is executed in a separate thread, returns None.
            Also None if the timeout elapsed before any request could be sent.
    """
    retry_codes = (408, 500)  # retry only these codes
    methods = {'post': requests.post, 'get': requests.get}  # request methods

    def fcn(*args, **kwargs):
        r = None  # stays None only if the timeout trips before the first request
        t0 = time.time()
        for i in range(retry + 1):
            if (time.time() - t0) > timeout:
                break
            r = methods[method](*args, **kwargs)  # i.e. post(url, data, json, files)
            if r.status_code == 200:
                break
            try:
                m = r.json().get('message', 'No JSON message.')
            except Exception:
                m = 'Unable to read JSON.'
            if i == 0:  # warn once, on the first failure only
                if r.status_code in retry_codes:
                    m += f' Retrying {retry}x for {timeout}s.' if retry else ''
                elif r.status_code == 429:  # rate limit
                    h = r.headers  # response headers
                    # .get() so a 429 without the rate-limit headers cannot raise KeyError
                    m = f"Rate limit reached ({h.get('X-RateLimit-Remaining')}/{h.get('X-RateLimit-Limit')}). " \
                        f"Please retry after {h.get('Retry-After')}s."
                LOGGER.warning(f"{PREFIX}{m} {HELP_MSG} ({r.status_code} #{code})")
                if r.status_code not in retry_codes:
                    return r
            if i < retry:  # no point backing off after the final attempt
                time.sleep(2 ** i)  # exponential standoff
        return r

    if thread:
        threading.Thread(target=fcn, args=args, kwargs=kwargs, daemon=True).start()
    else:
        return fcn(*args, **kwargs)
 def sync_analytics(cfg, enabled=False):
    """
    Send the run configuration to the HUB analytics endpoint when syncing is switched on.

    Nothing is sent unless SETTINGS['sync'] is truthy, RANK is -1 or 0, and `enabled` is True.

    Args:
        cfg (DictConfig): Configuration for the task and mode.
        enabled (bool): For debugging.
    """
    if not (SETTINGS['sync'] and RANK in {-1, 0} and enabled):
        return
    payload = dict(cfg)  # convert type from DictConfig to dict
    payload['uuid'] = uuid.getnode()  # value of uuid.getnode(), used as the device identifier
    # Send a request to the HUB API to sync the analytics data
    smart_request(f'{HUB_API_ROOT}/analytics', data=payload, headers=None, code=3, retry=0)
--- a/ultralytics/yolo/cli.py
+++ b/ultralytics/yolo/cli.py
@ -3,46 +3,48 @@ from pathlib import Path
 import hydra
-import ultralytics
+from ultralytics import hub, yolo
-from ultralytics import yolo
+from ultralytics.yolo.utils import DEFAULT_CONFIG, LOGGER, colorstr
-from .utils import DEFAULT_CONFIG, LOGGER, colorstr
+DIR = Path(__file__).parent
-@hydra.main(version_base=None, config_path="configs", config_name="default")
+@hydra.main(version_base=None, config_path=str(DEFAULT_CONFIG.parent.relative_to(DIR)), config_name=DEFAULT_CONFIG.name)
 def cli(cfg):
-    cwd = Path().cwd()
+    """
-    LOGGER.info(f"{colorstr(f'Ultralytics YOLO v{ultralytics.__version__}')}")
+    Run a specified task and mode with the given configuration.
    Args:
        cfg (DictConfig): Configuration for the task and mode.
    """
    # LOGGER.info(f"{colorstr(f'Ultralytics YOLO v{ultralytics.__version__}')}")
    task, mode = cfg.task.lower(), cfg.mode.lower()
-    if task == "init":  # special case
+    # Special case for initializing the configuration
-        shutil.copy2(DEFAULT_CONFIG, cwd)
+    if task == "init":
        shutil.copy2(DEFAULT_CONFIG, Path.cwd())
        LOGGER.info(f"""
-        {colorstr("YOLO:")} configuration saved to {cwd / DEFAULT_CONFIG.name}.
+        {colorstr("YOLO:")} configuration saved to {Path.cwd() / DEFAULT_CONFIG.name}.
        To run experiments using custom configuration:
        yolo task='task' mode='mode' --config-name config_file.yaml
                    """)
        return
-    elif task == "detect":
+    # Mapping from task to module
-        module = yolo.v8.detect
+    task_module_map = {"detect": yolo.v8.detect, "segment": yolo.v8.segment, "classify": yolo.v8.classify}
-    elif task == "segment":
+    module = task_module_map.get(task)
-        module = yolo.v8.segment
+    if not module:
-    elif task == "classify":
+        raise SyntaxError(f"task not recognized. Choices are {', '.join(task_module_map.keys())}")
-        module = yolo.v8.classify
+
-    elif task == "export":
+    # Mapping from mode to function
-        func = yolo.engine.exporter.export
+    mode_func_map = {
-    else:
+        "train": module.train,
-        raise SyntaxError("task not recognized. Choices are `'detect', 'segment', 'classify'`")
+        "val": module.val,
        "predict": module.predict,
        "export": yolo.engine.exporter.export,
        "checks": hub.checks}
    func = mode_func_map.get(mode)
    if not func:
        raise SyntaxError(f"mode not recognized. Choices are {', '.join(mode_func_map.keys())}")
    if mode == "train":
        func = module.train
    elif mode == "val":
        func = module.val
    elif mode == "predict":
        func = module.predict
    elif mode == "export":
        func = yolo.engine.exporter.export
    else:
        raise SyntaxError("mode not recognized. Choices are `'train', 'val', 'predict', 'export'`")
    func(cfg)
--- a/ultralytics/yolo/configs/default.yaml
+++ b/ultralytics/yolo/configs/default.yaml
@ -8,6 +8,7 @@ mode: "train" # choices=['train', 'val', 'predict'] # mode to run task in.
 model: null # i.e. yolov5s.pt, yolo.yaml. Path to model file
 data: null # i.e. coco128.yaml. Path to data file
 epochs: 100 # number of epochs to train for
 patience: 50  # TODO: epochs to wait for no observable improvement for early stopping of training
 batch_size: 16 # number of images per batch
 imgsz: 640 # size of input images
 save: True # save checkpoints
--- a/ultralytics/yolo/engine/exporter.py
+++ b/ultralytics/yolo/engine/exporter.py
@ -71,8 +71,7 @@ from ultralytics.nn.tasks import ClassificationModel, DetectionModel, Segmentati
 from ultralytics.yolo.configs import get_config
 from ultralytics.yolo.data.dataloaders.stream_loaders import LoadImages
 from ultralytics.yolo.data.utils import check_dataset
-from ultralytics.yolo.utils import DEFAULT_CONFIG, LOGGER, colorstr, get_default_args, yaml_save
+from ultralytics.yolo.utils import DEFAULT_CONFIG, LOGGER, callbacks, colorstr, get_default_args, yaml_save
 from ultralytics.yolo.utils.callbacks import default_callbacks
 from ultralytics.yolo.utils.checks import check_imgsz, check_requirements, check_version, check_yaml
 from ultralytics.yolo.utils.files import file_size, increment_path
 from ultralytics.yolo.utils.ops import Profile
@ -138,16 +137,15 @@ class Exporter:
        """
        if overrides is None:
            overrides = {}
        if 'batch_size' not in overrides:
            overrides['batch_size'] = 1  # set default export batch size
        self.args = get_config(config, overrides)
        project = self.args.project or f"runs/{self.args.task}"
        name = self.args.name or "exp"  # hardcode mode as export doesn't require it
        self.save_dir = increment_path(Path(project) / name, exist_ok=self.args.exist_ok)
        self.save_dir.mkdir(parents=True, exist_ok=True)
-
+        self.callbacks = defaultdict(list, {k: [v] for k, v in callbacks.default_callbacks.items()})  # add callbacks
-        # callbacks
+        callbacks.add_integration_callbacks(self)
        self.callbacks = defaultdict([])
        for callback, func in default_callbacks.items():
            self.add_callback(callback, func)
    @smart_inference_mode()
    def __call__(self, model=None):
@ -173,7 +171,6 @@ class Exporter:
            assert self.device.type == 'cpu', '--optimize not compatible with cuda devices, i.e. use --device cpu'
        # Input
        self.args.batch_size = 1  # TODO: resolve this issue, default 16 not fit for export
        im = torch.zeros(self.args.batch_size, 3, *self.imgsz).to(self.device)
        file = Path(getattr(model, 'yaml_file', None) or Path(model.yaml['yaml_file']).name)
@ -765,18 +762,6 @@ class Exporter:
        LOGGER.info(f'{prefix} pipeline success')
        return model
    def add_callback(self, event: str, callback):
        """
        appends the given callback
        """
        self.callbacks[event].append(callback)
    def set_callback(self, event: str, callback):
        """
        overrides the existing callbacks with the given callback
        """
        self.callbacks[event] = [callback]
    def run_callbacks(self, event: str):
        for callback in self.callbacks.get(event, []):
            callback(self)
--- a/ultralytics/yolo/engine/predictor.py
+++ b/ultralytics/yolo/engine/predictor.py
@ -35,8 +35,7 @@ from ultralytics.nn.autobackend import AutoBackend
 from ultralytics.yolo.configs import get_config
 from ultralytics.yolo.data.dataloaders.stream_loaders import LoadImages, LoadScreenshots, LoadStreams
 from ultralytics.yolo.data.utils import IMG_FORMATS, VID_FORMATS
-from ultralytics.yolo.utils import DEFAULT_CONFIG, LOGGER, colorstr, ops
+from ultralytics.yolo.utils import DEFAULT_CONFIG, LOGGER, callbacks, colorstr, ops
 from ultralytics.yolo.utils.callbacks import default_callbacks
 from ultralytics.yolo.utils.checks import check_file, check_imgsz, check_imshow
 from ultralytics.yolo.utils.files import increment_path
 from ultralytics.yolo.utils.torch_utils import select_device, smart_inference_mode
@ -90,11 +89,8 @@ class BasePredictor:
        self.view_img = None
        self.annotator = None
        self.data_path = None
-
+        self.callbacks = defaultdict(list, {k: [v] for k, v in callbacks.default_callbacks.items()})  # add callbacks
-        # callbacks
+        callbacks.add_integration_callbacks(self)
        self.callbacks = defaultdict([])
        for callback, func in default_callbacks.items():
            self.add_callback(callback, func)
    def preprocess(self, img):
        pass
@ -227,18 +223,6 @@ class BasePredictor:
                self.vid_writer[idx] = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
            self.vid_writer[idx].write(im0)
    def add_callback(self, event: str, callback):
        """
        appends the given callback
        """
        self.callbacks[event].append(callback)
    def set_callback(self, event: str, callback):
        """
        overrides the existing callbacks with the given callback
        """
        self.callbacks[event] = [callback]
    def run_callbacks(self, event: str):
        for callback in self.callbacks.get(event, []):
            callback(self)
--- a/ultralytics/yolo/engine/trainer.py
+++ b/ultralytics/yolo/engine/trainer.py
@ -21,11 +21,10 @@ from torch.optim import lr_scheduler
 from tqdm import tqdm
 import ultralytics.yolo.utils as utils
 import ultralytics.yolo.utils.callbacks as callbacks
 from ultralytics import __version__
 from ultralytics.yolo.configs import get_config
 from ultralytics.yolo.data.utils import check_dataset, check_dataset_yaml
-from ultralytics.yolo.utils import DEFAULT_CONFIG, LOGGER, RANK, TQDM_BAR_FORMAT, colorstr, yaml_save
+from ultralytics.yolo.utils import DEFAULT_CONFIG, LOGGER, RANK, TQDM_BAR_FORMAT, callbacks, colorstr, yaml_save
 from ultralytics.yolo.utils.checks import check_file, print_args
 from ultralytics.yolo.utils.dist import ddp_cleanup, generate_ddp_command
 from ultralytics.yolo.utils.files import get_latest_run, increment_path
@ -88,7 +87,7 @@ class BaseTrainer:
        self.model = None
        self.callbacks = defaultdict(list)
-        # dirs
+        # Dirs
        project = self.args.project or f"runs/{self.args.task}"
        name = self.args.name or f"{self.args.mode}"
        self.save_dir = increment_path(Path(project) / name, exist_ok=self.args.exist_ok if RANK in {-1, 0} else True)
@ -104,7 +103,7 @@ class BaseTrainer:
        if RANK == -1:
            print_args(dict(self.args))
-        # device
+        # Device
        self.device = utils.torch_utils.select_device(self.args.device, self.batch_size)
        self.amp = self.device.type != 'cpu'
        self.scaler = amp.GradScaler(enabled=self.amp)
@ -123,7 +122,7 @@ class BaseTrainer:
        self.lf = None
        self.scheduler = None
-        # epoch level metrics
+        # Epoch level metrics
        self.best_fitness = None
        self.fitness = None
        self.loss = None
@ -131,20 +130,20 @@ class BaseTrainer:
        self.loss_names = None
        self.csv = self.save_dir / 'results.csv'
-        for callback, func in callbacks.default_callbacks.items():
+        # Callbacks
-            self.add_callback(callback, func)
+        self.callbacks = defaultdict(list, {k: [v] for k, v in callbacks.default_callbacks.items()})  # add callbacks
        if RANK in {0, -1}:
            callbacks.add_integration_callbacks(self)
    def add_callback(self, event: str, callback):
        """
-        appends the given callback
+        Appends the given callback. TODO: unused, consider removing
        """
        self.callbacks[event].append(callback)
    def set_callback(self, event: str, callback):
        """
-        overrides the existing callbacks with the given callback
+        Overrides the existing callbacks with the given callback.  TODO: unused, consider removing
        """
        self.callbacks[event] = [callback]
@ -469,7 +468,7 @@ class BaseTrainer:
                    self.validator.args.save_json = True
                    self.metrics = self.validator(model=f)
                    self.metrics.pop('fitness', None)
-                    self.run_callbacks('on_val_end')
+                    self.run_callbacks('on_fit_epoch_end')
    def check_resume(self):
        resume = self.args.resume
--- a/ultralytics/yolo/engine/validator.py
+++ b/ultralytics/yolo/engine/validator.py
@ -8,8 +8,7 @@ from tqdm import tqdm
 from ultralytics.nn.autobackend import AutoBackend
 from ultralytics.yolo.data.utils import check_dataset, check_dataset_yaml
-from ultralytics.yolo.utils import DEFAULT_CONFIG, LOGGER, RANK, TQDM_BAR_FORMAT
+from ultralytics.yolo.utils import DEFAULT_CONFIG, LOGGER, RANK, TQDM_BAR_FORMAT, callbacks
 from ultralytics.yolo.utils.callbacks import default_callbacks
 from ultralytics.yolo.utils.checks import check_imgsz
 from ultralytics.yolo.utils.files import increment_path
 from ultralytics.yolo.utils.ops import Profile
@ -66,10 +65,7 @@ class BaseValidator:
                                                   exist_ok=self.args.exist_ok if RANK in {-1, 0} else True)
        (self.save_dir / 'labels' if self.args.save_txt else self.save_dir).mkdir(parents=True, exist_ok=True)
-        # callbacks
+        self.callbacks = defaultdict(list, {k: [v] for k, v in callbacks.default_callbacks.items()})  # add callbacks
        self.callbacks = defaultdict(list)
        for callback, func in default_callbacks.items():
            self.add_callback(callback, func)
    @smart_inference_mode()
    def __call__(self, trainer=None, model=None):
@ -77,7 +73,6 @@ class BaseValidator:
        Supports validation of a pre-trained model if passed or a model being trained
        if trainer is passed (trainer gets priority).
        """
        self.run_callbacks('on_val_start')
        self.training = trainer is not None
        if self.training:
            self.device = trainer.device
@ -89,6 +84,8 @@ class BaseValidator:
            self.loss = torch.zeros_like(trainer.loss_items, device=trainer.device)
            self.args.plots = trainer.epoch == trainer.epochs - 1  # always plot final epoch
        else:
            callbacks.add_integration_callbacks(self)
            self.run_callbacks('on_val_start')
            assert model is not None, "Either trainer or model is needed for validation"
            self.device = select_device(self.args.device, self.args.batch_size)
            self.args.half &= self.device.type != 'cpu'
@ -167,18 +164,6 @@ class BaseValidator:
                stats = self.eval_json(stats)  # update stats
            return stats
    def add_callback(self, event: str, callback):
        """
        Register one more callback for ``event``, keeping those already registered.
        """
        registered = self.callbacks[event]
        registered.append(callback)
    def set_callback(self, event: str, callback):
        """
        Make ``callback`` the only callback for ``event``, discarding any registered before.
        """
        sole_handler = [callback]
        self.callbacks[event] = sole_handler
    def run_callbacks(self, event: str):
        """
        Call every callback registered for ``event`` in registration order, passing this instance.
        """
        handlers = self.callbacks.get(event, [])  # .get avoids creating an entry for unknown events
        for handler in handlers:
            handler(self)
--- a/ultralytics/yolo/utils/__init__.py
+++ b/ultralytics/yolo/utils/__init__.py
@ -249,26 +249,6 @@ def threaded(func):
    return wrapper
 def get_settings(file=USER_CONFIG_DIR / 'settings.yaml'):
    """
    Load the global settings YAML, first writing a file of default values when it does not exist yet.

    A datasets or weights directory left as None means the current working directory will be used.
    The 'sync' setting determines whether analytics will be synced to help with YOLO development.
    """
    from ultralytics.yolo.utils.torch_utils import torch_distributed_zero_first

    defaults = {
        'datasets_dir': None,  # default datasets directory. If None, current working directory is used.
        'weights_dir': None,  # default weights directory. If None, current working directory is used.
        'sync': True}  # sync analytics to help with YOLO development

    # Existence check and file creation both happen inside the distributed context manager
    with torch_distributed_zero_first(RANK):
        missing = not file.exists()
        if missing:
            yaml_save(file, defaults)

    return yaml_load(file)
 def yaml_save(file='data.yaml', data=None):
    """
    Save YAML data to a file.
@ -305,6 +285,26 @@ def yaml_load(file='data.yaml'):
        return {**yaml.safe_load(f), 'yaml_file': file}
 def get_settings(file=USER_CONFIG_DIR / 'settings.yaml'):
    """
    Load the global settings YAML, first writing a file of default values when it does not exist yet.

    A datasets or weights directory left as None means the current working directory will be used.
    The 'sync' setting determines whether analytics will be synced to help with YOLO development.
    """
    from ultralytics.yolo.utils.torch_utils import torch_distributed_zero_first

    defaults = {
        'datasets_dir': None,  # default datasets directory. If None, current working directory is used.
        'weights_dir': None,  # default weights directory. If None, current working directory is used.
        'sync': True}  # sync analytics to help with YOLO development

    # Existence check and file creation both happen inside the distributed context manager
    with torch_distributed_zero_first(RANK):
        missing = not file.exists()
        if missing:
            yaml_save(file, defaults)

    return yaml_load(file)
 # Run below code on utils init -----------------------------------------------------------------------------------------
 # Set logger
--- a/ultralytics/yolo/utils/callbacks/base.py
+++ b/ultralytics/yolo/utils/callbacks/base.py
@ -135,11 +135,12 @@ default_callbacks = {
    'on_export_end': on_export_end}
-def add_integration_callbacks(trainer):
+def add_integration_callbacks(instance):
    from .clearml import callbacks as clearml_callbacks
-    from .tb import callbacks as tb_callbacks
+    from .hub import callbacks as hub_callbacks
    from .tensorboard import callbacks as tb_callbacks
    from .wb import callbacks as wb_callbacks
-    for x in clearml_callbacks, tb_callbacks, wb_callbacks:
+    for x in clearml_callbacks, hub_callbacks, tb_callbacks, wb_callbacks:
        for k, v in x.items():
-            trainer.add_callback(k, v)  # add_callback(name, func)
+            instance.callbacks[k].append(v)  # callback[name].append(func)
--- a/ultralytics/yolo/utils/callbacks/hub.py
+++ b/ultralytics/yolo/utils/callbacks/hub.py
@ -0,0 +1,80 @@
 import json
 from time import time
 import torch
 from ultralytics.hub.utils import PREFIX, sync_analytics
 from ultralytics.yolo.utils import LOGGER
 def on_pretrain_routine_end(trainer):
    """Log the HUB model URL and start the upload rate-limit timers; no-op without a HUB session."""
    session = getattr(trainer, 'hub_session', None)
    if not session:
        return  # trainer has no (truthy) hub_session attribute

    LOGGER.info(f"{PREFIX}View model at https://hub.ultralytics.com/models/{session.model_id} 🚀")
    # Start timers for upload rate limit, one per upload kind
    session.t = dict(metrics=time(), ckpt=time())
 def on_fit_epoch_end(trainer):
    """Queue this epoch's metrics as JSON and upload the queue once the metrics rate limit has elapsed."""
    hub = getattr(trainer, 'hub_session', None)
    if not hub:
        return  # trainer has no (truthy) hub_session attribute

    # Replace tensor values with plain Python numbers, in place, so the dict can be JSON-encoded
    results = trainer.metrics
    tensor_keys = [name for name, value in results.items() if isinstance(value, torch.Tensor)]
    for name in tensor_keys:
        results[name] = results[name].item()
    hub.metrics_queue[trainer.epoch] = json.dumps(results)  # json string

    # Upload metrics only when enough time has passed since the last upload
    elapsed = time() - hub.t['metrics']
    if elapsed > hub.rate_limits['metrics']:
        hub.upload_metrics()
        hub.t['metrics'] = time()  # reset timer
        hub.metrics_queue = {}  # reset queue
 def on_model_save(trainer):
    """Upload the last checkpoint to HUB when the checkpoint rate limit has elapsed; no-op without a session."""
    session = getattr(trainer, 'hub_session', None)
    if not session:
        return  # trainer has no (truthy) hub_session attribute

    # Upload checkpoints with rate limiting
    fitness_is_best = trainer.best_fitness == trainer.fitness
    elapsed = time() - session.t['ckpt']
    if elapsed > session.rate_limits['ckpt']:
        LOGGER.info(f"{PREFIX}Uploading checkpoint {session.model_id}")
        session.upload_model(trainer.epoch, trainer.last, fitness_is_best)
        session.t['ckpt'] = time()  # reset timer
 def on_train_end(trainer):
    """
    Upload the final model to Ultralytics HUB and stop the session heartbeats.

    Does nothing when the trainer has no truthy `hub_session` attribute.

    Args:
        trainer: object providing `epoch`, `best`, `metrics` and, optionally, `hub_session`.
    """
    session = getattr(trainer, 'hub_session', None)
    if session:
        # Upload final model and metrics with exponential standoff
        LOGGER.info(f"{PREFIX}Training completed successfully ✅\n"
                    f"{PREFIX}Uploading final {session.model_id}")
        # The mAP50 key may be missing from trainer.metrics (presumably only box-detection runs report it).
        # Fall back to 0.0 instead of raising KeyError, which would skip the final upload and never
        # clear session.alive below.
        final_map = trainer.metrics.get('metrics/mAP50(B)', 0.0)
        session.upload_model(trainer.epoch, trainer.best, map=final_map, final=True)
        session.alive = False  # stop heartbeats
        LOGGER.info(f"{PREFIX}View model at https://hub.ultralytics.com/models/{session.model_id} 🚀")
 def on_train_start(trainer):
    """Pass the trainer's args to sync_analytics."""
    run_args = trainer.args
    sync_analytics(run_args)
 def on_val_start(validator):
    """Pass the validator's args to sync_analytics."""
    run_args = validator.args
    sync_analytics(run_args)
 def on_predict_start(predictor):
    """Pass the predictor's args to sync_analytics."""
    run_args = predictor.args
    sync_analytics(run_args)
 def on_export_start(exporter):
    """Pass the exporter's args to sync_analytics."""
    run_args = exporter.args
    sync_analytics(run_args)
 # Event name -> handler mapping exported by this module
 callbacks = {
    # handlers that act only when the trainer carries a truthy hub_session
    'on_pretrain_routine_end': on_pretrain_routine_end,
    'on_fit_epoch_end': on_fit_epoch_end,
    'on_model_save': on_model_save,
    'on_train_end': on_train_end,
    # handlers that forward the caller's args to sync_analytics
    'on_train_start': on_train_start,
    'on_val_start': on_val_start,
    'on_predict_start': on_predict_start,
    'on_export_start': on_export_start}
--- a/ultralytics/yolo/utils/callbacks/tensorboard.py
+++ b/ultralytics/yolo/utils/callbacks/tensorboard.py
`@ -2,4 +2,4 @@ __version__ = "8.0.0.dev0"`

	`from ultralytics.yolo.engine.model import YOLO`	`from ultralytics.yolo.engine.model import YOLO`

	`__all__ = ["__version__", "YOLO"] # allow simpler import`	`__all__ = ["__version__", "YOLO", "hub"] # allow simpler import`