ultralytics 8.0.31 updates and fixes (#857)

Co-authored-by: Yonghye Kwon <developer.0hye@gmail.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Kalen Michael <kalenmike@gmail.com>
2023-02-08 03:27:59 +04:00
parent 2e7a533ac3
commit f5d003d05a
9 changed files with 285 additions and 131 deletions
--- a/ultralytics/hub/init.py
+++ b/ultralytics/hub/init.py
@ -4,67 +4,62 @@ import requests

 from ultralytics.hub.auth import Auth
 from ultralytics.hub.session import HubTrainingSession
-from ultralytics.hub.utils import PREFIX, split_key
-from ultralytics.yolo.utils import LOGGER, emojis
-from ultralytics.yolo.v8.detect import DetectionTrainer
+from ultralytics.hub.utils import split_key
+from ultralytics.yolo.engine.exporter import export_formats
+from ultralytics.yolo.engine.model import YOLO
+from ultralytics.yolo.utils import LOGGER, emojis, PREFIX
+
+# Define all export formats
+EXPORT_FORMATS = list(export_formats()['Argument'][1:]) + ["ultralytics_tflite", "ultralytics_coreml"]


-def start(key=''):
-    # Start training models with Ultralytics HUB. Usage: from src.ultralytics import start; start('API_KEY')
-    def request_api_key(attempts=0):
-        """Prompt the user to input their API key"""
-        import getpass
-
-        max_attempts = 3
-        tries = f"Attempt {str(attempts + 1)} of {max_attempts}" if attempts > 0 else ""
-        LOGGER.info(f"{PREFIX}Login. {tries}")
-        input_key = getpass.getpass("Enter your Ultralytics HUB API key:\n")
-        auth.api_key, model_id = split_key(input_key)
-        if not auth.authenticate():
-            attempts += 1
-            LOGGER.warning(f"{PREFIX}Invalid API key ⚠️\n")
-            if attempts < max_attempts:
-                return request_api_key(attempts)
-            raise ConnectionError(emojis(f"{PREFIX}Failed to authenticate ❌"))
-        else:
-            return model_id
-
+def start(key=""):
+    """
+    Start training models with Ultralytics HUB. Usage: from src.ultralytics import start; start('API_KEY')
+    """
+    auth = Auth(key)
    try:
-        api_key, model_id = split_key(key)
-        auth = Auth(api_key)  # attempts cookie login if no api key is present
-        attempts = 1 if len(key) else 0
        if not auth.get_state():
-            if len(key):
-                LOGGER.warning(f"{PREFIX}Invalid API key ⚠️\n")
-            model_id = request_api_key(attempts)
-        LOGGER.info(f"{PREFIX}Authenticated ✅")
+            model_id = request_api_key(auth)
+        else:
+            _, model_id = split_key(key)
+
        if not model_id:
            raise ConnectionError(emojis('Connecting with global API key is not currently supported. ❌'))
+
        session = HubTrainingSession(model_id=model_id, auth=auth)
        session.check_disk_space()

-        # TODO: refactor, hardcoded for v8
-        args = session.model.copy()
-        args.pop("id")
-        args.pop("status")
-        args.pop("weights")
-        args["data"] = "coco128.yaml"
-        args["model"] = "yolov8n.yaml"
-        args["batch_size"] = 16
-        args["imgsz"] = 64
-
-        trainer = DetectionTrainer(overrides=args)
+        trainer = YOLO(session.input_file)
        session.register_callbacks(trainer)
-        setattr(trainer, 'hub_session', session)
-        trainer.train()
+        trainer.train(**session.train_args)
    except Exception as e:
        LOGGER.warning(f"{PREFIX}{e}")


-def reset_model(key=''):
+def request_api_key(auth, max_attempts=3):
+    """
+    Interactively ask for an Ultralytics HUB API key until authentication succeeds.
+
+    Args:
+        auth: Object whose `api_key` attribute is assigned from each entered key and whose
+            `authenticate()` result decides whether the login succeeded.
+        max_attempts (int): Number of prompts before giving up.
+
+    Returns:
+        The model ID part of the accepted key, as returned by `split_key`.
+
+    Raises:
+        ConnectionError: If none of the `max_attempts` prompts authenticates.
+    """
+    from getpass import getpass
+
+    for attempts in range(max_attempts):
+        LOGGER.info(f"{PREFIX}Login. Attempt {attempts + 1} of {max_attempts}")
+        entered_key = getpass("Enter your Ultralytics HUB API key:\n")
+        auth.api_key, model_id = split_key(entered_key)
+
+        if not auth.authenticate():
+            LOGGER.warning(f"{PREFIX}Invalid API key ⚠️\n")
+            continue  # prompt again while attempts remain
+
+        LOGGER.info(f"{PREFIX}Authenticated ✅")
+        return model_id
+
+    raise ConnectionError(emojis(f"{PREFIX}Failed to authenticate ❌"))
+
+
+def reset_model(key=""):
    # Reset a trained model to an untrained state
    api_key, model_id = split_key(key)
-    r = requests.post('https://api.ultralytics.com/model-reset', json={"apiKey": api_key, "modelId": model_id})
+    r = requests.post("https://api.ultralytics.com/model-reset", json={"apiKey": api_key, "modelId": model_id})

    if r.status_code == 200:
        LOGGER.info(f"{PREFIX}model reset successfully")
@ -72,38 +67,32 @@ def reset_model(key=''):
    LOGGER.warning(f"{PREFIX}model reset failure {r.status_code} {r.reason}")


-def export_model(key='', format='torchscript'):
+def export_model(key="", format="torchscript"):
    # Export a model to all formats
+    assert format in EXPORT_FORMATS, f"Unsupported export format '{format}' passed, valid formats are {EXPORT_FORMATS}"
    api_key, model_id = split_key(key)
-    formats = ('torchscript', 'onnx', 'openvino', 'engine', 'coreml', 'saved_model', 'pb', 'tflite', 'edgetpu', 'tfjs',
-               'ultralytics_tflite', 'ultralytics_coreml')
-    assert format in formats, f"ERROR: Unsupported export format '{format}' passed, valid formats are {formats}"
-
-    r = requests.post('https://api.ultralytics.com/export',
+    r = requests.post("https://api.ultralytics.com/export",
                      json={
                          "apiKey": api_key,
                          "modelId": model_id,
                          "format": format})
-    assert r.status_code == 200, f"{PREFIX}{format} export failure {r.status_code} {r.reason}"
+    assert (r.status_code == 200), f"{PREFIX}{format} export failure {r.status_code} {r.reason}"
    LOGGER.info(f"{PREFIX}{format} export started ✅")


-def get_export(key='', format='torchscript'):
+def get_export(key="", format="torchscript"):
    # Get an exported model dictionary with download URL
+    assert format in EXPORT_FORMATS, f"Unsupported export format '{format}' passed, valid formats are {EXPORT_FORMATS}"
    api_key, model_id = split_key(key)
-    formats = ('torchscript', 'onnx', 'openvino', 'engine', 'coreml', 'saved_model', 'pb', 'tflite', 'edgetpu', 'tfjs',
-               'ultralytics_tflite', 'ultralytics_coreml')
-    assert format in formats, f"ERROR: Unsupported export format '{format}' passed, valid formats are {formats}"
-
-    r = requests.post('https://api.ultralytics.com/get-export',
+    r = requests.post("https://api.ultralytics.com/get-export",
                      json={
                          "apiKey": api_key,
                          "modelId": model_id,
                          "format": format})
-    assert r.status_code == 200, f"{PREFIX}{format} get_export failure {r.status_code} {r.reason}"
+    assert (r.status_code == 200), f"{PREFIX}{format} get_export failure {r.status_code} {r.reason}"
    return r.json()


 # temp. For checking
 if __name__ == "__main__":
-    start(key="b3fba421be84a20dbe68644e14436d1cce1b0a0aaa_HeMfHgvHsseMPhdq7Ylz")
+    start()
--- a/ultralytics/hub/session.py
+++ b/ultralytics/hub/session.py
@ -1,16 +1,18 @@
 # Ultralytics YOLO 🚀, GPL-3.0 license
+import json
 import signal
+import sys
 from pathlib import Path
-from time import sleep
+from time import sleep, time

 import requests

 from ultralytics import __version__
 from ultralytics.hub.utils import HUB_API_ROOT, check_dataset_disk_space, smart_request
-from ultralytics.yolo.utils import is_colab, threaded
-
-AGENT_NAME = f'python-{__version__}-colab' if is_colab() else f'python-{__version__}-local'
+from ultralytics.yolo.utils import is_colab, threaded, LOGGER, emojis, PREFIX
+from ultralytics.yolo.utils.torch_utils import get_flops, get_num_params

+AGENT_NAME = (f"python-{__version__}-colab" if is_colab() else f"python-{__version__}-local")
 session = None


@ -19,23 +21,37 @@ class HubTrainingSession:
    def __init__(self, model_id, auth):
        self.agent_id = None  # identifies which instance is communicating with server
        self.model_id = model_id
-        self.api_url = f'{HUB_API_ROOT}/v1/models/{model_id}'
+        self.api_url = f"{HUB_API_ROOT}/v1/models/{model_id}"
        self.auth_header = auth.get_auth_header()
-        self.rate_limits = {'metrics': 3.0, 'ckpt': 900.0, 'heartbeat': 300.0}  # rate limits (seconds)
-        self.t = {}  # rate limit timers (seconds)
-        self.metrics_queue = {}  # metrics queue
-        self.alive = True  # for heartbeats
+        self._rate_limits = {"metrics": 3.0, "ckpt": 900.0, "heartbeat": 300.0}  # rate limits (seconds)
+        self._timers = {}  # rate limit timers (seconds)
+        self._metrics_queue = {}  # metrics queue
        self.model = self._get_model()
-        self._heartbeats()  # start heartbeats
-        signal.signal(signal.SIGTERM, self.shutdown)  # register the shutdown function to be called on exit
-        signal.signal(signal.SIGINT, self.shutdown)
+        self._start_heartbeat()  # start heartbeats
+        self._register_signal_handlers()

-    def shutdown(self, *args):  # noqa
-        self.alive = False  # stop heartbeats
+    def _register_signal_handlers(self):
+        """Route SIGTERM and SIGINT to `_handle_signal`."""
+        for sig in (signal.SIGTERM, signal.SIGINT):
+            signal.signal(sig, self._handle_signal)
+
+    def _handle_signal(self, signum, frame):
+        """
+        Stop the heartbeat loop and exit the process when a kill signal arrives.
+
+        Prevents heartbeats from being sent on Colab after kill.
+
+        Args:
+            signum: Signal number; also passed to `sys.exit` as the exit status.
+            frame: Unused; included because the `signal` module passes it to every handler.
+
+        Raises:
+            SystemExit: Via `sys.exit(signum)`, unless the heartbeat was already stopped.
+        """
+        # `alive` is first assigned inside the threaded `_start_heartbeat`, so a signal can
+        # arrive before the attribute exists. Treat "not set yet" as still alive instead of
+        # failing with AttributeError inside the handler.
+        if getattr(self, "alive", True) is True:
+            LOGGER.info(f"{PREFIX}Kill signal received! ❌")
+            self._stop_heartbeat()
+            sys.exit(signum)
+
+    def _stop_heartbeat(self):
+        """Clear the `alive` flag so the `while self.alive` heartbeat loop exits."""
+        self.alive = False

    def upload_metrics(self):
-        payload = {"metrics": self.metrics_queue.copy(), "type": "metrics"}
-        smart_request(f'{self.api_url}', json=payload, headers=self.auth_header, code=2)
+        payload = {"metrics": self._metrics_queue.copy(), "type": "metrics"}
+        smart_request(f"{self.api_url}", json=payload, headers=self.auth_header, code=2)

    def upload_model(self, epoch, weights, is_best=False, map=0.0, final=False):
        # Upload a model to HUB
@ -44,25 +60,29 @@ class HubTrainingSession:
            with open(weights, "rb") as f:
                file = f.read()
        if final:
-            smart_request(f'{self.api_url}/upload',
-                          data={
-                              "epoch": epoch,
-                              "type": "final",
-                              "map": map},
-                          files={"best.pt": file},
-                          headers=self.auth_header,
-                          retry=10,
-                          timeout=3600,
-                          code=4)
+            smart_request(
+                f"{self.api_url}/upload",
+                data={
+                    "epoch": epoch,
+                    "type": "final",
+                    "map": map},
+                files={"best.pt": file},
+                headers=self.auth_header,
+                retry=10,
+                timeout=3600,
+                code=4,
+            )
        else:
-            smart_request(f'{self.api_url}/upload',
-                          data={
-                              "epoch": epoch,
-                              "type": "epoch",
-                              "isBest": bool(is_best)},
-                          headers=self.auth_header,
-                          files={"last.pt": file},
-                          code=3)
+            smart_request(
+                f"{self.api_url}/upload",
+                data={
+                    "epoch": epoch,
+                    "type": "epoch",
+                    "isBest": bool(is_best)},
+                headers=self.auth_header,
+                files={"last.pt": file},
+                code=3,
+            )

    def _get_model(self):
        # Returns model from database by id
@ -70,31 +90,131 @@ class HubTrainingSession:
        headers = self.auth_header

        try:
-            r = smart_request(api_url, method="get", headers=headers, thread=False, code=0)
-            data = r.json().get("data", None)
-            if not data:
-                return
-            assert data['data'], 'ERROR: Dataset may still be processing. Please wait a minute and try again.'  # RF fix
+            response = smart_request(api_url, method="get", headers=headers, thread=False, code=0)
+            data = response.json().get("data", None)
+
+            if data.get("status", None) == "trained":
+                raise ValueError(
+                    emojis(f"Model trained. View model at https://hub.ultralytics.com/models/{self.model_id} 🚀"))
+
+            if not data.get("data", None):
+                raise ValueError("Dataset may still be processing. Please wait a minute and try again.")  # RF fix
            self.model_id = data["id"]

+            # TODO: restore when server keys when dataset URL and GPU train is working
+
+            self.train_args = {
+                "batch": data["batch_size"],
+                "epochs": data["epochs"],
+                "imgsz": data["imgsz"],
+                "patience": data["patience"],
+                "device": data["device"],
+                "cache": data["cache"],
+                "data": data["data"]}
+
+            self.input_file = data.get("cfg", data["weights"])
+
+            # hack for yolov5 cfg adds u
+            if "cfg" in data and "yolov5" in data["cfg"]:
+                self.input_file = data["cfg"].replace(".yaml", "u.yaml")
+
            return data
        except requests.exceptions.ConnectionError as e:
-            raise ConnectionRefusedError('ERROR: The HUB server is not online. Please try again later.') from e
+            raise ConnectionRefusedError("ERROR: The HUB server is not online. Please try again later.") from e
+        except Exception:
+            raise

    def check_disk_space(self):
-        if not check_dataset_disk_space(self.model['data']):
+        if not check_dataset_disk_space(self.model["data"]):
            raise MemoryError("Not enough disk space")

+    def register_callbacks(self, trainer):
+        """
+        Attach this session's hooks to `trainer` through `trainer.add_callback`.
+
+        Each hook is registered under the event name that matches its method name.
+        """
+        hook_names = ("on_pretrain_routine_end", "on_fit_epoch_end", "on_model_save", "on_train_end")
+        for hook_name in hook_names:
+            trainer.add_callback(hook_name, getattr(self, hook_name))
+
+    def on_pretrain_routine_end(self, trainer):
+        """
+        Log the HUB model URL and start the upload rate-limit timers.
+
+        `trainer` is unused; it is part of the signature because it is passed to all callbacks.
+        """
+        LOGGER.info(f"{PREFIX}View model at https://hub.ultralytics.com/models/{self.model_id} 🚀")
+        # Later callbacks compare these start times against self._rate_limits
+        self._timers = {timer_name: time() for timer_name in ("metrics", "ckpt")}
+
+    def on_fit_epoch_end(self, trainer):
+        """
+        Queue this epoch's metrics as JSON and upload the queue once the metrics rate limit allows.
+
+        The queued entry merges the labelled training loss items with `trainer.metrics`. For
+        epoch 0 it also carries the parameter count, GFLOPs and a validator speed value.
+        """
+        loss_items = trainer.label_loss_items(trainer.tloss, prefix="train")
+        epoch_stats = {**loss_items, **trainer.metrics}
+
+        if trainer.epoch == 0:
+            # Model summary figures are attached to the first epoch only
+            epoch_stats = {
+                **epoch_stats,
+                "model/parameters": get_num_params(trainer.model),
+                "model/GFLOPs": round(get_flops(trainer.model), 3),
+                "model/speed(ms)": round(trainer.validator.speed[1], 3)}
+        self._metrics_queue[trainer.epoch] = json.dumps(epoch_stats)
+
+        since_last_upload = time() - self._timers["metrics"]
+        if since_last_upload > self._rate_limits["metrics"]:
+            self.upload_metrics()
+            self._timers["metrics"] = time()  # restart the rate-limit window
+            self._metrics_queue = {}  # start a fresh queue after uploading
+
+    def on_model_save(self, trainer):
+        """
+        Upload the latest checkpoint when the checkpoint rate limit has elapsed.
+
+        The checkpoint is flagged as best when the trainer's fitness equals its best fitness.
+        """
+        improved = trainer.best_fitness == trainer.fitness
+        since_last_upload = time() - self._timers["ckpt"]
+        if since_last_upload > self._rate_limits["ckpt"]:
+            LOGGER.info(f"{PREFIX}Uploading checkpoint {self.model_id}")
+            self._upload_model(trainer.epoch, trainer.last, improved)
+            self._timers["ckpt"] = time()  # restart the rate-limit window
+
+    def on_train_end(self, trainer):
+        """
+        Upload the best weights as the final model, then stop the heartbeat.
+
+        The reported mAP is read from `trainer.metrics` under "metrics/mAP50-95(B)" and
+        defaults to 0 when that key is absent.
+        """
+        LOGGER.info(f"{PREFIX}Training completed successfully ✅")
+        LOGGER.info(f"{PREFIX}Uploading final {self.model_id}")
+
+        final_map = trainer.metrics.get("metrics/mAP50-95(B)", 0)  # hack for fetching mAP
+        self._upload_model(trainer.epoch, trainer.best, map=final_map, final=True)
+        self._stop_heartbeat()
+        LOGGER.info(f"{PREFIX}View model at https://hub.ultralytics.com/models/{self.model_id} 🚀")
+
+    def _upload_model(self, epoch, weights, is_best=False, map=0.0, final=False):
+        """
+        Upload a checkpoint to HUB.
+
+        Args:
+            epoch: Epoch number reported with the upload.
+            weights: Path of the weights file; when it is not an existing file, None is sent in its place.
+            is_best (bool): Sent as "isBest" for epoch uploads; not sent when `final` is True.
+            map (float): Sent as "map" for final uploads; not sent otherwise.
+            final (bool): Upload as the final model ("best.pt") instead of an epoch checkpoint ("last.pt").
+        """
+        file = None
+        if Path(weights).is_file():
+            with open(weights, "rb") as f:
+                file = f.read()
+
+        if final:
+            filename, code = "best.pt", 4
+            data = {"epoch": epoch, "type": "final", "map": map}
+            # Final uploads get a larger retry budget and a one-hour timeout
+            request_limits = {"retry": 10, "timeout": 3600}
+        else:
+            filename, code = "last.pt", 3
+            data = {"epoch": epoch, "type": "epoch", "isBest": bool(is_best)}
+            # Leave retry/timeout unset, as upload_model does for epoch uploads, so that
+            # smart_request's own defaults apply instead of being overridden with None.
+            request_limits = {}
+
+        smart_request(
+            f"{self.api_url}/upload",
+            data=data,
+            files={filename: file},
+            headers=self.auth_header,
+            code=code,
+            **request_limits,
+        )
+
    @threaded
-    def _heartbeats(self):
+    def _start_heartbeat(self):
+        self.alive = True
        while self.alive:
-            r = smart_request(f'{HUB_API_ROOT}/v1/agent/heartbeat/models/{self.model_id}',
-                              json={
-                                  "agent": AGENT_NAME,
-                                  "agentId": self.agent_id},
-                              headers=self.auth_header,
-                              retry=0,
-                              code=5,
-                              thread=False)
-            self.agent_id = r.json().get('data', {}).get('agentId', None)
-            sleep(self.rate_limits['heartbeat'])
+            r = smart_request(
+                f"{HUB_API_ROOT}/v1/agent/heartbeat/models/{self.model_id}",
+                json={
+                    "agent": AGENT_NAME,
+                    "agentId": self.agent_id},
+                headers=self.auth_header,
+                retry=0,
+                code=5,
+                thread=False,
+            )
+            self.agent_id = r.json().get("data", {}).get("agentId", None)
+            sleep(self._rate_limits["heartbeat"])