ultralytics 8.0.31 updates and fixes (#857)

Co-authored-by: Yonghye Kwon <developer.0hye@gmail.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Kalen Michael <kalenmike@gmail.com>
This commit is contained in:
Glenn Jocher
2023-02-08 03:27:59 +04:00
committed by GitHub
parent 2e7a533ac3
commit f5d003d05a
9 changed files with 285 additions and 131 deletions

View File

@ -4,67 +4,62 @@ import requests
from ultralytics.hub.auth import Auth
from ultralytics.hub.session import HubTrainingSession
from ultralytics.hub.utils import PREFIX, split_key
from ultralytics.yolo.utils import LOGGER, emojis
from ultralytics.yolo.v8.detect import DetectionTrainer
from ultralytics.hub.utils import split_key
from ultralytics.yolo.engine.exporter import export_formats
from ultralytics.yolo.engine.model import YOLO
from ultralytics.yolo.utils import LOGGER, emojis, PREFIX
# Formats accepted by the HUB export helpers: every 'Argument' entry of export_formats()
# except the first, followed by the two HUB-specific format names
EXPORT_FORMATS = [*export_formats()["Argument"][1:], "ultralytics_tflite", "ultralytics_coreml"]
def start(key=''):
# Start training models with Ultralytics HUB. Usage: from src.ultralytics import start; start('API_KEY')
def request_api_key(attempts=0):
"""Prompt the user to input their API key"""
import getpass
max_attempts = 3
tries = f"Attempt {str(attempts + 1)} of {max_attempts}" if attempts > 0 else ""
LOGGER.info(f"{PREFIX}Login. {tries}")
input_key = getpass.getpass("Enter your Ultralytics HUB API key:\n")
auth.api_key, model_id = split_key(input_key)
if not auth.authenticate():
attempts += 1
LOGGER.warning(f"{PREFIX}Invalid API key ⚠️\n")
if attempts < max_attempts:
return request_api_key(attempts)
raise ConnectionError(emojis(f"{PREFIX}Failed to authenticate ❌"))
else:
return model_id
def start(key=""):
"""
Start training models with Ultralytics HUB. Usage: from src.ultralytics import start; start('API_KEY')
"""
auth = Auth(key)
try:
api_key, model_id = split_key(key)
auth = Auth(api_key) # attempts cookie login if no api key is present
attempts = 1 if len(key) else 0
if not auth.get_state():
if len(key):
LOGGER.warning(f"{PREFIX}Invalid API key ⚠️\n")
model_id = request_api_key(attempts)
LOGGER.info(f"{PREFIX}Authenticated ✅")
model_id = request_api_key(auth)
else:
_, model_id = split_key(key)
if not model_id:
raise ConnectionError(emojis('Connecting with global API key is not currently supported. ❌'))
session = HubTrainingSession(model_id=model_id, auth=auth)
session.check_disk_space()
# TODO: refactor, hardcoded for v8
args = session.model.copy()
args.pop("id")
args.pop("status")
args.pop("weights")
args["data"] = "coco128.yaml"
args["model"] = "yolov8n.yaml"
args["batch_size"] = 16
args["imgsz"] = 64
trainer = DetectionTrainer(overrides=args)
trainer = YOLO(session.input_file)
session.register_callbacks(trainer)
setattr(trainer, 'hub_session', session)
trainer.train()
trainer.train(**session.train_args)
except Exception as e:
LOGGER.warning(f"{PREFIX}{e}")
def reset_model(key=''):
def request_api_key(auth, max_attempts=3):
    """
    Interactively ask for an Ultralytics HUB API key until authentication succeeds.

    Args:
        auth: Object whose `api_key` attribute is overwritten with each entered key and whose
            `authenticate()` method decides whether that key is accepted.
        max_attempts (int): Number of prompts before giving up.

    Returns:
        The model ID part of the accepted input, as produced by `split_key`.

    Raises:
        ConnectionError: If none of the `max_attempts` entries authenticates.
    """
    import getpass

    for attempt in range(1, max_attempts + 1):
        LOGGER.info(f"{PREFIX}Login. Attempt {attempt} of {max_attempts}")
        entered = getpass.getpass("Enter your Ultralytics HUB API key:\n")
        auth.api_key, model_id = split_key(entered)
        if not auth.authenticate():
            LOGGER.warning(f"{PREFIX}Invalid API key ⚠️\n")
            continue  # prompt again until the attempts run out
        LOGGER.info(f"{PREFIX}Authenticated ✅")
        return model_id

    raise ConnectionError(emojis(f"{PREFIX}Failed to authenticate ❌"))
def reset_model(key=""):
# Reset a trained model to an untrained state
api_key, model_id = split_key(key)
r = requests.post('https://api.ultralytics.com/model-reset', json={"apiKey": api_key, "modelId": model_id})
r = requests.post("https://api.ultralytics.com/model-reset", json={"apiKey": api_key, "modelId": model_id})
if r.status_code == 200:
LOGGER.info(f"{PREFIX}model reset successfully")
@ -72,38 +67,32 @@ def reset_model(key=''):
LOGGER.warning(f"{PREFIX}model reset failure {r.status_code} {r.reason}")
def export_model(key='', format='torchscript'):
def export_model(key="", format="torchscript"):
# Export a model to all formats
assert format in EXPORT_FORMATS, f"Unsupported export format '{format}' passed, valid formats are {EXPORT_FORMATS}"
api_key, model_id = split_key(key)
formats = ('torchscript', 'onnx', 'openvino', 'engine', 'coreml', 'saved_model', 'pb', 'tflite', 'edgetpu', 'tfjs',
'ultralytics_tflite', 'ultralytics_coreml')
assert format in formats, f"ERROR: Unsupported export format '{format}' passed, valid formats are {formats}"
r = requests.post('https://api.ultralytics.com/export',
r = requests.post("https://api.ultralytics.com/export",
json={
"apiKey": api_key,
"modelId": model_id,
"format": format})
assert r.status_code == 200, f"{PREFIX}{format} export failure {r.status_code} {r.reason}"
assert (r.status_code == 200), f"{PREFIX}{format} export failure {r.status_code} {r.reason}"
LOGGER.info(f"{PREFIX}{format} export started ✅")
def get_export(key='', format='torchscript'):
def get_export(key="", format="torchscript"):
# Get an exported model dictionary with download URL
assert format in EXPORT_FORMATS, f"Unsupported export format '{format}' passed, valid formats are {EXPORT_FORMATS}"
api_key, model_id = split_key(key)
formats = ('torchscript', 'onnx', 'openvino', 'engine', 'coreml', 'saved_model', 'pb', 'tflite', 'edgetpu', 'tfjs',
'ultralytics_tflite', 'ultralytics_coreml')
assert format in formats, f"ERROR: Unsupported export format '{format}' passed, valid formats are {formats}"
r = requests.post('https://api.ultralytics.com/get-export',
r = requests.post("https://api.ultralytics.com/get-export",
json={
"apiKey": api_key,
"modelId": model_id,
"format": format})
assert r.status_code == 200, f"{PREFIX}{format} get_export failure {r.status_code} {r.reason}"
assert (r.status_code == 200), f"{PREFIX}{format} get_export failure {r.status_code} {r.reason}"
return r.json()
# temp. For checking
if __name__ == "__main__":
start(key="b3fba421be84a20dbe68644e14436d1cce1b0a0aaa_HeMfHgvHsseMPhdq7Ylz")
start()

View File

@ -1,16 +1,18 @@
# Ultralytics YOLO 🚀, GPL-3.0 license
import json
import signal
import sys
from pathlib import Path
from time import sleep
from time import sleep, time
import requests
from ultralytics import __version__
from ultralytics.hub.utils import HUB_API_ROOT, check_dataset_disk_space, smart_request
from ultralytics.yolo.utils import is_colab, threaded
AGENT_NAME = f'python-{__version__}-colab' if is_colab() else f'python-{__version__}-local'
from ultralytics.yolo.utils import is_colab, threaded, LOGGER, emojis, PREFIX
from ultralytics.yolo.utils.torch_utils import get_flops, get_num_params
AGENT_NAME = (f"python-{__version__}-colab" if is_colab() else f"python-{__version__}-local")
session = None
@ -19,23 +21,37 @@ class HubTrainingSession:
def __init__(self, model_id, auth):
self.agent_id = None # identifies which instance is communicating with server
self.model_id = model_id
self.api_url = f'{HUB_API_ROOT}/v1/models/{model_id}'
self.api_url = f"{HUB_API_ROOT}/v1/models/{model_id}"
self.auth_header = auth.get_auth_header()
self.rate_limits = {'metrics': 3.0, 'ckpt': 900.0, 'heartbeat': 300.0} # rate limits (seconds)
self.t = {} # rate limit timers (seconds)
self.metrics_queue = {} # metrics queue
self.alive = True # for heartbeats
self._rate_limits = {"metrics": 3.0, "ckpt": 900.0, "heartbeat": 300.0} # rate limits (seconds)
self._timers = {} # rate limit timers (seconds)
self._metrics_queue = {} # metrics queue
self.model = self._get_model()
self._heartbeats() # start heartbeats
signal.signal(signal.SIGTERM, self.shutdown) # register the shutdown function to be called on exit
signal.signal(signal.SIGINT, self.shutdown)
self._start_heartbeat() # start heartbeats
self._register_signal_handlers()
def shutdown(self, *args): # noqa
self.alive = False # stop heartbeats
def _register_signal_handlers(self):
signal.signal(signal.SIGTERM, self._handle_signal)
signal.signal(signal.SIGINT, self._handle_signal)
def _handle_signal(self, signum, frame):
    """
    Signal handler that stops the heartbeat loop and exits the process.

    Intended to keep heartbeats from being sent (e.g. on Colab) after the process has been
    killed. Does nothing when the session is not currently marked alive.

    Args:
        signum: Number of the received signal; also used as the process exit status.
        frame: Stack frame supplied by the signal machinery; unused.
    """
    if self.alive is not True:
        return
    LOGGER.info(f"{PREFIX}Kill signal received! ❌")
    self._stop_heartbeat()
    sys.exit(signum)
def _stop_heartbeat(self):
"""End the heartbeat loop"""
self.alive = False
def upload_metrics(self):
payload = {"metrics": self.metrics_queue.copy(), "type": "metrics"}
smart_request(f'{self.api_url}', json=payload, headers=self.auth_header, code=2)
payload = {"metrics": self._metrics_queue.copy(), "type": "metrics"}
smart_request(f"{self.api_url}", json=payload, headers=self.auth_header, code=2)
def upload_model(self, epoch, weights, is_best=False, map=0.0, final=False):
# Upload a model to HUB
@ -44,25 +60,29 @@ class HubTrainingSession:
with open(weights, "rb") as f:
file = f.read()
if final:
smart_request(f'{self.api_url}/upload',
data={
"epoch": epoch,
"type": "final",
"map": map},
files={"best.pt": file},
headers=self.auth_header,
retry=10,
timeout=3600,
code=4)
smart_request(
f"{self.api_url}/upload",
data={
"epoch": epoch,
"type": "final",
"map": map},
files={"best.pt": file},
headers=self.auth_header,
retry=10,
timeout=3600,
code=4,
)
else:
smart_request(f'{self.api_url}/upload',
data={
"epoch": epoch,
"type": "epoch",
"isBest": bool(is_best)},
headers=self.auth_header,
files={"last.pt": file},
code=3)
smart_request(
f"{self.api_url}/upload",
data={
"epoch": epoch,
"type": "epoch",
"isBest": bool(is_best)},
headers=self.auth_header,
files={"last.pt": file},
code=3,
)
def _get_model(self):
# Returns model from database by id
@ -70,31 +90,131 @@ class HubTrainingSession:
headers = self.auth_header
try:
r = smart_request(api_url, method="get", headers=headers, thread=False, code=0)
data = r.json().get("data", None)
if not data:
return
assert data['data'], 'ERROR: Dataset may still be processing. Please wait a minute and try again.' # RF fix
response = smart_request(api_url, method="get", headers=headers, thread=False, code=0)
data = response.json().get("data", None)
if data.get("status", None) == "trained":
raise ValueError(
emojis(f"Model trained. View model at https://hub.ultralytics.com/models/{self.model_id} 🚀"))
if not data.get("data", None):
raise ValueError("Dataset may still be processing. Please wait a minute and try again.") # RF fix
self.model_id = data["id"]
# TODO: restore when server keys when dataset URL and GPU train is working
self.train_args = {
"batch": data["batch_size"],
"epochs": data["epochs"],
"imgsz": data["imgsz"],
"patience": data["patience"],
"device": data["device"],
"cache": data["cache"],
"data": data["data"]}
self.input_file = data.get("cfg", data["weights"])
# hack for yolov5 cfg adds u
if "cfg" in data and "yolov5" in data["cfg"]:
self.input_file = data["cfg"].replace(".yaml", "u.yaml")
return data
except requests.exceptions.ConnectionError as e:
raise ConnectionRefusedError('ERROR: The HUB server is not online. Please try again later.') from e
raise ConnectionRefusedError("ERROR: The HUB server is not online. Please try again later.") from e
except Exception:
raise
def check_disk_space(self):
if not check_dataset_disk_space(self.model['data']):
if not check_dataset_disk_space(self.model["data"]):
raise MemoryError("Not enough disk space")
def register_callbacks(self, trainer):
    """
    Attach this session's HUB hooks to a trainer.

    Args:
        trainer: Object exposing `add_callback(event_name, fn)`. For each of the four events
            below, the session method of the same name is registered.
    """
    hooked_events = (
        "on_pretrain_routine_end",
        "on_fit_epoch_end",
        "on_model_save",
        "on_train_end",
    )
    for event in hooked_events:
        trainer.add_callback(event, getattr(self, event))
def on_pretrain_routine_end(self, trainer):
    """
    Log the model URL and start the upload rate-limit timers.

    Args:
        trainer: Unused here; it is passed to all callbacks by default.
    """
    LOGGER.info(f"{PREFIX}View model at https://hub.ultralytics.com/models/{self.model_id} 🚀")
    # Stamp "now" for each rate-limited upload kind; later callbacks compare the elapsed
    # time against the matching entry in self._rate_limits
    self._timers = {kind: time() for kind in ("metrics", "ckpt")}
def on_fit_epoch_end(self, trainer):
    """
    Queue this epoch's metrics and upload the queue once the metrics rate limit has elapsed.

    The queued entry is the JSON dump of the trainer's labelled training loss items merged
    with `trainer.metrics`; on epoch 0 it additionally carries the parameter count, GFLOPs
    and a speed figure taken from the validator.

    Args:
        trainer: Trainer whose `epoch`, `tloss`, `metrics`, `model` and `validator` are read.
    """
    epoch_metrics = {**trainer.label_loss_items(trainer.tloss, prefix="train"), **trainer.metrics}
    if trainer.epoch == 0:
        # Model-level statistics are attached to the first epoch's entry only
        epoch_metrics.update({
            "model/parameters": get_num_params(trainer.model),
            "model/GFLOPs": round(get_flops(trainer.model), 3),
            "model/speed(ms)": round(trainer.validator.speed[1], 3),
        })
    self._metrics_queue[trainer.epoch] = json.dumps(epoch_metrics)

    elapsed = time() - self._timers["metrics"]
    if elapsed <= self._rate_limits["metrics"]:
        return  # still rate-limited: keep the entry queued for a later upload

    self.upload_metrics()
    self._timers["metrics"] = time()  # restart the rate-limit window
    self._metrics_queue = {}  # begin a fresh queue after the upload
def on_model_save(self, trainer):
    """
    Upload the latest checkpoint, at most once per checkpoint rate-limit window.

    Args:
        trainer: Trainer whose `best_fitness`, `fitness`, `epoch` and `last` are read.
    """
    is_best = trainer.best_fitness == trainer.fitness
    elapsed = time() - self._timers["ckpt"]
    if elapsed <= self._rate_limits["ckpt"]:
        return  # rate-limited: skip this checkpoint

    LOGGER.info(f"{PREFIX}Uploading checkpoint {self.model_id}")
    self._upload_model(trainer.epoch, trainer.last, is_best)
    self._timers["ckpt"] = time()  # restart the rate-limit window
def on_train_end(self, trainer):
    """
    Upload the final model, stop heartbeats and log the model URL.

    Args:
        trainer: Trainer whose `metrics`, `epoch` and `best` are read.
    """
    LOGGER.info(f"{PREFIX}Training completed successfully ✅")
    LOGGER.info(f"{PREFIX}Uploading final {self.model_id}")

    # The mAP is looked up by its metric key; 0 is sent when the trainer reports no such metric
    final_map = trainer.metrics.get("metrics/mAP50-95(B)", 0)
    self._upload_model(trainer.epoch, trainer.best, map=final_map, final=True)

    self.alive = False  # stop heartbeats
    LOGGER.info(f"{PREFIX}View model at https://hub.ultralytics.com/models/{self.model_id} 🚀")
def _upload_model(self, epoch, weights, is_best=False, map=0.0, final=False):
    """
    Upload a weights file to HUB as either an epoch checkpoint or the final model.

    Args:
        epoch: Epoch number sent with the upload.
        weights: Path to the weights file; its bytes are sent when the file exists.
        is_best (bool): Sent as "isBest" for epoch uploads; not sent for final uploads.
        map (float): Sent as "map" for final uploads; not sent for epoch uploads.
        final (bool): If True, upload as "best.pt" with type "final"; otherwise upload as
            "last.pt" with type "epoch".
    """
    weights_path = Path(weights)
    # When the path is not an existing file, None is passed through as the file content
    file = weights_path.read_bytes() if weights_path.is_file() else None

    if final:
        file_name = "best.pt"
        data = {"epoch": epoch, "type": "final", "map": map}
        # Only the final upload asks for extra retries and a long timeout
        options = {"retry": 10, "timeout": 3600, "code": 4}
    else:
        file_name = "last.pt"
        data = {"epoch": epoch, "type": "epoch", "isBest": bool(is_best)}
        # Leave retry/timeout unset so smart_request applies its own defaults; passing an
        # explicit None would override them (smart_request is defined outside this file)
        options = {"code": 3}

    smart_request(
        f"{self.api_url}/upload",
        data=data,
        files={file_name: file},
        headers=self.auth_header,
        **options,
    )
@threaded
def _heartbeats(self):
def _start_heartbeat(self):
self.alive = True
while self.alive:
r = smart_request(f'{HUB_API_ROOT}/v1/agent/heartbeat/models/{self.model_id}',
json={
"agent": AGENT_NAME,
"agentId": self.agent_id},
headers=self.auth_header,
retry=0,
code=5,
thread=False)
self.agent_id = r.json().get('data', {}).get('agentId', None)
sleep(self.rate_limits['heartbeat'])
r = smart_request(
f"{HUB_API_ROOT}/v1/agent/heartbeat/models/{self.model_id}",
json={
"agent": AGENT_NAME,
"agentId": self.agent_id},
headers=self.auth_header,
retry=0,
code=5,
thread=False,
)
self.agent_id = r.json().get("data", {}).get("agentId", None)
sleep(self._rate_limits["heartbeat"])