8.0.60 new HUB training syntax (#1753)

Co-authored-by: Rafael Pierre <97888102+rafaelvp-db@users.noreply.github.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Ayush Chaurasia <ayush.chaurarsia@gmail.com>
Co-authored-by: Semih Demirel <85176438+semihhdemirel@users.noreply.github.com>
This commit is contained in:
Glenn Jocher
2023-04-03 02:36:58 +02:00
committed by GitHub
parent e7876e1ba9
commit 84948651cd
25 changed files with 405 additions and 122 deletions

View File

@ -2,47 +2,51 @@
import requests
from ultralytics.hub.auth import Auth
from ultralytics.hub.session import HUBTrainingSession
from ultralytics.hub.utils import PREFIX, split_key
from ultralytics.yolo.engine.model import YOLO
from ultralytics.yolo.utils import LOGGER, emojis
from ultralytics.yolo.utils import LOGGER
def login(api_key=''):
    """
    Authenticate against the Ultralytics HUB API with the supplied API key.

    Args:
        api_key (str, optional): Either a bare API key or a combined API key and model ID, i.e. key_id.

    Example:
        from ultralytics import hub

        hub.login('your_api_key')
    """
    # Imported inside the function so the auth module is only loaded when login() is actually called
    from ultralytics.hub.auth import Auth

    # Only the constructor call matters here; the resulting instance is intentionally not kept
    Auth(api_key)
def logout():
    """
    Log out of Ultralytics HUB. Currently a placeholder: it only emits a warning and changes no state.

    Example:
        from ultralytics import hub

        hub.logout()
    """
    not_implemented_msg = 'WARNING ⚠️ This method is not yet implemented.'
    LOGGER.warning(not_implemented_msg)
def start(key=''):
"""
Start training models with Ultralytics HUB. Usage: from ultralytics.hub import start; start('API_KEY')
Start training models with Ultralytics HUB (DEPRECATED).
Args:
key (str, optional): A string containing either the API key and model ID combination (apikey_modelid),
or the full model URL (https://hub.ultralytics.com/models/apikey_modelid).
"""
auth = Auth(key)
model_id = split_key(key)[1] if auth.get_state() else request_api_key(auth)
if not model_id:
raise ConnectionError(emojis('Connecting with global API key is not currently supported. ❌'))
LOGGER.warning(f"""
WARNING ⚠️ ultralytics.start() is deprecated in 8.0.60. Updated usage to train your Ultralytics HUB model is below:
session = HUBTrainingSession(model_id=model_id, auth=auth)
session.check_disk_space()
from ultralytics import YOLO
model = YOLO(model=session.model_file, session=session)
model.train(**session.train_args)
def request_api_key(auth, max_attempts=3):
"""
Prompt the user to input their API key. Returns the model ID.
"""
import getpass
for attempts in range(max_attempts):
LOGGER.info(f'{PREFIX}Login. Attempt {attempts + 1} of {max_attempts}')
input_key = getpass.getpass(
'Enter your Ultralytics API Key from https://hub.ultralytics.com/settings?tab=api+keys:\n')
auth.api_key, model_id = split_key(input_key)
if auth.authenticate():
LOGGER.info(f'{PREFIX}Authenticated ✅')
return model_id
LOGGER.warning(f'{PREFIX}Invalid API key ⚠️\n')
raise ConnectionError(emojis(f'{PREFIX}Failed to authenticate ❌'))
model = YOLO('https://hub.ultralytics.com/models/{key}')
model.train()""")
def reset_model(key=''):

View File

@ -2,27 +2,74 @@
import requests
from ultralytics.hub.utils import HUB_API_ROOT, request_with_credentials
from ultralytics.yolo.utils import is_colab
from ultralytics.hub.utils import HUB_API_ROOT, PREFIX, request_with_credentials
from ultralytics.yolo.utils import LOGGER, SETTINGS, emojis, is_colab, set_settings
API_KEY_PATH = 'https://hub.ultralytics.com/settings?tab=api+keys'
API_KEY_URL = 'https://hub.ultralytics.com/settings?tab=api+keys'
class Auth:
id_token = api_key = model_key = False
def __init__(self, api_key=None):
self.api_key = self._clean_api_key(api_key)
self.authenticate() if self.api_key else self.auth_with_cookies()
def __init__(self, api_key=''):
"""
Initialize the Auth class with an optional API key.
@staticmethod
def _clean_api_key(key: str) -> str:
"""Strip model from key if present"""
separator = '_'
return key.split(separator)[0] if separator in key else key
Args:
api_key (str, optional): May be an API key or a combination API key and model ID, i.e. key_id
"""
# Split the input API key in case it contains a combined key_model and keep only the API key part
api_key = api_key.split('_')[0]
# Set API key attribute as value passed or SETTINGS API key if none passed
self.api_key = api_key or SETTINGS.get('api_key', '')
# If an API key is provided
if self.api_key:
# If the provided API key matches the API key in the SETTINGS
if self.api_key == SETTINGS.get('api_key'):
# Log that the user is already logged in
LOGGER.info(f'{PREFIX}Authenticated ✅')
return
else:
# Attempt to authenticate with the provided API key
success = self.authenticate()
# If the API key is not provided and the environment is a Google Colab notebook
elif is_colab():
# Attempt to authenticate using browser cookies
success = self.auth_with_cookies()
else:
# Request an API key
success = self.request_api_key()
# Update SETTINGS with the new API key after successful authentication
if success:
set_settings({'api_key': self.api_key})
# Log that the new login was successful
LOGGER.info(f'{PREFIX}New authentication successful ✅')
else:
LOGGER.info(f'{PREFIX}Retrieve API key from {API_KEY_URL}')
def request_api_key(self, max_attempts=3):
    """
    Interactively prompt the user for an API key and try to authenticate with it.

    Args:
        max_attempts (int, optional): Maximum number of prompts before giving up. Defaults to 3.

    Returns:
        bool: True as soon as an entered key authenticates successfully.

    Raises:
        ConnectionError: If none of the entered keys authenticates within max_attempts prompts.
    """
    import getpass

    for attempt in range(1, max_attempts + 1):
        LOGGER.info(f'{PREFIX}Login. Attempt {attempt} of {max_attempts}')
        input_key = getpass.getpass(f'Enter API key from {API_KEY_URL} ')
        # A combined "key_modelid" string may be pasted; keep only the API key part before the underscore
        self.api_key = input_key.split('_')[0]
        if self.authenticate():
            return True
    # Every attempt failed (or max_attempts was 0): surface this as a connection failure
    raise ConnectionError(emojis(f'{PREFIX}Failed to authenticate ❌'))
def authenticate(self) -> bool:
"""Attempt to authenticate with server"""
"""
Attempt to authenticate with the server using either id_token or API key.
Returns:
bool: True if authentication is successful, False otherwise.
"""
try:
header = self.get_auth_header()
if header:
@ -33,12 +80,16 @@ class Auth:
raise ConnectionError('User has not authenticated locally.')
except ConnectionError:
self.id_token = self.api_key = False # reset invalid
LOGGER.warning(f'{PREFIX}Invalid API key ⚠️')
return False
def auth_with_cookies(self) -> bool:
"""
Attempt to fetch authentication via cookies and set id_token.
User must be logged in to HUB and running in a supported browser.
Returns:
bool: True if authentication is successful, False otherwise.
"""
if not is_colab():
return False # Currently only works with Colab
@ -54,6 +105,12 @@ class Auth:
return False
def get_auth_header(self):
"""
Get the authentication header for making API requests.
Returns:
dict: The authentication header if id_token or API key is set, None otherwise.
"""
if self.id_token:
return {'authorization': f'Bearer {self.id_token}'}
elif self.api_key:
@ -62,9 +119,19 @@ class Auth:
return None
def get_state(self) -> bool:
    """
    Get the authentication state.

    Returns:
        bool: True if either id_token or API key is set, False otherwise.
    """
    # Coerce to a real bool so the return value matches the annotation and the raw
    # token / key string is never handed back to the caller by accident
    return bool(self.id_token or self.api_key)
def set_api_key(self, key: str) -> None:
    """
    Set the API key for authentication.

    The key is stored as given; no validation or authentication is performed here.

    Args:
        key (str): The API key string.
    """
    self.api_key = key

View File

@ -6,17 +6,62 @@ from time import sleep
import requests
from ultralytics.hub.utils import HUB_API_ROOT, check_dataset_disk_space, smart_request
from ultralytics.yolo.utils import LOGGER, PREFIX, __version__, checks, emojis, is_colab, threaded
from ultralytics.hub.utils import HUB_API_ROOT, PREFIX, check_dataset_disk_space, smart_request
from ultralytics.yolo.utils import LOGGER, __version__, checks, emojis, is_colab, threaded
# Agent name string: embeds the package __version__ plus a '-colab' or '-local' suffix chosen by is_colab().
# NOTE(review): presumably reported to HUB to identify the client; its use is not visible in this view — confirm.
AGENT_NAME = f'python-{__version__}-colab' if is_colab() else f'python-{__version__}-local'
class HUBTrainingSession:
"""
HUB training session for Ultralytics HUB YOLO models. Handles model initialization, heartbeats, and checkpointing.
def __init__(self, model_id, auth):
Args:
url (str): Model identifier used to initialize the HUB training session.
Attributes:
agent_id (str): Identifier for the instance communicating with the server.
model_id (str): Identifier for the YOLOv5 model being trained.
model_url (str): URL for the model in Ultralytics HUB.
api_url (str): API URL for the model in Ultralytics HUB.
auth_header (Dict): Authentication header for the Ultralytics HUB API requests.
rate_limits (Dict): Rate limits for different API calls (in seconds).
timers (Dict): Timers for rate limiting.
metrics_queue (Dict): Queue for the model's metrics.
model (Dict): Model data fetched from Ultralytics HUB.
alive (bool): Indicates if the heartbeat loop is active.
"""
def __init__(self, url):
"""
Initialize the HUBTrainingSession with the provided model identifier.
Args:
url (str): Model identifier used to initialize the HUB training session.
It can be a URL string or a model key with specific format.
Raises:
ValueError: If the provided model identifier is invalid.
ConnectionError: If connecting with global API key is not supported.
"""
from ultralytics.hub.auth import Auth
# Parse input
if url.startswith('https://hub.ultralytics.com/models/'):
url = url.split('https://hub.ultralytics.com/models/')[-1]
if [len(x) for x in url.split('_')] == [42, 20]:
key, model_id = url.split('_')
elif len(url) == 20:
key, model_id = '', url
else:
raise ValueError(f'Invalid HUBTrainingSession input: {url}')
# Authorize
auth = Auth(key)
self.agent_id = None # identifies which instance is communicating with server
self.model_id = model_id
self.model_url = f'https://hub.ultralytics.com/models/{model_id}'
self.api_url = f'{HUB_API_ROOT}/v1/models/{model_id}'
self.auth_header = auth.get_auth_header()
self.rate_limits = {'metrics': 3.0, 'ckpt': 900.0, 'heartbeat': 300.0} # rate limits (seconds)
@ -26,16 +71,17 @@ class HUBTrainingSession:
self.alive = True
self._start_heartbeat() # start heartbeats
self._register_signal_handlers()
LOGGER.info(f'{PREFIX}View model at {self.model_url} 🚀')
def _register_signal_handlers(self):
"""Register signal handlers for SIGTERM and SIGINT signals to gracefully handle termination."""
signal.signal(signal.SIGTERM, self._handle_signal)
signal.signal(signal.SIGINT, self._handle_signal)
def _handle_signal(self, signum, frame):
"""
Prevent heartbeats from being sent on Colab after kill.
This method does not use frame, it is included as it is
passed by signal.
Handle kill signals and prevent heartbeats from being sent on Colab after termination.
This method does not use frame, it is included as it is passed by signal.
"""
if self.alive is True:
LOGGER.info(f'{PREFIX}Kill signal received! ❌')
@ -43,15 +89,16 @@ class HUBTrainingSession:
sys.exit(signum)
def _stop_heartbeat(self):
"""End the heartbeat loop"""
"""Terminate the heartbeat loop."""
self.alive = False
def upload_metrics(self):
    """Post a snapshot of the queued metrics for this model to Ultralytics HUB."""
    # Shallow-copy the queue so the request payload is a separate dict from the live queue
    metrics_snapshot = self.metrics_queue.copy()
    smart_request('post',
                  self.api_url,
                  json={'metrics': metrics_snapshot, 'type': 'metrics'},
                  headers=self.auth_header,
                  code=2)
def _get_model(self):
# Returns model from database by id
"""Fetch and return model data from Ultralytics HUB."""
api_url = f'{HUB_API_ROOT}/v1/models/{self.model_id}'
try:
@ -59,9 +106,7 @@ class HUBTrainingSession:
data = response.json().get('data', None)
if data.get('status', None) == 'trained':
raise ValueError(
emojis(f'Model is already trained and uploaded to '
f'https://hub.ultralytics.com/models/{self.model_id} 🚀'))
raise ValueError(emojis(f'Model is already trained and uploaded to {self.model_url} 🚀'))
if not data.get('data', None):
raise ValueError('Dataset may still be processing. Please wait a minute and try again.') # RF fix
@ -88,11 +133,21 @@ class HUBTrainingSession:
raise
def check_disk_space(self):
if not check_dataset_disk_space(self.model['data']):
"""Check if there is enough disk space for the dataset."""
if not check_dataset_disk_space(url=self.model['data']):
raise MemoryError('Not enough disk space')
def upload_model(self, epoch, weights, is_best=False, map=0.0, final=False):
# Upload a model to HUB
"""
Upload a model checkpoint to Ultralytics HUB.
Args:
epoch (int): The current training epoch.
weights (str): Path to the model weights file.
is_best (bool): Indicates if the current model is the best one so far.
map (float): Mean average precision of the model.
final (bool): Indicates if the model is the final model after training.
"""
if Path(weights).is_file():
with open(weights, 'rb') as f:
file = f.read()
@ -120,6 +175,7 @@ class HUBTrainingSession:
@threaded
def _start_heartbeat(self):
"""Begin a threaded heartbeat loop to report the agent's status to Ultralytics HUB."""
while self.alive:
r = smart_request('post',
f'{HUB_API_ROOT}/v1/agent/heartbeat/models/{self.model_id}',

View File

@ -22,7 +22,16 @@ HUB_API_ROOT = os.environ.get('ULTRALYTICS_HUB_API', 'https://api.ultralytics.co
def check_dataset_disk_space(url='https://ultralytics.com/assets/coco128.zip', sf=2.0):
# Check that url fits on disk with safety factor sf, i.e. require 2GB free if url size is 1GB with sf=2.0
"""
Check if there is sufficient disk space to download and store a dataset.
Args:
url (str, optional): The URL to the dataset file. Defaults to 'https://ultralytics.com/assets/coco128.zip'.
sf (float, optional): Safety factor, the multiplier for the required free space. Defaults to 2.0.
Returns:
bool: True if there is sufficient disk space, False otherwise.
"""
gib = 1 << 30 # bytes per GiB
data = int(requests.head(url).headers['Content-Length']) / gib # dataset size (GB)
total, used, free = (x / gib for x in shutil.disk_usage('/')) # bytes
@ -35,7 +44,18 @@ def check_dataset_disk_space(url='https://ultralytics.com/assets/coco128.zip', s
def request_with_credentials(url: str) -> any:
""" Make an ajax request with cookies attached """
"""
Make an AJAX request with cookies attached in a Google Colab environment.
Args:
url (str): The URL to make the request to.
Returns:
any: The response data from the AJAX request.
Raises:
OSError: If the function is not run in a Google Colab environment.
"""
if not is_colab():
raise OSError('request_with_credentials() must run in a Colab environment')
from google.colab import output # noqa
@ -95,7 +115,6 @@ def requests_with_progress(method, url, **kwargs):
Returns:
requests.Response: The response from the HTTP request.
"""
progress = kwargs.pop('progress', False)
if not progress:
@ -126,7 +145,6 @@ def smart_request(method, url, retry=3, timeout=30, thread=True, code=-1, verbos
Returns:
requests.Response: The HTTP response object. If the request is executed in a separate thread, returns None.
"""
retry_codes = (408, 500) # retry only these codes
@ -171,8 +189,8 @@ class Traces:
def __init__(self):
"""
Initialize Traces for error tracking and reporting if tests are not currently running.
Sets the rate limit, timer, and metadata attributes, and determines whether Traces are enabled.
"""
from ultralytics.yolo.cfg import MODES, TASKS
self.rate_limit = 60.0 # rate limit (seconds)
self.t = 0.0 # rate limit timer (seconds)
self.metadata = {
@ -187,17 +205,22 @@ class Traces:
not TESTS_RUNNING and \
ONLINE and \
(is_pip_package() or get_git_origin_url() == 'https://github.com/ultralytics/ultralytics.git')
self.usage = {'tasks': {k: 0 for k in TASKS}, 'modes': {k: 0 for k in MODES}}
self._reset_usage()
def __call__(self, cfg, all_keys=False, traces_sample_rate=1.0):
"""
Sync traces data if enabled in the global settings
Sync traces data if enabled in the global settings.
Args:
cfg (IterableSimpleNamespace): Configuration for the task and mode.
all_keys (bool): Sync all items, not just non-default values.
traces_sample_rate (float): Fraction of traces captured from 0.0 to 1.0
traces_sample_rate (float): Fraction of traces captured from 0.0 to 1.0.
"""
# Increment usage
self.usage['modes'][cfg.mode] = self.usage['modes'].get(cfg.mode, 0) + 1
self.usage['tasks'][cfg.task] = self.usage['tasks'].get(cfg.task, 0) + 1
t = time.time() # current time
if not self.enabled or random() > traces_sample_rate:
# Traces disabled or not randomly selected, do nothing
@ -207,18 +230,20 @@ class Traces:
return
else:
# Time is over rate limiter, send trace now
self.t = t # reset rate limit timer
# Build trace
if cfg.task in self.usage['tasks']:
self.usage['tasks'][cfg.task] += 1
if cfg.mode in self.usage['modes']:
self.usage['modes'][cfg.mode] += 1
trace = {'uuid': SETTINGS['uuid'], 'usage': self.usage, 'metadata': self.metadata}
trace = {'uuid': SETTINGS['uuid'], 'usage': self.usage.copy(), 'metadata': self.metadata}
# Send a request to the HUB API to sync analytics
smart_request('post', f'{HUB_API_ROOT}/v1/usage/anonymous', json=trace, code=3, retry=0, verbose=False)
# Reset usage and rate limit timer
self._reset_usage()
self.t = t
def _reset_usage(self):
    """Replace self.usage with fresh counters: every task in TASKS and every mode in MODES mapped to 0."""
    # Imported inside the method rather than at module level
    from ultralytics.yolo.cfg import MODES, TASKS

    task_counts = dict.fromkeys(TASKS, 0)
    mode_counts = dict.fromkeys(MODES, 0)
    self.usage = {'tasks': task_counts, 'modes': mode_counts}
# Run below code on hub/utils init -------------------------------------------------------------------------------------
# Create one Traces instance when this module is imported and bind it to the module-level name `traces`
traces = Traces()