YOLOv8-16bit/ultralytics/yolo/utils/downloads.py

# Ultralytics YOLO 🚀, GPL-3.0 license

import logging
import os
import subprocess
import urllib
from itertools import repeat
from multiprocessing.pool import ThreadPool
from pathlib import Path
from zipfile import ZipFile

import requests
import torch

from ultralytics.yolo.utils import LOGGER, SETTINGS


def safe_download(file, url, url2=None, min_bytes=1E0, error_msg=''):
    """
    Download `url` to `file`, falling back to curl on `url2` (or `url` again) if the first attempt fails.

    The first attempt uses torch.hub.download_url_to_file. If it raises, or the resulting file is missing or
    smaller than `min_bytes`, any partial file is removed and curl is invoked as a second attempt. Failures are
    logged rather than raised: after both attempts an undersized/missing file is deleted and an error is logged.

    Args:
        file: Destination path (str or Path).
        url: Primary download URL.
        url2: Optional backup URL used for the curl retry; `url` is reused when this is None.
        min_bytes: Minimum acceptable file size in bytes; smaller files are treated as incomplete.
        error_msg: Extra text appended to the final error log line.
    """
    file = Path(file)
    assert_msg = f"Downloaded file '{file}' does not exist or size is < min_bytes={min_bytes}"

    def incomplete():
        # True when the download is missing or smaller than the accepted minimum
        return not file.exists() or file.stat().st_size < min_bytes

    try:  # url1
        LOGGER.info(f'Downloading {url} to {file}...')
        torch.hub.download_url_to_file(url, str(file), progress=LOGGER.level <= logging.INFO)
        if incomplete():  # explicit check (an `assert` would be stripped under `python -O`)
            raise RuntimeError(assert_msg)
    except Exception as e:  # url2
        if file.exists():
            file.unlink()  # remove partial downloads
        LOGGER.info(f'ERROR: {e}\nRe-attempting {url2 or url} to {file}...')
        # curl download, retry and resume on fail. An argument list (no shell) keeps quotes or shell
        # metacharacters in the URL/path from breaking or injecting into the command.
        curl_cmd = ['curl', '-#', '-L', str(url2 or url), '-o', str(file), '--retry', '3', '-C', '-']
        try:
            subprocess.run(curl_cmd, check=False)
        except OSError as curl_error:  # e.g. curl is not installed; stay best-effort like the rest of this function
            LOGGER.info(f'ERROR: unable to run curl: {curl_error}')
    finally:
        if incomplete():  # check
            if file.exists():
                file.unlink()  # remove partial downloads
            LOGGER.info(f"ERROR: {assert_msg}\n{error_msg}")
        LOGGER.info('')


def is_url(url, check=True):
    """
    Return True if `url` is a URL (has both a scheme and a network location) and, optionally, is reachable.

    Args:
        url: Value to test; it is converted with str() first.
        check: When True, also open the URL and require an HTTP 200 status code.

    Returns:
        bool: False for non-URLs, malformed URLs, and (when `check` is True) URLs that cannot be opened or do
        not answer with status 200.
    """
    # Function-scope imports: a bare `import urllib` does not guarantee these submodules are loaded
    from urllib.error import URLError
    from urllib.parse import urlparse
    from urllib.request import urlopen

    try:
        url = str(url)
        result = urlparse(url)
        if not (result.scheme and result.netloc):  # check if is url
            return False
        if not check:
            return True
        with urlopen(url) as response:  # check if exists online; context manager closes the connection
            return response.getcode() == 200
    except (URLError, ValueError):
        # URLError covers HTTPError (its subclass), unknown schemes and connection failures;
        # ValueError covers malformed URLs rejected by the parser
        return False


def attempt_download(file, repo='ultralytics/assets', release='v0.0.0'):
    """
    Return a local path for `file`, downloading it if it is not found locally.

    Lookup order:
      1. `file` as given (after stripping whitespace and single quotes).
      2. `file` relative to SETTINGS['weights_dir'].
      3. If `file` is an http(s) URL, download it to the current directory under its base name.
      4. Otherwise, if the file name is among the GitHub release assets of `repo`, download it from there.

    Args:
        file: File name, path or URL (str or Path).
        repo: GitHub repository in 'owner/name' form that hosts the release assets.
        release: Release tag to query first, i.e. 'latest', 'v6.2', etc.

    Returns:
        str: Path of the local file. The file may not exist if no download was attempted or the download failed.
    """

    def github_assets(repository, version='latest'):
        # Return GitHub repo tag and assets (i.e. ['yolov8n.pt', 'yolov8s.pt', ...])
        if version != 'latest':
            version = f'tags/{version}'  # i.e. tags/v6.2
        # timeout so an unresponsive API cannot hang the caller; any failure falls through to the fallbacks below
        response = requests.get(f'https://api.github.com/repos/{repository}/releases/{version}',
                                timeout=10).json()  # github api
        return response['tag_name'], [x['name'] for x in response['assets']]  # tag, assets

    file = Path(str(file).strip().replace("'", ''))
    if file.exists():
        return str(file)

    weights_file = SETTINGS['weights_dir'] / file
    if weights_file.exists():
        return str(weights_file)

    # URL specified
    name = Path(urllib.parse.unquote(str(file))).name  # decode '%2F' to '/' etc.
    if str(file).startswith(('http:/', 'https:/')):  # download
        url = str(file).replace(':/', '://')  # Pathlib turns :// -> :/
        file = name.split('?')[0]  # parse authentication https://url.com/file.txt?auth...
        if Path(file).is_file():
            LOGGER.info(f'Found {url} locally at {file}')  # file already exists
        else:
            safe_download(file=file, url=url, min_bytes=1E5)
        return file

    # GitHub assets
    assets = [f'yolov8{size}{suffix}.pt' for size in 'nsmlx' for suffix in ('', '6', '-cls', '-seg')]  # default
    try:
        tag, assets = github_assets(repo, release)
    except Exception:
        try:
            tag, assets = github_assets(repo)  # latest release
        except Exception:
            # GitHub API unavailable: keep the default asset list and guess the tag from the local git checkout
            try:
                tag = subprocess.check_output('git tag', shell=True, stderr=subprocess.STDOUT).decode().split()[-1]
            except Exception:
                tag = release

    file.parent.mkdir(parents=True, exist_ok=True)  # make parent dir (if required)
    if name in assets:
        safe_download(file,
                      url=f'https://github.com/{repo}/releases/download/{tag}/{name}',
                      min_bytes=1E5,
                      error_msg=f'{file} missing, try downloading from https://github.com/{repo}/releases/{tag}')

    return str(file)


def download(url, dir=Path.cwd(), unzip=True, delete=True, curl=False, threads=1, retry=3):
    """
    Multithreaded file download and unzip function, used in data.yaml for autodownload.

    Args:
        url: A single URL/path (str or Path) or an iterable of them. Entries that are existing local files
            are not downloaded, only (optionally) unpacked.
        dir: Destination directory, created if missing. NOTE: the default is evaluated once, when the
            function is defined, not on each call.
        unzip: Unpack files whose suffix is '.zip', '.tar' or '.gz' after download.
        delete: Remove the archive after unpacking.
        curl: Download with the curl command-line tool instead of torch.hub.download_url_to_file.
        threads: Number of worker threads; values > 1 download in parallel.
        retry: Number of additional attempts after a failed download.
    """

    def fetch(src, dest):
        # Make one download attempt of src to dest, return True on success
        if curl:
            s = 'sS' if threads > 1 else ''  # silent
            # curl download with retry, continue. Argument list (no shell) so special characters in the
            # URL or path cannot break or inject into the command.
            cmd = ['curl', '-#', f'-{s}L', str(src), '-o', str(dest), '--retry', '9', '-C', '-']
            return subprocess.run(cmd, check=False).returncode == 0
        torch.hub.download_url_to_file(src, dest, progress=threads == 1)  # torch download
        return dest.is_file()

    def download_one(url, dir):
        # Download 1 file
        success = True
        if '://' not in str(url) and Path(url).is_file():  # exists ('://' check required in Windows Python<3.10)
            f = Path(url)  # filename
        else:  # does not exist
            f = dir / Path(url).name
            LOGGER.info(f'Downloading {url} to {f}...')
            for i in range(retry + 1):
                try:
                    success = fetch(url, f)
                except Exception as e:  # a raised error counts as a failed attempt so the retry loop can continue
                    LOGGER.warning(f'⚠️ Download error: {e}')
                    success = False
                if success:
                    break
                elif i < retry:
                    LOGGER.warning(f'⚠️ Download failure, retrying {i + 1}/{retry} {url}...')
                else:
                    LOGGER.warning(f'❌ Failed to download {url}...')

        if unzip and success and f.suffix in ('.zip', '.tar', '.gz'):
            LOGGER.info(f'Unzipping {f}...')
            if f.suffix == '.zip':
                with ZipFile(f) as archive:  # context manager closes the archive handle
                    archive.extractall(path=dir)  # unzip
            elif f.suffix == '.tar':
                subprocess.run(['tar', 'xf', str(f), '--directory', str(f.parent)], check=False)  # unzip
            elif f.suffix == '.gz':
                subprocess.run(['tar', 'xfz', str(f), '--directory', str(f.parent)], check=False)  # unzip
            if delete:
                f.unlink()  # remove zip

    dir = Path(dir)
    dir.mkdir(parents=True, exist_ok=True)  # make directory
    # Normalize to a list up front so a single str/Path is never iterated character by character
    urls = [url] if isinstance(url, (str, Path)) else url
    if threads > 1:
        with ThreadPool(threads) as pool:
            # Results of imap are never retrieved, so errors raised inside a worker are not re-raised here
            pool.imap(lambda x: download_one(*x), zip(urls, repeat(dir)))  # multithreaded
            pool.close()
            pool.join()
    else:
        for u in urls:
            download_one(u, dir)
Cleanup (#168) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Kalen Michael <kalenmike@gmail.com> 2 years ago			`# Ultralytics YOLO 🚀, GPL-3.0 license`

Model builder (#29) Co-authored-by: Ayush Chaurasia <ayush.chuararsia@gmail.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> 2 years ago			`import logging`
			`import os`
			`import subprocess`
			`import urllib`
			`from itertools import repeat`
			`from multiprocessing.pool import ThreadPool`
			`from pathlib import Path`
			`from zipfile import ZipFile`

			`import requests`
			`import torch`

General refactoring and improvements (#373) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> 2 years ago			`from ultralytics.yolo.utils import LOGGER, SETTINGS`
Model builder (#29) Co-authored-by: Ayush Chaurasia <ayush.chuararsia@gmail.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> 2 years ago

			`def safe_download(file, url, url2=None, min_bytes=1E0, error_msg=''):`
			`# Attempts to download file from url or url2, checks and removes incomplete downloads < min_bytes`
			`file = Path(file)`
			`assert_msg = f"Downloaded file '{file}' does not exist or size is < min_bytes={min_bytes}"`
			`try: # url1`
			`LOGGER.info(f'Downloading {url} to {file}...')`
			`torch.hub.download_url_to_file(url, str(file), progress=LOGGER.level <= logging.INFO)`
			`assert file.exists() and file.stat().st_size > min_bytes, assert_msg # check`
			`except Exception as e: # url2`
			`if file.exists():`
			`file.unlink() # remove partial downloads`
			`LOGGER.info(f'ERROR: {e}\nRe-attempting {url2 or url} to {file}...')`
			`os.system(f"curl -# -L '{url2 or url}' -o '{file}' --retry 3 -C -") # curl download, retry and resume on fail`
			`finally:`
			`if not file.exists() or file.stat().st_size < min_bytes: # check`
			`if file.exists():`
			`file.unlink() # remove partial downloads`
			`LOGGER.info(f"ERROR: {assert_msg}\n{error_msg}")`
			`LOGGER.info('')`


			`def is_url(url, check=True):`
			`# Check if string is URL and check if URL exists`
			`try:`
			`url = str(url)`
			`result = urllib.parse.urlparse(url)`
			`assert all([result.scheme, result.netloc]) # check if is url`
			`return (urllib.request.urlopen(url).getcode() == 200) if check else True # check if exists online`
			`except (AssertionError, urllib.request.HTTPError):`
			`return False`


Fix load and resume and update autodownload endpoint (#136) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Glenn Jocher <glenn.jocher@ultralytics.com> 2 years ago			`def attempt_download(file, repo='ultralytics/assets', release='v0.0.0'):`
Model builder (#29) Co-authored-by: Ayush Chaurasia <ayush.chuararsia@gmail.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> 2 years ago			`# Attempt file download from GitHub release assets if not found locally. release = 'latest', 'v6.2', etc.`

			`def github_assets(repository, version='latest'):`
Start export implementation (#110) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> 2 years ago			`# Return GitHub repo tag and assets (i.e. ['yolov8n.pt', 'yolov5m.pt', ...])`
Fix load and resume and update autodownload endpoint (#136) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Glenn Jocher <glenn.jocher@ultralytics.com> 2 years ago			`# Return GitHub repo tag and assets (i.e. ['yolov8n.pt', 'yolov8s.pt', ...])`
Model builder (#29) Co-authored-by: Ayush Chaurasia <ayush.chuararsia@gmail.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> 2 years ago			`if version != 'latest':`
			`version = f'tags/{version}' # i.e. tags/v6.2`
			`response = requests.get(f'https://api.github.com/repos/{repository}/releases/{version}').json() # github api`
			`return response['tag_name'], [x['name'] for x in response['assets']] # tag, assets`

			`file = Path(str(file).strip().replace("'", ''))`
General refactoring and improvements (#373) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> 2 years ago			`if file.exists():`
			`return str(file)`
			`elif (SETTINGS['weights_dir'] / file).exists():`
			`return str(SETTINGS['weights_dir'] / file)`
			`else:`
Model builder (#29) Co-authored-by: Ayush Chaurasia <ayush.chuararsia@gmail.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> 2 years ago			`# URL specified`
			`name = Path(urllib.parse.unquote(str(file))).name # decode '%2F' to '/' etc.`
			`if str(file).startswith(('http:/', 'https:/')): # download`
			`url = str(file).replace(':/', '://') # Pathlib turns :// -> :/`
			`file = name.split('?')[0] # parse authentication https://url.com/file.txt?auth...`
			`if Path(file).is_file():`
			`LOGGER.info(f'Found {url} locally at {file}') # file already exists`
			`else:`
			`safe_download(file=file, url=url, min_bytes=1E5)`
			`return file`

			`# GitHub assets`
			`assets = [f'yolov5{size}{suffix}.pt' for size in 'nsmlx' for suffix in ('', '6', '-cls', '-seg')] # default`
Fix load and resume and update autodownload endpoint (#136) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Glenn Jocher <glenn.jocher@ultralytics.com> 2 years ago			`assets = [f'yolov8{size}{suffix}.pt' for size in 'nsmlx' for suffix in ('', '6', '-cls', '-seg')] # default`
Model builder (#29) Co-authored-by: Ayush Chaurasia <ayush.chuararsia@gmail.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> 2 years ago			`try:`
			`tag, assets = github_assets(repo, release)`
			`except Exception:`
			`try:`
			`tag, assets = github_assets(repo) # latest release`
			`except Exception:`
			`try:`
			`tag = subprocess.check_output('git tag', shell=True, stderr=subprocess.STDOUT).decode().split()[-1]`
			`except Exception:`
			`tag = release`

			`file.parent.mkdir(parents=True, exist_ok=True) # make parent dir (if required)`
			`if name in assets:`
`ultralytics 8.0.19` seg/det dataset warning and DDP-cls/seg fixes (#595) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: 曾逸夫（Zeng Yifu） <41098760+Zengyf-CVer@users.noreply.github.com> Co-authored-by: Laughing <61612323+Laughing-q@users.noreply.github.com> 2 years ago			`safe_download(file,`
			`url=f'https://github.com/{repo}/releases/download/{tag}/{name}',`
			`min_bytes=1E5,`
			`error_msg=f'{file} missing, try downloading from https://github.com/{repo}/releases/{tag}')`
Model builder (#29) Co-authored-by: Ayush Chaurasia <ayush.chuararsia@gmail.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> 2 years ago
General refactoring and improvements (#373) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> 2 years ago			`return str(file)`
Model builder (#29) Co-authored-by: Ayush Chaurasia <ayush.chuararsia@gmail.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> 2 years ago

			`def download(url, dir=Path.cwd(), unzip=True, delete=True, curl=False, threads=1, retry=3):`
			`# Multithreaded file download and unzip function, used in data.yaml for autodownload`
			`def download_one(url, dir):`
			`# Download 1 file`
			`success = True`
`ultralytics 8.0.21` Windows, segments, YAML fixes (#655) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: corey-nm <109536191+corey-nm@users.noreply.github.com> 2 years ago			`if '://' not in str(url) and Path(url).is_file(): # exists ('://' check required in Windows Python<3.10)`
Model builder (#29) Co-authored-by: Ayush Chaurasia <ayush.chuararsia@gmail.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> 2 years ago			`f = Path(url) # filename`
			`else: # does not exist`
			`f = dir / Path(url).name`
			`LOGGER.info(f'Downloading {url} to {f}...')`
			`for i in range(retry + 1):`
			`if curl:`
			`s = 'sS' if threads > 1 else '' # silent`
			`r = os.system(`
			`f'curl -# -{s}L "{url}" -o "{f}" --retry 9 -C -') # curl download with retry, continue`
			`success = r == 0`
			`else:`
			`torch.hub.download_url_to_file(url, f, progress=threads == 1) # torch download`
			`success = f.is_file()`
			`if success:`
			`break`
			`elif i < retry:`
			`LOGGER.warning(f'⚠️ Download failure, retrying {i + 1}/{retry} {url}...')`
			`else:`
			`LOGGER.warning(f'❌ Failed to download {url}...')`

			`if unzip and success and f.suffix in ('.zip', '.tar', '.gz'):`
			`LOGGER.info(f'Unzipping {f}...')`
			`if f.suffix == '.zip':`
			`ZipFile(f).extractall(path=dir) # unzip`
			`elif f.suffix == '.tar':`
			`os.system(f'tar xf {f} --directory {f.parent}') # unzip`
			`elif f.suffix == '.gz':`
			`os.system(f'tar xfz {f} --directory {f.parent}') # unzip`
			`if delete:`
			`f.unlink() # remove zip`

			`dir = Path(dir)`
			`dir.mkdir(parents=True, exist_ok=True) # make directory`
			`if threads > 1:`
Threadpool fixes and CLI improvements (#550) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Ayush Chaurasia <ayush.chaurarsia@gmail.com> 2 years ago			`# pool = ThreadPool(threads)`
			`# pool.imap(lambda x: download_one(*x), zip(url, repeat(dir))) # multithreaded`
			`# pool.close()`
			`# pool.join()`
			`with ThreadPool(threads) as pool:`
			`pool.imap(lambda x: download_one(*x), zip(url, repeat(dir))) # multithreaded`
			`pool.close()`
			`pool.join()`
Model builder (#29) Co-authored-by: Ayush Chaurasia <ayush.chuararsia@gmail.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> 2 years ago			`else:`
			`for u in [url] if isinstance(url, (str, Path)) else url:`
			`download_one(u, dir)`