YOLOv8-16bit/ultralytics/yolo/utils/downloads.py

import logging
import os
import subprocess
import urllib
from itertools import repeat
from multiprocessing.pool import ThreadPool
from pathlib import Path
from zipfile import ZipFile

import requests
import torch

from ultralytics.yolo.utils import LOGGER


def safe_download(file, url, url2=None, min_bytes=1E0, error_msg=''):
    """
    Download ``url`` to ``file``, retry once with curl on failure, and remove incomplete results.

    The first attempt uses ``torch.hub.download_url_to_file``. If it raises, or leaves a file smaller than
    ``min_bytes``, any partial file is removed and curl is run against ``url2`` (or ``url`` again when no backup
    URL is given). A target that is still missing or undersized afterwards is deleted and an error is logged;
    a failed download is reported through the log, not through an exception.

    Args:
        file (str | Path): Destination path of the download.
        url (str): Primary URL.
        url2 (str, optional): Backup URL used for the curl attempt. Defaults to ``url``.
        min_bytes (float): Minimum size in bytes for the download to count as complete.
        error_msg (str): Extra text appended to the final error log entry.
    """
    file = Path(file)
    assert_msg = f"Downloaded file '{file}' does not exist or size is < min_bytes={min_bytes}"

    def incomplete():
        # Single definition of failure, matching assert_msg: target missing or smaller than min_bytes
        return not file.exists() or file.stat().st_size < min_bytes

    try:  # url1
        LOGGER.info(f'Downloading {url} to {file}...')
        torch.hub.download_url_to_file(url, str(file), progress=LOGGER.level <= logging.INFO)
        if incomplete():
            # Explicit raise instead of `assert`: asserts are stripped under `python -O`, which would
            # silently disable the fallback below
            raise RuntimeError(assert_msg)
    except Exception as e:  # url2
        if file.exists():
            file.unlink()  # remove partial downloads
        fallback = url2 or url
        LOGGER.info(f'ERROR: {e}\nRe-attempting {fallback} to {file}...')
        try:
            # Argument list (no shell): quotes or shell metacharacters in the URL or path cannot break the
            # command or inject extra ones. curl download, retry and resume on fail
            subprocess.run(['curl', '-#', '-L', str(fallback), '-o', str(file), '--retry', '3', '-C', '-'],
                           check=False)
        except OSError as curl_error:  # curl missing or not executable: stay best-effort, report below
            LOGGER.info(f'ERROR: unable to run curl: {curl_error}')
    finally:
        if incomplete():  # check
            if file.exists():
                file.unlink()  # remove partial downloads
            LOGGER.info(f"ERROR: {assert_msg}\n{error_msg}")
        LOGGER.info('')


def is_url(url, check=True):
    """
    Return True if ``url`` looks like a URL and, when ``check`` is True, answers with HTTP status 200.

    A value counts as a URL when ``urllib.parse.urlparse`` finds both a scheme and a network location in it.
    With ``check=True`` the URL is additionally opened; any failure to open it (HTTP error status, unreachable
    host, refused connection, unsupported or malformed URL) yields False instead of raising.

    Args:
        url: Candidate value; it is converted with ``str()`` before parsing.
        check (bool): Also verify that the URL can be opened and returns status 200.

    Returns:
        bool: Whether the value is a URL (and reachable, if ``check``).
    """
    url = str(url)
    try:
        parts = urllib.parse.urlparse(url)
    except ValueError:  # e.g. malformed IPv6 netloc
        return False
    # Explicit test instead of `assert`, which is stripped under `python -O`
    if not (parts.scheme and parts.netloc):
        return False
    if not check:
        return True
    try:
        # Context manager closes the response/socket instead of leaking it
        with urllib.request.urlopen(url) as response:  # check if exists online
            return response.getcode() == 200
    except (ValueError, urllib.request.URLError):
        # URLError is the base class of HTTPError, so this covers HTTP error statuses as well as
        # connection-level failures; ValueError covers URLs urlopen refuses to handle
        return False


def attempt_download(file, repo='ultralytics/yolov5', release='v6.2'):
    """
    Return a local path for ``file``, attempting a download first when it does not exist on disk.

    Resolution order:
      1. ``file`` exists locally: it is returned as-is.
      2. ``file`` is an http(s) URL: it is downloaded into the current working directory under the URL's final
         path component (query string removed), unless a file with that name is already present.
      3. Otherwise the file name is looked up among the release assets of the GitHub ``repo`` and, when listed,
         downloaded from that release to ``file``.

    Args:
        file (str | Path): Local path, file name or URL. Surrounding whitespace and single quotes are stripped.
        repo (str): GitHub repository in 'owner/name' form.
        release (str): Release to query, i.e. 'latest', 'v6.2', etc.

    Returns:
        str: The local path. It is returned whether or not a download actually took place (for example when
        the name is not a known asset), so callers should verify that it exists.
    """

    def github_assets(repository, version='latest'):
        # Return GitHub repo tag and assets (i.e. ['yolov5s.pt', 'yolov5m.pt', ...])
        if version != 'latest':
            version = f'tags/{version}'  # i.e. tags/v6.2
        # Explicit timeout: requests waits indefinitely by default, so a stalled connection would hang here.
        # A timeout raises, which callers below handle like any other failed lookup.
        response = requests.get(f'https://api.github.com/repos/{repository}/releases/{version}',
                                timeout=10).json()  # github api
        return response['tag_name'], [x['name'] for x in response['assets']]  # tag, assets

    file = Path(str(file).strip().replace("'", ''))
    if file.exists():
        return str(file)

    # URL specified
    name = Path(urllib.parse.unquote(str(file))).name  # decode '%2F' to '/' etc.
    if str(file).startswith(('http:/', 'https:/')):  # download
        url = str(file).replace(':/', '://')  # Pathlib turns :// -> :/
        file = name.split('?')[0]  # parse authentication https://url.com/file.txt?auth...
        if Path(file).is_file():
            LOGGER.info(f'Found {url} locally at {file}')  # file already exists
        else:
            safe_download(file=file, url=url, min_bytes=1E5)
        return file

    # GitHub assets
    assets = [f'yolov5{size}{suffix}.pt' for size in 'nsmlx' for suffix in ('', '6', '-cls', '-seg')]  # default
    for version in (release, 'latest'):  # requested release first, then latest release
        try:
            tag, assets = github_assets(repo, version)
            break
        except Exception:
            continue  # API unreachable, rate-limited or release unknown: try the next option
    else:
        # Both API lookups failed (default asset list is kept): take the last tag printed by `git tag` in the
        # current working directory, or fall back to the requested release name
        try:
            tag = subprocess.check_output(['git', 'tag'], stderr=subprocess.STDOUT).decode().split()[-1]
        except Exception:
            tag = release

    file.parent.mkdir(parents=True, exist_ok=True)  # make parent dir (if required)
    if name in assets:
        url3 = 'https://drive.google.com/drive/folders/1EFQTEUeXWSFww0luse2jB9M1QNZQGwNl'  # backup gdrive mirror
        safe_download(
            file,
            url=f'https://github.com/{repo}/releases/download/{tag}/{name}',
            min_bytes=1E5,
            error_msg=f'{file} missing, try downloading from https://github.com/{repo}/releases/{tag} or {url3}')

    return str(file)


def download(url, dir=Path.cwd(), unzip=True, delete=True, curl=False, threads=1, retry=3):
    """
    Download one or more files into ``dir`` and optionally extract them, used in data.yaml for autodownload.

    Each entry that is already an existing local file is used in place; anything else is downloaded to
    ``dir / <final path component>``. Files ending in '.zip', '.tar' or '.gz' are extracted when ``unzip`` is set
    ('.zip' into ``dir``, tar archives next to the archive file) and removed afterwards when ``delete`` is set.

    Args:
        url (str | Path | iterable): A single URL/path or an iterable of them.
        dir (str | Path): Target directory, created if missing. Note that the default is the working
            directory at the time this module was imported, not at call time.
        unzip (bool): Extract downloaded archives.
        delete (bool): Delete an archive after it was extracted successfully.
        curl (bool): Download with the curl executable instead of ``torch.hub.download_url_to_file``.
        threads (int): Number of worker threads; values > 1 download concurrently.
        retry (int): Number of additional attempts after a failed download.

    Raises:
        Exception: With ``threads == 1`` and ``curl=False``, the error of the last failed torch download attempt
            is re-raised once all retries are used up. Errors inside worker threads (``threads > 1``) are not
            propagated because the pool results are never collected.
    """

    def download_one(url, dir):
        # Download 1 file (with retries), then optionally extract it and delete the archive
        success = True
        if Path(url).is_file():
            f = Path(url)  # filename
        else:  # does not exist
            f = dir / Path(url).name
            LOGGER.info(f'Downloading {url} to {f}...')
            for i in range(retry + 1):
                error = None  # exception of this attempt, if any
                if curl:
                    s = 'sS' if threads > 1 else ''  # silent
                    try:
                        # Argument list (no shell) so spaces/quotes in url or path cannot break the command.
                        # curl download with retry, continue
                        r = subprocess.run(['curl', '-#', f'-{s}L', str(url), '-o', str(f), '--retry', '9', '-C', '-'],
                                           check=False).returncode
                        success = r == 0
                    except OSError:  # curl missing or not executable counts as a failed attempt
                        success = False
                else:
                    try:
                        torch.hub.download_url_to_file(url, f, progress=threads == 1)  # torch download
                        success = f.is_file()
                    except Exception as e:
                        # download_url_to_file raises on failure; record it so the retry loop can actually retry
                        success, error = False, e
                if success:
                    break
                elif i < retry:
                    LOGGER.warning(f'⚠️ Download failure, retrying {i + 1}/{retry} {url}...')
                else:
                    LOGGER.warning(f'❌ Failed to download {url}...')
                    if error is not None:
                        raise error  # retries exhausted: surface the torch error as before

        if unzip and success and f.suffix in ('.zip', '.tar', '.gz'):
            LOGGER.info(f'Unzipping {f}...')
            if f.suffix == '.zip':
                with ZipFile(f) as archive:  # context manager closes the archive handle
                    archive.extractall(path=dir)  # unzip
                extracted = True
            else:
                # '.tar' is extracted as plain tar; any '.gz' is treated as a gzip-compressed tar
                mode = 'xf' if f.suffix == '.tar' else 'xfz'
                try:
                    tar = subprocess.run(['tar', mode, str(f), '--directory', str(f.parent)], check=False)
                    extracted = tar.returncode == 0
                except OSError:  # tar missing or not executable
                    extracted = False
            if not extracted:
                LOGGER.warning(f'❌ Failed to extract {f}, keeping archive...')  # never delete unextracted data
            elif delete:
                f.unlink()  # remove zip

    dir = Path(dir)
    dir.mkdir(parents=True, exist_ok=True)  # make directory
    # Normalise first so a single str/Path is one download in both modes (iterating a str yields characters)
    urls = [url] if isinstance(url, (str, Path)) else url
    if threads > 1:
        pool = ThreadPool(threads)
        pool.imap(lambda x: download_one(*x), zip(urls, repeat(dir)))  # multithreaded
        pool.close()
        pool.join()
    else:
        for u in urls:
            download_one(u, dir)
Model builder (#29) Co-authored-by: Ayush Chaurasia <ayush.chuararsia@gmail.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> 2 years ago			`import logging`
			`import os`
			`import subprocess`
			`import urllib`
			`from itertools import repeat`
			`from multiprocessing.pool import ThreadPool`
			`from pathlib import Path`
			`from zipfile import ZipFile`

			`import requests`
			`import torch`

			`from ultralytics.yolo.utils import LOGGER`


			`def safe_download(file, url, url2=None, min_bytes=1E0, error_msg=''):`
			`# Attempts to download file from url or url2, checks and removes incomplete downloads < min_bytes`
			`file = Path(file)`
			`assert_msg = f"Downloaded file '{file}' does not exist or size is < min_bytes={min_bytes}"`
			`try: # url1`
			`LOGGER.info(f'Downloading {url} to {file}...')`
			`torch.hub.download_url_to_file(url, str(file), progress=LOGGER.level <= logging.INFO)`
			`assert file.exists() and file.stat().st_size > min_bytes, assert_msg # check`
			`except Exception as e: # url2`
			`if file.exists():`
			`file.unlink() # remove partial downloads`
			`LOGGER.info(f'ERROR: {e}\nRe-attempting {url2 or url} to {file}...')`
			`os.system(f"curl -# -L '{url2 or url}' -o '{file}' --retry 3 -C -") # curl download, retry and resume on fail`
			`finally:`
			`if not file.exists() or file.stat().st_size < min_bytes: # check`
			`if file.exists():`
			`file.unlink() # remove partial downloads`
			`LOGGER.info(f"ERROR: {assert_msg}\n{error_msg}")`
			`LOGGER.info('')`


			`def is_url(url, check=True):`
			`# Check if string is URL and check if URL exists`
			`try:`
			`url = str(url)`
			`result = urllib.parse.urlparse(url)`
			`assert all([result.scheme, result.netloc]) # check if is url`
			`return (urllib.request.urlopen(url).getcode() == 200) if check else True # check if exists online`
			`except (AssertionError, urllib.request.HTTPError):`
			`return False`


			`def attempt_download(file, repo='ultralytics/yolov5', release='v6.2'):`
			`# Attempt file download from GitHub release assets if not found locally. release = 'latest', 'v6.2', etc.`

			`def github_assets(repository, version='latest'):`
			`# Return GitHub repo tag and assets (i.e. ['yolov5s.pt', 'yolov5m.pt', ...])`
			`if version != 'latest':`
			`version = f'tags/{version}' # i.e. tags/v6.2`
			`response = requests.get(f'https://api.github.com/repos/{repository}/releases/{version}').json() # github api`
			`return response['tag_name'], [x['name'] for x in response['assets']] # tag, assets`

			`file = Path(str(file).strip().replace("'", ''))`
			`if not file.exists():`
			`# URL specified`
			`name = Path(urllib.parse.unquote(str(file))).name # decode '%2F' to '/' etc.`
			`if str(file).startswith(('http:/', 'https:/')): # download`
			`url = str(file).replace(':/', '://') # Pathlib turns :// -> :/`
			`file = name.split('?')[0] # parse authentication https://url.com/file.txt?auth...`
			`if Path(file).is_file():`
			`LOGGER.info(f'Found {url} locally at {file}') # file already exists`
			`else:`
			`safe_download(file=file, url=url, min_bytes=1E5)`
			`return file`

			`# GitHub assets`
			`assets = [f'yolov5{size}{suffix}.pt' for size in 'nsmlx' for suffix in ('', '6', '-cls', '-seg')] # default`
			`try:`
			`tag, assets = github_assets(repo, release)`
			`except Exception:`
			`try:`
			`tag, assets = github_assets(repo) # latest release`
			`except Exception:`
			`try:`
			`tag = subprocess.check_output('git tag', shell=True, stderr=subprocess.STDOUT).decode().split()[-1]`
			`except Exception:`
			`tag = release`

			`file.parent.mkdir(parents=True, exist_ok=True) # make parent dir (if required)`
			`if name in assets:`
			`url3 = 'https://drive.google.com/drive/folders/1EFQTEUeXWSFww0luse2jB9M1QNZQGwNl' # backup gdrive mirror`
			`safe_download(`
			`file,`
			`url=f'https://github.com/{repo}/releases/download/{tag}/{name}',`
			`min_bytes=1E5,`
			`error_msg=f'{file} missing, try downloading from https://github.com/{repo}/releases/{tag} or {url3}')`

			`return str(file)`


			`def download(url, dir=Path.cwd(), unzip=True, delete=True, curl=False, threads=1, retry=3):`
			`# Multithreaded file download and unzip function, used in data.yaml for autodownload`
			`def download_one(url, dir):`
			`# Download 1 file`
			`success = True`
			`if Path(url).is_file():`
			`f = Path(url) # filename`
			`else: # does not exist`
			`f = dir / Path(url).name`
			`LOGGER.info(f'Downloading {url} to {f}...')`
			`for i in range(retry + 1):`
			`if curl:`
			`s = 'sS' if threads > 1 else '' # silent`
			`r = os.system(`
			`f'curl -# -{s}L "{url}" -o "{f}" --retry 9 -C -') # curl download with retry, continue`
			`success = r == 0`
			`else:`
			`torch.hub.download_url_to_file(url, f, progress=threads == 1) # torch download`
			`success = f.is_file()`
			`if success:`
			`break`
			`elif i < retry:`
			`LOGGER.warning(f'⚠️ Download failure, retrying {i + 1}/{retry} {url}...')`
			`else:`
			`LOGGER.warning(f'❌ Failed to download {url}...')`

			`if unzip and success and f.suffix in ('.zip', '.tar', '.gz'):`
			`LOGGER.info(f'Unzipping {f}...')`
			`if f.suffix == '.zip':`
			`ZipFile(f).extractall(path=dir) # unzip`
			`elif f.suffix == '.tar':`
			`os.system(f'tar xf {f} --directory {f.parent}') # unzip`
			`elif f.suffix == '.gz':`
			`os.system(f'tar xfz {f} --directory {f.parent}') # unzip`
			`if delete:`
			`f.unlink() # remove zip`

			`dir = Path(dir)`
			`dir.mkdir(parents=True, exist_ok=True) # make directory`
			`if threads > 1:`
			`pool = ThreadPool(threads)`
			`pool.imap(lambda x: download_one(*x), zip(url, repeat(dir))) # multithreaded`
			`pool.close()`
			`pool.join()`
			`else:`
			`for u in [url] if isinstance(url, (str, Path)) else url:`
			`download_one(u, dir)`