"""Implementation of Apache VFS schemes and URLs."""

import sys
import re
from urllib.parse import urlparse


# Each supported URI scheme, keyed to the suffix of the GDAL VSI handler
# that serves it (the handler name is "vsi" followed by the suffix).
# TODO: extend for other cloud platforms.
SCHEMES = dict(
    ftp='curl',
    gzip='gzip',
    http='curl',
    https='curl',
    s3='s3',
    tar='tar',
    zip='zip',
    gs='gs',
)

# Schemes that map to the "curl" VSI suffix.
CURLSCHEMES = {scheme for scheme in SCHEMES if SCHEMES[scheme] == 'curl'}

# Schemes treated as remote: those mapping to the curl, s3 or gs suffix.
# TODO: extend for other cloud platforms.
REMOTESCHEMES = {
    scheme
    for scheme, suffix in SCHEMES.items()
    if suffix == 'curl' or suffix == 's3' or suffix == 'gs'
}


def valid_vsi(vsi):
    """Tell whether every '+'-separated part of `vsi` is a known scheme.

    Returns True only if each part is a key of SCHEMES, False as soon as
    one part is not.
    """
    for part in vsi.split('+'):
        if part not in SCHEMES:
            return False
    return True


def is_remote(scheme):
    """Tell whether any '+'-separated part of `scheme` is a remote scheme.

    A `scheme` of None is never remote. Otherwise the result is True when
    at least one part is a member of REMOTESCHEMES.
    """
    if scheme is None:
        return False
    components = scheme.split('+')
    return not REMOTESCHEMES.isdisjoint(components)


def vsi_path(path, vsi=None, archive=None):
    """Convert a path to an OGR VSI path (see cpl_vsi.h).

    `vsi` is a '+'-separated chain of schemes; each one is looked up in
    SCHEMES and turned into a "vsi<suffix>" segment, and the segments are
    joined with '/'. When `archive` is given it is placed directly in
    front of `path`. Without a `vsi`, `path` is returned unchanged.

    Raises KeyError if a part of `vsi` is not a key of SCHEMES.
    """
    if not vsi:
        return path

    handlers = [f'vsi{SCHEMES[name]}' for name in vsi.split('+')]
    chain = '/'.join(handlers)
    target = f'{archive}{path}' if archive else path
    return f'/{chain}/{target}'


def parse_paths(uri, vfs=None):
    """Parse a URI or Apache VFS URL into its parts

    Parameters:
        uri: str
            A dataset path or URI, for example ``/data/file.shp``,
            ``zip:///data/archive.zip!/file.shp`` or
            ``zip+https://server.com/data.zip!/file.shp``.
        vfs: str, optional
            A URL naming the archive that contains `uri`, for example
            ``zip:///data/archive.zip``. When given, `uri` is returned
            unchanged as the path and the scheme and archive are taken
            from `vfs`.

    Returns: tuple
        (path, scheme, archive)

        `scheme` is None when no scheme is present and `archive` is None
        when no archive was identified. On Windows a `uri` starting with
        a drive letter is returned as ``(uri, None, None)`` without
        looking at `vfs`.

    Only the scheme, netloc and path components produced by `urlparse`
    are used; a query string or fragment is not carried over.
    """
    # Windows drive letters (e.g. "C:\") confuse `urlparse` as they look like
    # URL schemes, so such paths are handed back untouched.
    if sys.platform == "win32" and re.match(r"^[a-zA-Z]:", uri):
        return uri, None, None

    path = uri
    archive = None

    if vfs:
        parts = urlparse(vfs)
        scheme = parts.scheme
        archive = parts.path
        if parts.netloc and parts.netloc != 'localhost':
            archive = parts.netloc + archive
    else:
        parts = urlparse(uri)
        scheme = parts.scheme
        path = parts.path
        if parts.netloc and parts.netloc != 'localhost':
            innermost = scheme.split('+')[-1]
            if innermost in CURLSCHEMES:
                # We need to deal with cases such as
                # zip+https://server.com/data.zip, where the curl handler
                # expects the complete URL, scheme included.
                path = f"{innermost}://{parts.netloc}{path}"
            else:
                path = parts.netloc + path
        # Split off the archive for composite schemes (e.g. "zip+https")
        # as well as single ones; a plain `scheme in SCHEMES` test would
        # leave the "!" separator inside the path for composite schemes.
        if scheme and valid_vsi(scheme):
            pieces = path.split('!')
            # The text after the last "!" is the path inside the archive;
            # the piece just before it, if any, is the archive itself.
            path = pieces.pop()
            archive = pieces.pop() if pieces else None

    return path, scheme or None, archive
