from __future__ import annotations

# Allow direct execution
import os
import sys

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import enum
import itertools
import json
import logging
import re
from collections import defaultdict
from dataclasses import dataclass
from functools import lru_cache
from pathlib import Path

from devscripts.utils import read_file, run_process, write_file

BASE_URL = 'https://github.com'
LOCATION_PATH = Path(__file__).parent
HASH_LENGTH = 7

logger = logging.getLogger(__name__)


class CommitGroup(enum.Enum):
    PRIORITY = 'Important'
    CORE = 'Core'
    EXTRACTOR = 'Extractor'
    DOWNLOADER = 'Downloader'
    POSTPROCESSOR = 'Postprocessor'
    NETWORKING = 'Networking'
    MISC = 'Misc.'

    @classmethod
    @lru_cache
    def subgroup_lookup(cls):
        return {
            name: group
            for group, names in {
                cls.MISC: {
                    'build',
                    'ci',
                    'cleanup',
                    'devscripts',
                    'docs',
                    'test',
                },
                cls.NETWORKING: {
                    'rh',
                },
            }.items()
            for name in names
        }

    @classmethod
    @lru_cache
    def group_lookup(cls):
        result = {
            'fd': cls.DOWNLOADER,
            'ie': cls.EXTRACTOR,
            'pp': cls.POSTPROCESSOR,
            'upstream': cls.CORE,
        }
        result.update({item.name.lower(): item for item in iter(cls)})
        return result

    @classmethod
    def get(cls, value: str) -> tuple[CommitGroup | None, str | None]:
        group, _, subgroup = (group.strip().lower() for group in value.partition('/'))

        result = cls.group_lookup().get(group)
        if not result:
            if subgroup:
                return None, value
            subgroup = group
            result = cls.subgroup_lookup().get(subgroup)

        return result, subgroup or None


@dataclass
class Commit:
    hash: str | None
    short: str
    authors: list[str]

    def __str__(self):
        result = f'{self.short!r}'

        if self.hash:
            result += f' ({self.hash[:HASH_LENGTH]})'

        if self.authors:
            authors = ', '.join(self.authors)
            result += f' by {authors}'

        return result


@dataclass
class CommitInfo:
    details: str | None
    sub_details: tuple[str, ...]
    message: str
    issues: list[str]
    commit: Commit
    fixes: list[Commit]

    def key(self):
        return ((self.details or '').lower(), self.sub_details, self.message)


def unique(items):
    return sorted({item.strip().lower(): item for item in items if item}.values())


class Changelog:
    MISC_RE = re.compile(r'(?:^|\b)(?:lint(?:ing)?|misc|format(?:ting)?|fixes)(?:\b|$)', re.IGNORECASE)
    ALWAYS_SHOWN = (CommitGroup.PRIORITY,)

    def __init__(self, groups, repo, collapsible=False):
        self._groups = groups
        self._repo = repo
        self._collapsible = collapsible

    def __str__(self):
        return '\n'.join(self._format_groups(self._groups)).replace('\t', '    ')

    def _format_groups(self, groups):
        first = True
        for item in CommitGroup:
            if self._collapsible and item not in self.ALWAYS_SHOWN and first:
                first = False
                yield '\n<details><summary><h3>Changelog</h3></summary>\n'

            group = groups[item]
            if group:
                yield self.format_module(item.value, group)

        if self._collapsible:
            yield '\n</details>'

    def format_module(self, name, group):
        result = f'\n#### {name} changes\n' if name else '\n'
        return result + '\n'.join(self._format_group(group))

    def _format_group(self, group):
        sorted_group = sorted(group, key=CommitInfo.key)
        detail_groups = itertools.groupby(sorted_group, lambda item: (item.details or '').lower())
        for _, items in detail_groups:
            items = list(items)
            details = items[0].details

            if details == 'cleanup':
                items = self._prepare_cleanup_misc_items(items)

            prefix = '-'
            if details:
                if len(items) == 1:
                    prefix = f'- **{details}**:'
                else:
                    yield f'- **{details}**'
                    prefix = '\t-'

            sub_detail_groups = itertools.groupby(items, lambda item: tuple(map(str.lower, item.sub_details)))
            for sub_details, entries in sub_detail_groups:
                if not sub_details:
                    for entry in entries:
                        yield f'{prefix} {self.format_single_change(entry)}'
                    continue

                entries = list(entries)
                sub_prefix = f'{prefix} {", ".join(entries[0].sub_details)}'
                if len(entries) == 1:
                    yield f'{sub_prefix}: {self.format_single_change(entries[0])}'
                    continue

                yield sub_prefix
                for entry in entries:
                    yield f'\t{prefix} {self.format_single_change(entry)}'

    def _prepare_cleanup_misc_items(self, items):
        cleanup_misc_items = defaultdict(list)
        sorted_items = []
        for item in items:
            if self.MISC_RE.search(item.message):
                cleanup_misc_items[tuple(item.commit.authors)].append(item)
            else:
                sorted_items.append(item)

        for commit_infos in cleanup_misc_items.values():
            sorted_items.append(CommitInfo(
                'cleanup', ('Miscellaneous',), ', '.join(
                    self._format_message_link(None, info.commit.hash)
                    for info in sorted(commit_infos, key=lambda item: item.commit.hash or '')),
                [], Commit(None, '', commit_infos[0].commit.authors), []))

        return sorted_items

    def format_single_change(self, info: CommitInfo):
        message, sep, rest = info.message.partition('\n')
        if '[' not in message:
            # If the message doesn't already contain markdown links, try to add a link to the commit
            message = self._format_message_link(message, info.commit.hash)

        if info.issues:
            message = f'{message} ({self._format_issues(info.issues)})'

        if info.commit.authors:
            message = f'{message} by {self._format_authors(info.commit.authors)}'

        if info.fixes:
            fix_message = ', '.join(f'{self._format_message_link(None, fix.hash)}' for fix in info.fixes)

            authors = sorted({author for fix in info.fixes for author in fix.authors}, key=str.casefold)
            if authors != info.commit.authors:
                fix_message = f'{fix_message} by {self._format_authors(authors)}'

            message = f'{message} (With fixes in {fix_message})'

        return message if not sep else f'{message}{sep}{rest}'

    def _format_message_link(self, message, hash):
        assert message or hash, 'Improperly defined commit message or override'
        message = message if message else hash[:HASH_LENGTH]
        return f'[{message}]({self.repo_url}/commit/{hash})' if hash else message

    def _format_issues(self, issues):
        return ', '.join(f'[#{issue}]({self.repo_url}/issues/{issue})' for issue in issues)

    @staticmethod
    def _format_authors(authors):
        return ', '.join(f'[{author}]({BASE_URL}/{author})' for author in authors)

    @property
    def repo_url(self):
        return f'{BASE_URL}/{self._repo}'


class CommitRange:
    COMMAND = 'git'
    COMMIT_SEPARATOR = '-----'

    AUTHOR_INDICATOR_RE = re.compile(r'Authored by:? ', re.IGNORECASE)
    MESSAGE_RE = re.compile(r'''
        (?:\[(?P<prefix>[^\]]+)\]\ )?
        (?:(?P<sub_details>`?[\w.-]+`?): )?
        (?P<message>.+?)
        (?:\ \((?P<issues>\#\d+(?:,\ \#\d+)*)\))?
        ''', re.VERBOSE | re.DOTALL)
    EXTRACTOR_INDICATOR_RE = re.compile(r'(?:Fix|Add)\s+Extractors?', re.IGNORECASE)
    REVERT_RE = re.compile(r'(?:\[[^\]]+\]\s+)?(?i:Revert)\s+([\da-f]{40})')
    FIXES_RE = re.compile(r'(?i:Fix(?:es)?(?:\s+bugs?)?(?:\s+in|\s+for)?|Revert|Improve)\s+([\da-f]{40})')
    UPSTREAM_MERGE_RE = re.compile(r'Update to ytdl-commit-([\da-f]+)')

    def __init__(self, start, end, default_author=None):
        self._start, self._end = start, end
        self._commits, self._fixes = self._get_commits_and_fixes(default_author)
        self._commits_added = []

    def __iter__(self):
        return iter(itertools.chain(self._commits.values(), self._commits_added))

    def __len__(self):
        return len(self._commits) + len(self._commits_added)

    def __contains__(self, commit):
        if isinstance(commit, Commit):
            if not commit.hash:
                return False
            commit = commit.hash

        return commit in self._commits

    def _get_commits_and_fixes(self, default_author):
        result = run_process(
            self.COMMAND, 'log', f'--format=%H%n%s%n%b%n{self.COMMIT_SEPARATOR}',
            f'{self._start}..{self._end}' if self._start else self._end).stdout

        commits, reverts = {}, {}
        fixes = defaultdict(list)
        lines = iter(result.splitlines(False))
        for i, commit_hash in enumerate(lines):
            short = next(lines)
            skip = short.startswith('Release ') or short == '[version] update'

            authors = [default_author] if default_author else []
            for line in iter(lambda: next(lines), self.COMMIT_SEPARATOR):
                match = self.AUTHOR_INDICATOR_RE.match(line)
                if match:
                    authors = sorted(map(str.strip, line[match.end():].split(',')), key=str.casefold)

            commit = Commit(commit_hash, short, authors)
            if skip and (self._start or not i):
                logger.debug(f'Skipped commit: {commit}')
                continue
            elif skip:
                logger.debug(f'Reached Release commit, breaking: {commit}')
                break

            revert_match = self.REVERT_RE.fullmatch(commit.short)
            if revert_match:
                reverts[revert_match.group(1)] = commit
                continue

            fix_match = self.FIXES_RE.search(commit.short)
            if fix_match:
                commitish = fix_match.group(1)
                fixes[commitish].append(commit)

            commits[commit.hash] = commit

        for commitish, revert_commit in reverts.items():
            reverted = commits.pop(commitish, None)
            if reverted:
                logger.debug(f'{commitish} fully reverted {reverted}')
            else:
                commits[revert_commit.hash] = revert_commit

        for commitish, fix_commits in fixes.items():
            if commitish in commits:
                hashes = ', '.join(commit.hash[:HASH_LENGTH] for commit in fix_commits)
                logger.info(f'Found fix(es) for {commitish[:HASH_LENGTH]}: {hashes}')
                for fix_commit in fix_commits:
                    del commits[fix_commit.hash]
            else:
                logger.debug(f'Commit with fixes not in changes: {commitish[:HASH_LENGTH]}')

        return commits, fixes

    def apply_overrides(self, overrides):
        for override in overrides:
            when = override.get('when')
            if when and when not in self and when != self._start:
                logger.debug(f'Ignored {when!r} override')
                continue

            override_hash = override.get('hash') or when
            if override['action'] == 'add':
                commit = Commit(override.get('hash'), override['short'], override.get('authors') or [])
                logger.info(f'ADD    {commit}')
                self._commits_added.append(commit)

            elif override['action'] == 'remove':
                if override_hash in self._commits:
                    logger.info(f'REMOVE {self._commits[override_hash]}')
                    del self._commits[override_hash]

            elif override['action'] == 'change':
                if override_hash not in self._commits:
                    continue
                commit = Commit(override_hash, override['short'], override.get('authors') or [])
                logger.info(f'CHANGE {self._commits[commit.hash]} -> {commit}')
                self._commits[commit.hash] = commit

        self._commits = {key: value for key, value in reversed(self._commits.items())}

    def groups(self):
        group_dict = defaultdict(list)
        for commit in self:
            upstream_re = self.UPSTREAM_MERGE_RE.search(commit.short)
            if upstream_re:
                commit.short = f'[upstream] Merged with youtube-dl {upstream_re.group(1)}'

            match = self.MESSAGE_RE.fullmatch(commit.short)
            if not match:
                logger.error(f'Error parsing short commit message: {commit.short!r}')
                continue

            prefix, sub_details_alt, message, issues = match.groups()
            issues = [issue.strip()[1:] for issue in issues.split(',')] if issues else []

            if prefix:
                groups, details, sub_details = zip(*map(self.details_from_prefix, prefix.split(',')))
                group = next(iter(filter(None, groups)), None)
                details = ', '.join(unique(details))
                sub_details = list(itertools.chain.from_iterable(sub_details))
            else:
                group = CommitGroup.CORE
                details = None
                sub_details = []

            if sub_details_alt:
                sub_details.append(sub_details_alt)
            sub_details = tuple(unique(sub_details))

            if not group:
                if self.EXTRACTOR_INDICATOR_RE.search(commit.short):
                    group = CommitGroup.EXTRACTOR
                    logger.error(f'Assuming [ie] group for {commit.short!r}')
                else:
                    group = CommitGroup.CORE

            commit_info = CommitInfo(
                details, sub_details, message.strip(),
                issues, commit, self._fixes[commit.hash])

            logger.debug(f'Resolved {commit.short!r} to {commit_info!r}')
            group_dict[group].append(commit_info)

        return group_dict

    @staticmethod
    def details_from_prefix(prefix):
        if not prefix:
            return CommitGroup.CORE, None, ()

        prefix, *sub_details = prefix.split(':')

        group, details = CommitGroup.get(prefix)
        if group is CommitGroup.PRIORITY and details:
            details = details.partition('/')[2].strip()

        if details and '/' in details:
            logger.error(f'Prefix is overnested, using first part: {prefix}')
            details = details.partition('/')[0].strip()

        if details == 'common':
            details = None
        elif group is CommitGroup.NETWORKING and details == 'rh':
            details = 'Request Handler'

        return group, details, sub_details


def get_new_contributors(contributors_path, commits):
    contributors = set()
    if contributors_path.exists():
        for line in read_file(contributors_path).splitlines():
            author, _, _ = line.strip().partition(' (')
            authors = author.split('/')
            contributors.update(map(str.casefold, authors))

    new_contributors = set()
    for commit in commits:
        for author in commit.authors:
            author_folded = author.casefold()
            if author_folded not in contributors:
                contributors.add(author_folded)
                new_contributors.add(author)

    return sorted(new_contributors, key=str.casefold)


def create_changelog(args):
    logging.basicConfig(
        datefmt='%Y-%m-%d %H-%M-%S', format='{asctime} | {levelname:<8} | {message}',
        level=logging.WARNING - 10 * args.verbosity, style='{', stream=sys.stderr)

    commits = CommitRange(None, args.commitish, args.default_author)

    if not args.no_override:
        if args.override_path.exists():
            overrides = json.loads(read_file(args.override_path))
            commits.apply_overrides(overrides)
        else:
            logger.warning(f'File {args.override_path.as_posix()} does not exist')

    logger.info(f'Loaded {len(commits)} commits')

    new_contributors = get_new_contributors(args.contributors_path, commits)
    if new_contributors:
        if args.contributors:
            write_file(args.contributors_path, '\n'.join(new_contributors) + '\n', mode='a')
        logger.info(f'New contributors: {", ".join(new_contributors)}')

    return Changelog(commits.groups(), args.repo, args.collapsible)


def create_parser():
    import argparse

    parser = argparse.ArgumentParser(
        description='Create a changelog markdown from a git commit range')
    parser.add_argument(
        'commitish', default='HEAD', nargs='?',
        help='The commitish to create the range from (default: %(default)s)')
    parser.add_argument(
        '-v', '--verbosity', action='count', default=0,
        help='increase verbosity (can be used twice)')
    parser.add_argument(
        '-c', '--contributors', action='store_true',
        help='update CONTRIBUTORS file (default: %(default)s)')
    parser.add_argument(
        '--contributors-path', type=Path, default=LOCATION_PATH.parent / 'CONTRIBUTORS',
        help='path to the CONTRIBUTORS file')
    parser.add_argument(
        '--no-override', action='store_true',
        help='skip override json in commit generation (default: %(default)s)')
    parser.add_argument(
        '--override-path', type=Path, default=LOCATION_PATH / 'changelog_override.json',
        help='path to the changelog_override.json file')
    parser.add_argument(
        '--default-author', default='pukkandan',
        help='the author to use without a author indicator (default: %(default)s)')
    parser.add_argument(
        '--repo', default='yt-dlp/yt-dlp',
        help='the github repository to use for the operations (default: %(default)s)')
    parser.add_argument(
        '--collapsible', action='store_true',
        help='make changelog collapsible (default: %(default)s)')

    return parser


if __name__ == '__main__':
    print(create_changelog(create_parser().parse_args()))