#!/usr/bin/env python3
"""CLI script for automated data archiving and cleanup.

Usage:
    # Dry-run (shows what would be deleted without making changes):
    cleanup meetings --older-than-years 5 --dry-run
    cleanup logs --older-than-days 90 --dry-run
    cleanup rating-snapshots --older-than-years 3 --dry-run

    # Actually delete with confirmation:
    cleanup meetings --older-than-years 5 --confirm

    # Archive to CSV before deleting:
    cleanup meetings --older-than-years 5 --archive --confirm
"""

from __future__ import annotations

import csv
from datetime import UTC, datetime, timedelta
from pathlib import Path
from typing import Any

import click
from sqlalchemy import text

from packages.core.common.logging import get_logger
from packages.core.storage.database import dispose_db, get_session

logger = get_logger(__name__)

# Default archive directory relative to project root
DEFAULT_ARCHIVE_DIR = Path(__file__).resolve().parent.parent / "data" / "archive"


def _get_archive_dir() -> Path:
    """Return archive directory, creating it if needed."""
    archive_dir = DEFAULT_ARCHIVE_DIR
    archive_dir.mkdir(parents=True, exist_ok=True)
    return archive_dir


def _export_to_csv(rows: list[dict[str, Any]], table_name: str) -> Path:
    """Export rows to a timestamped CSV file.

    Args:
        rows: List of row dicts to export.
        table_name: Logical name for the exported data.

    Returns:
        Path to the created CSV file.
    """
    if not rows:
        raise ValueError("No rows to export")

    timestamp = datetime.now(tz=UTC).strftime("%Y%m%d_%H%M%S")
    filename = f"{table_name}_archive_{timestamp}.csv"
    filepath = _get_archive_dir() / filename

    fieldnames = list(rows[0].keys())
    with open(filepath, "w", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(rows)

    logger.info("Exported %d rows to %s", len(rows), filepath)
    return filepath


def _confirm_and_delete(
    rows: list[dict[str, Any]],
    table_name: str,
    description: str,
    archive: bool,
    confirm: bool,
) -> int:
    """Confirm deletion and execute.

    Args:
        rows: Rows to delete (must have 'id' key).
        table_name: SQL table name for DELETE.
        description: Human-readable description of what is being deleted.
        archive: Whether to export to CSV first.
        confirm: Whether to skip interactive confirmation.

    Returns:
        Number of rows deleted.
    """
    if not rows:
        click.echo(f"No {description} found to delete.")
        return 0

    click.echo(f"\nFound {len(rows)} {description}:")
    for row in rows[:20]:
        click.echo(f"  - {row}")
    if len(rows) > 20:
        click.echo(f"  ... and {len(rows) - 20} more")

    if archive:
        filepath = _export_to_csv(rows, table_name)
        click.echo(f"Archived to: {filepath}")

    if not confirm:
        click.echo("\nDry-run mode. Use --confirm to delete.")
        return 0

    if not click.confirm(f"\nDelete {len(rows)} {description}?", default=False):
        click.echo("Aborted.")
        return 0

    ids = [row["id"] for row in rows]
    with get_session() as session:
        result = session.execute(
            text(f"DELETE FROM {table_name} WHERE id = ANY(:ids)"),
            {"ids": ids},
        )
        session.flush()
    click.echo(f"Deleted {result.rowcount} {description}.")
    return result.rowcount


@click.group()
def cli():
    """Automated data archiving and cleanup for TipSharks."""


@cli.command()
@click.option(
    "--older-than-years",
    type=int,
    default=5,
    help="Delete meetings older than this many years",
)
@click.option(
    "--dry-run", is_flag=True, default=False, help="Show what would be deleted"
)
@click.option(
    "--archive",
    is_flag=True,
    default=False,
    help="Export deleted data to CSV before removal",
)
@click.option(
    "--confirm",
    is_flag=True,
    default=False,
    help="Actually perform deletion (skips dry-run)",
)
def meetings(older_than_years: int, dry_run: bool, archive: bool, confirm: bool):
    """Delete old meetings and their associated races/starters/snapshots.

    Meetings are identified by meeting_date. Because races, starters, and
    rating snapshots cascade, only the meeting-level DELETE is needed.
    """
    cutoff = datetime.now(tz=UTC).date() - timedelta(days=365 * older_than_years)
    click.echo(
        f"Finding meetings older than {older_than_years} years " f"(before {cutoff})..."
    )

    with get_session() as session:
        rows = (
            session.execute(
                text(
                    "SELECT id, venue, meeting_date, category "
                    "FROM meetings WHERE meeting_date < :cutoff "
                    "ORDER BY meeting_date"
                ),
                {"cutoff": cutoff},
            )
            .mappings()
            .all()
        )

    return _confirm_and_delete(
        rows=[dict(r) for r in rows],
        table_name="meetings",
        description=f"meetings before {cutoff}",
        archive=archive or dry_run,
        confirm=confirm,
    )


@cli.command()
@click.option(
    "--older-than-days",
    type=int,
    default=90,
    help="Delete log entries older than this many days",
)
@click.option(
    "--dry-run", is_flag=True, default=False, help="Show what would be deleted"
)
@click.option(
    "--archive",
    is_flag=True,
    default=False,
    help="Export deleted data to CSV before removal",
)
@click.option(
    "--confirm",
    is_flag=True,
    default=False,
    help="Actually perform deletion (skips dry-run)",
)
def logs(older_than_days: int, dry_run: bool, archive: bool, confirm: bool):
    """Delete old log entries.

    NOTE: Log cleanup assumes a `logs` table exists. If your logging
    is file-based, this command is a no-op placeholder.
    """
    cutoff = datetime.now(tz=UTC) - timedelta(days=older_than_days)
    click.echo(
        f"Finding log entries older than {older_than_days} days "
        f"(before {cutoff})..."
    )

    # Check if the logs table exists
    with get_session() as session:
        result = session.execute(
            text(
                "SELECT EXISTS ("
                "  SELECT FROM information_schema.tables "
                "  WHERE table_name = 'logs'"
                ")"
            )
        ).scalar()

        if not result:
            click.echo(
                "No `logs` table found in database. "
                "This command is a placeholder for future DB-logged "
                "audit/action logs. Skipping.",
                err=True,
            )
            return 0

        rows = (
            session.execute(
                text(
                    "SELECT id, level, message, created_at "
                    "FROM logs WHERE created_at < :cutoff "
                    "ORDER BY created_at"
                ),
                {"cutoff": cutoff},
            )
            .mappings()
            .all()
        )

    return _confirm_and_delete(
        rows=[dict(r) for r in rows],
        table_name="logs",
        description=f"log entries before {cutoff}",
        archive=archive or dry_run,
        confirm=confirm,
    )


@cli.command()
@click.option(
    "--older-than-years",
    type=int,
    default=3,
    help="Delete rating snapshots older than this many years",
)
@click.option(
    "--dry-run", is_flag=True, default=False, help="Show what would be deleted"
)
@click.option(
    "--archive",
    is_flag=True,
    default=False,
    help="Export deleted data to CSV before removal",
)
@click.option(
    "--confirm",
    is_flag=True,
    default=False,
    help="Actually perform deletion (skips dry-run)",
)
def rating_snapshots(
    older_than_years: int, dry_run: bool, archive: bool, confirm: bool
):
    """Delete old rating snapshots.

    Uses as_of_race_id joined to races/meetings to determine age.
    Only snapshots linked to races older than the cutoff are removed.
    """
    cutoff = datetime.now(tz=UTC).date() - timedelta(days=365 * older_than_years)
    click.echo(
        f"Finding rating snapshots older than {older_than_years} years "
        f"(meeting date before {cutoff})..."
    )

    with get_session() as session:
        rows = (
            session.execute(
                text(
                    "SELECT rs.id, rs.entity_type, rs.entity_id, "
                    "       rs.rating, m.meeting_date "
                    "FROM rating_snapshots rs "
                    "JOIN races r ON r.id = rs.as_of_race_id "
                    "JOIN meetings m ON m.id = r.meeting_id "
                    "WHERE m.meeting_date < :cutoff "
                    "ORDER BY m.meeting_date"
                ),
                {"cutoff": cutoff},
            )
            .mappings()
            .all()
        )

    return _confirm_and_delete(
        rows=[dict(r) for r in rows],
        table_name="rating_snapshots",
        description=f"rating snapshots (meetings before {cutoff})",
        archive=archive or dry_run,
        confirm=confirm,
    )


def main():
    """Entry point for the CLI."""
    try:
        cli()
    finally:
        dispose_db()


if __name__ == "__main__":
    main()
