# app/services/github/installation_sync_pipeline.py
import asyncio
import os
from datetime import datetime
from typing import List, Optional

import httpx
from sqlalchemy.ext.asyncio import AsyncSession

from app.core.logger import logger
from app.services.postgress_db_service import pg_db_service as database_service
from app.services.github.github_client import GitHubClient
from app.models.github_models import UserRepository

class InstallationSyncPipeline:
    """Pipeline that only passes IDs between steps."""
    
    async def sync_installation(
        self,
        user_id: str,
        webhook_id: int,
        db: AsyncSession
    ) -> None:
        """Run the full installation sync for one user/webhook pair.

        Only IDs are passed between the steps:

        1. Resolve the installation ID from the stored webhook.
        2. Sync the installation's repositories.
        3. Sync the branches of those repositories.
        4. Sync the files of those branches.
        5. Run the synced files through AST processing.

        The run stops early, after logging a warning, when no installation ID
        can be resolved or when no repository was synced.

        Args:
            user_id: ID of the user the installation is synced for.
            webhook_id: ID of the stored webhook used to find the installation.
            db: Database session handed to every step.

        Raises:
            Exception: Any error escaping a step is logged and re-raised.
        """
        try:
            logger.info(f"Starting installation sync for user {user_id}, webhook {webhook_id}")

            # Step 1: Get installation_id from webhook
            installation_id = await self._get_installation_id(webhook_id, db)
            if not installation_id:
                # Make the early exit visible; otherwise the "Starting ..." line
                # above is never followed by any outcome in the logs.
                logger.warning(
                    f"Installation sync aborted for user {user_id}: "
                    f"no installation found for webhook {webhook_id}"
                )
                return

            # Step 2: Sync repositories (returns list of repo_ids)
            repo_ids = await self._sync_repositories(user_id, installation_id, db)
            if not repo_ids:
                logger.warning(
                    f"Installation sync aborted for user {user_id}: "
                    f"no repositories synced for installation {installation_id}"
                )
                return

            # Step 3: Sync branches for each repository
            branch_ids = await self._sync_all_branches(repo_ids, installation_id, db)

            # Step 4: Sync files for each branch
            file_ids = await self._sync_all_files(branch_ids, installation_id, user_id, db)

            # Step 5: Process files through AST. The returned IDs are not needed
            # here while the follow-up chunking step stays disabled.
            await self._process_all_files_ast(file_ids, webhook_id, db)

            logger.info(f"Installation sync completed: {len(repo_ids)} repos, {len(branch_ids)} branches, {len(file_ids)} files")

        except Exception as e:
            logger.error(f"Installation sync failed for user {user_id}: {str(e)}")
            raise
    
    async def _get_installation_id(
        self,
        webhook_id: int,
        db: AsyncSession
    ) -> Optional[int]:
        """Look up the installation ID stored on a webhook.

        Args:
            webhook_id: ID of the webhook to load.
            db: Database session used for the lookup.

        Returns:
            The webhook's ``installation_id``, or ``None`` when no webhook is
            found for ``webhook_id``.
        """
        webhook = await database_service.get_webhook_by_id(db, webhook_id)
        if not webhook:
            return None
        return webhook.installation_id
    
    async def _sync_repositories(
        self,
        user_id: str,
        installation_id: int,
        db: AsyncSession
    ) -> List[int]:
        """Sync every repository of an installation.

        Fetches the installation's repositories through a GitHub client and
        syncs them one by one; repositories that fail to sync are skipped.

        Args:
            user_id: ID of the user the repositories are synced for.
            installation_id: GitHub installation whose repositories are listed.
            db: Database session handed to the per-repository sync.

        Returns:
            IDs of the repositories that were synced. An empty list is returned
            when there is nothing to sync or when the sync fails as a whole
            (the error is logged, not raised).
        """
        try:
            github_client = self._create_github_client(installation_id)
            repos_data = await github_client.get_installation_repositories()

            # A payload without a "repositories" entry means "nothing to sync";
            # fall back to an empty list instead of failing on iterating None.
            repositories = repos_data.get("repositories") or []

            repo_ids = []
            for repo_data in repositories:
                repo_id = await self._sync_single_repository(repo_data, installation_id, user_id, db)
                if repo_id:
                    repo_ids.append(repo_id)

            logger.info(f"Synced {len(repo_ids)} repositories")
            return repo_ids

        except Exception as e:
            logger.error(f"Repository sync failed: {str(e)}")
            return []
    
    async def _sync_single_repository(
        self,
        repo_data: dict,
        installation_id: int,
        user_id: int,
        db: AsyncSession
    ) -> Optional[int]:
        """Get or create the database row for one GitHub repository.

        Args:
            repo_data: Repository payload; ``id``, ``name``, ``full_name`` and
                ``html_url`` are required, the other fields read here are optional.
            installation_id: Installation the repository belongs to.
            user_id: ID of the user the repository is synced for.
                NOTE(review): annotated ``int`` here but ``str`` on
                ``sync_installation``, which passes it through - confirm the type.
            db: Database session used for lookups and the insert.

        Returns:
            The ID of the existing or newly created repository row, or ``None``
            when the installation has no GitHub account row or when any error
            occurs (the error is logged, not raised).
        """
        try:
            github_account = await database_service.get_github_account_by_installation_id(db, installation_id)
            if not github_account:
                return None

            # Get or create repository
            existing_repo = await database_service.get_repository_by_id_and_user_id(
                db, repo_data["id"], user_id, installation_id
            )
            if existing_repo:
                return existing_repo.id

            # Create new repository
            repo_payload = {
                "github_account_id": github_account.id,
                "repo_id": repo_data["id"],
                "name": repo_data["name"],
                "full_name": repo_data["full_name"],
                "private": repo_data.get("private", False),
                "html_url": repo_data["html_url"],
                "description": repo_data.get("description"),
                "language": repo_data.get("language"),
                "default_branch": repo_data.get("default_branch", "main"),
            }

            new_repo = await database_service.create_repository(db, repo_payload)
            return new_repo.id

        except Exception as e:
            logger.error(f"Failed to sync repository {repo_data.get('name')}: {str(e)}")
            return None
    
    async def _sync_all_branches(
        self,
        repo_ids: List[int],
        installation_id: int,
        db: AsyncSession
    ) -> List[int]:
        """Sync the branches of every given repository, one repository at a time.

        Args:
            repo_ids: IDs of the repositories whose branches are synced.
            installation_id: GitHub installation the repositories belong to.
            db: Database session handed to the per-repository sync.

        Returns:
            The branch IDs of all repositories, concatenated in ``repo_ids`` order.
        """
        collected = []
        for current_repo_id in repo_ids:
            collected += await self._sync_repository_branches(current_repo_id, installation_id, db)

        logger.info(f"Synced {len(collected)} branches across {len(repo_ids)} repositories")
        return collected
    
    async def _sync_repository_branches(
        self,
        repo_id: int,
        installation_id: int,
        db: AsyncSession
    ) -> List[int]:
        """Sync all branches of one repository and archive each synced branch.

        Args:
            repo_id: ID of the repository row whose branches are synced.
            installation_id: GitHub installation used to create the client.
            db: Database session used for lookups and writes.

        Returns:
            IDs of the branches that were synced. An empty list is returned when
            the repository is not found or when any error occurs (the error is
            logged, not raised).
        """
        try:
            repository = await database_service.get_repository_by_id(db, repo_id)
            if not repository:
                return []

            client = self._create_github_client(installation_id)
            remote_branches = await client.get_repository_branches(repository.full_name)

            synced_ids = []
            for branch_info in remote_branches:
                synced_id = await self._sync_single_branch(repo_id, branch_info, db)
                if not synced_id:
                    # Branch could not be stored; nothing to archive for it.
                    continue
                synced_ids.append(synced_id)

                # Download and store the branch archive. This needs the stored
                # branch ID, so it only runs for branches that were synced.
                await self._download_branch_archive(
                    repo=repository,
                    branch_name=branch_info["name"],
                    branch_id=synced_id,
                    user_id=repository.github_account.user_id,
                    github_client=client
                )

            return synced_ids

        except Exception as e:
            logger.error(f"Branch sync failed for repository {repo_id}: {str(e)}")
            return []
    async def _download_branch_archive(
        self,
        repo: UserRepository,
        branch_name: str,
        branch_id: int,
        user_id: int,
        github_client: GitHubClient
    ) -> None:
        """Download a branch's zip archive from GitHub and upload it to storage.

        Best effort: any failure is logged and swallowed so that a failed
        archive download does not abort the surrounding branch sync.

        Args:
            repo: Repository row; ``id``, ``name`` and ``full_name`` are read.
            branch_name: Name of the branch to download.
            branch_id: ID of the stored branch, used in the filename and metadata.
            user_id: ID of the owning user, used in the filename and metadata.
            github_client: Client that supplies the installation token and the
                ``User-Agent`` value.
        """
        try:
            import io
            from urllib.parse import quote

            # Branch names may contain characters that are special in a URL
            # (for example "#" or "%"). Escape them, but keep "/" so nested
            # names such as "feature/x" stay in path form.
            encoded_ref = quote(branch_name, safe="/")
            archive_url = f"https://api.github.com/repos/{repo.full_name}/zipball/{encoded_ref}"

            # NOTE(review): no explicit timeout is set, so httpx's default
            # applies - confirm that is sufficient for large repositories.
            async with httpx.AsyncClient() as client:
                headers = {
                    "Authorization": f"token {await github_client._get_installation_token()}",
                    "Accept": "application/vnd.github.v3+json",
                    "User-Agent": github_client.app_name
                }

                response = await client.get(archive_url, headers=headers, follow_redirects=True)
                response.raise_for_status()

                # The whole archive is held in memory before the upload.
                archive_data = io.BytesIO(response.content)
                # NOTE(review): branch_name is embedded unmodified, so a name
                # containing "/" puts a slash into the filename - confirm the
                # storage service handles that.
                filename = f"archaea_user_id-{user_id}_repo_id-{repo.id}_branch_id-{branch_id}_branch_name-{branch_name}_code.zip"

                # Imported locally, as in the original code.
                from app.services.storage_service import storage_service

                storage_result = await storage_service.upload_repo_archive(
                    repo_name=repo.name,
                    archive_data=archive_data,
                    content_type="application/zip",
                    metadata={
                        "repo_id": str(repo.id),
                        "repo_full_name": repo.full_name,
                        "user_id": user_id,
                        "branch_id": branch_id,
                        "branch_name": branch_name,
                        "downloaded_at": datetime.now().isoformat(),
                        "sync_type": "installation_sync"
                    },
                    custom_filename=filename
                )

                logger.info(f"Downloaded and stored branch archive:{storage_result}")

        except Exception as e:
            logger.error(f"Failed to download branch archive for {repo.full_name}/{branch_name}: {str(e)}")
    async def _sync_single_branch(
        self,
        repo_id: int,
        branch_data: dict,
        db: AsyncSession
    ) -> Optional[int]:
        """Get or create the database row for one branch of a repository.

        Args:
            repo_id: ID of the repository row the branch belongs to.
            branch_data: Branch payload; ``name`` is required, ``commit.sha``
                and ``protected`` are optional.
            db: Database session used for the lookup and the insert.

        Returns:
            The ID of the existing or newly created branch row, or ``None``
            when any error occurs (the error is logged, not raised).
        """
        try:
            existing_branch = await database_service.get_branch_by_repo_id_and_name(
                db, repo_id, branch_data["name"]
            )

            # An existing row is returned as-is; its stored commit SHA and
            # protection flag are not refreshed from branch_data.
            if existing_branch:
                return existing_branch.id

            # Create new branch
            branch_payload = {
                "repository_id": repo_id,
                "name": branch_data["name"],
                "commit_sha": branch_data.get("commit", {}).get("sha"),
                "is_protected": branch_data.get("protected", False),
            }

            new_branch = await database_service.create_branch(db, branch_payload)
            return new_branch.id

        except Exception as e:
            logger.error(f"Failed to sync branch {branch_data.get('name')}: {str(e)}")
            return None
    
    async def _sync_all_files(
        self,
        branch_ids: List[int],
        installation_id: int,
        user_id:int,
        db: AsyncSession
    ) -> List[int]:
        """Sync the files of every given branch, one branch at a time.

        Args:
            branch_ids: IDs of the branches whose files are synced.
            installation_id: GitHub installation the branches belong to.
            user_id: ID of the user recorded on the synced files.
            db: Database session handed to the per-branch sync.

        Returns:
            The file IDs of all branches, concatenated in ``branch_ids`` order.
        """
        collected = []
        for current_branch_id in branch_ids:
            collected += await self._sync_branch_files(current_branch_id, installation_id, user_id, db)

        logger.info(f"Synced {len(collected)} files across {len(branch_ids)} branches")
        return collected
    
    async def _sync_branch_files(
        self,
        branch_id: int,
        installation_id: int,
        user_id:int,
        db: AsyncSession
    ) -> List[int]:
        """Sync every file entry of one branch.

        Entries whose ``type`` is ``"tree"`` are skipped; every other entry is
        handed to the single-file sync.

        Args:
            branch_id: ID of the branch row whose files are synced.
            installation_id: GitHub installation used to create the client.
            user_id: ID of the user recorded on the synced files.
            db: Database session used for lookups and writes.

        Returns:
            IDs of the files that were synced. An empty list is returned when
            the branch or its repository is not found, or when any error occurs
            (the error is logged, not raised).
        """
        try:
            branch_row = await database_service.get_branch_by_id(db, branch_id)
            if not branch_row:
                return []

            repository = await database_service.get_repository_by_id(db, branch_row.repository_id)
            if not repository:
                return []

            client = self._create_github_client(installation_id)
            entries = await client.get_branch_files(repository.full_name, branch_row.name)

            synced_ids = []
            for entry in entries:
                if entry["type"] != "tree":
                    synced_id = await self._sync_single_file(branch_id, entry, user_id, db)
                    if synced_id:
                        synced_ids.append(synced_id)

            return synced_ids

        except Exception as e:
            logger.error(f"File sync failed for branch {branch_id}: {str(e)}")
            return []
    
    async def _sync_single_file(
        self,
        branch_id: int,
        file_data: dict,
        user_id:int,
        db: AsyncSession
    ) -> Optional[int]:
        """Create or update the database row for one file of a branch.

        The row is keyed by a ``file_id`` derived from the repository ID, the
        branch ID and the file path. The lookup and the following write are
        separate calls.

        Args:
            branch_id: ID of the branch row the file belongs to.
            file_data: File payload; ``path`` is required, ``size`` and ``sha``
                are optional.
            user_id: ID of the user recorded on the file row.
            db: Database session used for lookups and writes.

        Returns:
            The ID of the created or updated file row, or ``None`` when the
            branch is not found or any error occurs (logged, not raised).
        """
        try:
            branch = await database_service.get_branch_by_id(db, branch_id)
            if not branch:
                # Report the real cause instead of letting the attribute access
                # below fail with an opaque "'NoneType' object has no attribute".
                logger.error(f"Failed to sync file {file_data.get('path')}: branch {branch_id} not found")
                return None

            # Extract file_name from file_path
            file_path = file_data["path"]
            file_name = os.path.basename(file_path)

            # Generate file_id based on repository, branch, and file path
            file_id = self._generate_file_id(branch.repository_id, branch_id, file_path)

            # Check if file already exists
            existing_file = await database_service.get_file_by_file_id(db, file_id)

            # Take the timestamp once so both columns carry the same value.
            # Kept as a naive UTC datetime, exactly as before.
            # NOTE(review): datetime.utcnow() is deprecated since Python 3.12;
            # confirm the column types before switching to an aware datetime.
            now = datetime.utcnow()

            file_payload = {
                "file_id": file_id,
                "repository_id": branch.repository_id,
                "user_id": user_id,
                "branch_id": branch_id,
                "file_path": file_path,
                "file_name": file_name,
                "file_extension": self._get_file_extension(file_name),
                "file_size": file_data.get("size"),
                "sha": file_data.get("sha"),
                "updated_at": now,  # Always update timestamp
                "last_modified": now,  # Always update timestamp
            }

            if existing_file:
                # Update existing file
                updated_file = await database_service.update_file(db, existing_file.id, file_payload)
                logger.debug(f"📝 Updated file: {file_path} (ID: {updated_file.id})")
                return updated_file.id

            # Create new file
            new_file = await database_service.create_file(db, file_payload)
            logger.debug(f"📄 Created file: {file_path} (ID: {new_file.id})")
            return new_file.id

        except Exception as e:
            logger.error(f"Failed to sync file {file_data.get('path')}: {str(e)}")
            return None
    
    async def _process_all_files_ast(
        self,
        file_ids: List[int],
        webhook_id:int,
        db: AsyncSession
    ) -> List[int]:
        """Hand the synced files to the AST file sync service.

        Args:
            file_ids: IDs of the files to process.
            webhook_id: ID of the webhook that triggered the sync.
            db: Database session passed on to the service.

        Returns:
            Whatever ``ASTFileSync.process_files`` returns for these arguments
            (annotated as a list of file IDs).
        """
        # Imported inside the method, as in the original code.
        from app.services.ast.ast_file_sync import ASTFileSync

        ast_sync = ASTFileSync()
        processed_ids = await ast_sync.process_files(file_ids, webhook_id, db)
        return processed_ids

    
    # async def _chunk_all_files(
    #     self,
    #     file_ids: List[int],
    #     db: AsyncSession
    # ) -> None:
    #     """Chunk all processed files."""
    #     from app.services.chunking.chunk_processor import ChunkProcessor
        
    #     chunk_processor = ChunkProcessor()
        
    #     for file_id in file_ids:
    #         await chunk_processor.chunk_file(file_id, db)
        
        # logger.info(f"Chunked {len(file_ids)} files")
    
    def _create_github_client(self, installation_id: int):
        """Build a ``GitHubClient`` for the given installation ID.

        A new client object is constructed on every call.
        """
        client = GitHubClient(installation_id)
        return client
    
    def _generate_file_id(self, repository_id: int, branch_id: int, file_path: str) -> str:
        """Generate unique file_id."""
        import hashlib
        unique_string = f"{repository_id}:{branch_id}:{file_path}"
        return hashlib.sha256(unique_string.encode()).hexdigest()[:32]
    
    def _get_file_extension(self, filename: str) -> str:
        """Extract file extension."""
        if "." in filename:
            return filename.split(".")[-1].lower()
        return ""