# app/services/github/push_sync_pipeline.py
import asyncio
import os
import io
import httpx
import re
from typing import List, Dict, Any, Optional
from datetime import datetime
from sqlalchemy.ext.asyncio import AsyncSession

from app.core.logger import logger
from app.services.postgress_db_service import pg_db_service as database_service
from app.services.github.github_client import GitHubClient
from app.services.storage_service import storage_service

from app.schemas.github_schemas import (

    # Enums
    SyncJobStatus
    )




class PushSyncPipeline:
    """Pipeline for push events that only passes IDs between steps."""
    
    async def sync_push(
        self,
        webhook_id: int,
        db: AsyncSession
    ) -> None:
        """Main pipeline - only receives user_id and webhook_id."""
        try:
            logger.info(f"Starting push sync for user webhook {webhook_id}")
            from app.services.webhook.webhook_manager import WebhookManager
            webhook_manger=WebhookManager()
            # Step 1: Get webhook data
            webhook_data=await webhook_manger._get_webhook_data(
                webhook_id=webhook_id,
                db=db
            )
            if not webhook_data:
                return
            
            # Step 2: Extract push event data
            push_data = await self._extract_push_data(webhook_data, db)
            if not push_data:
                return
            
            # Step 3: Sync repository (returns repo_id)
            repo_id = await self._sync_repository(push_data, db)
            if not repo_id:
                return
            
            # Step 4: Sync branch (returns branch_id)
            branch_id = await self._sync_branch(repo_id, push_data, db)
            if not branch_id:
                return
            
            # Step 5: Update webhook with branch_id
            await self._update_webhook_branch(webhook_id, branch_id, db)
            
            # Step 6: Download full branch archive
            await self._download_branch_archive(repo_id, branch_id, push_data, db)
            
            # Step 7: Process changed files and create structured diff
            await self._process_changed_files(repo_id, branch_id, push_data, db)
            
            # Step 8: Sync individual files (returns file_ids)
            file_ids = await self._sync_branch_files(branch_id,webhook_data["user_id"], push_data, db)
            # # Step 9: Process files through AST
            ast_file_ids = await self._process_all_files_ast(file_ids=file_ids,webhook_id=webhook_id, db=db)
            
            logger.info(f"Push sync completed: repo {repo_id}, branch {branch_id}, {len(file_ids)} files")
            
        except Exception as e:
            logger.error(f"Push sync failed for webhook_id {webhook_id}: {str(e)}")
            raise
    
    async def _process_all_files_ast(
        self,
        file_ids: List[int],
        webhook_id:int,
        db: AsyncSession
    ) -> List[int]:
        """Process all files through AST - returns list of processed file_ids."""
        from app.services.ast.ast_file_sync import ASTFileSync
        file_ats_sync_service=ASTFileSync()
        return await file_ats_sync_service.process_files(file_ids, webhook_id, db)
    
    
    async def _extract_push_data(
        self,
        webhook_data: dict,
        db: AsyncSession
    ) -> dict:
        """Extract push event data from webhook payload."""
        try:
            payload = webhook_data["payload"]
            repository = payload.get("repository", {})
            ref = payload.get("ref", "")
            
            # Extract branch name from ref
            branch_name = ref.replace("refs/heads/", "") if ref.startswith("refs/heads/") else ref
            
            return {
                "installation_id": webhook_data["installation_id"],
                "user_id": webhook_data["user_id"],
                "repository_data": repository,
                "branch_name": branch_name,
                "commits": payload.get("commits", []),
                "before_sha": payload.get("before"),
                "after_sha": payload.get("after"),
                "payload": payload
            }
            
        except Exception as e:
            logger.error(f"Failed to extract push data: {str(e)}")
            return None
    
    async def _sync_repository(
        self,
        push_data: dict,
        db: AsyncSession
    ) -> int:
        """Sync repository - returns repo_id."""
        try:
            repository_data = push_data["repository_data"]
            installation_id = push_data["installation_id"]
            user_id = push_data["user_id"]
            
            # Get GitHub account
            github_account = await database_service.get_github_account_by_installation_id(db, installation_id)
            if not github_account:
                logger.error(f"No GitHub account for installation {installation_id}")
                return None
            
            # Get or create repository
            existing_repo = await database_service.get_repository_by_id_and_user_id(
                db, repository_data["id"], user_id, installation_id
            )
            
            if existing_repo:
                return existing_repo.id
            
            # Create new repository
            repo_payload = {
                "github_account_id": github_account.id,
                "repo_id": repository_data["id"],
                "name": repository_data["name"],
                "full_name": repository_data["full_name"],
                "private": repository_data.get("private", False),
                "html_url": repository_data["html_url"],
                "description": repository_data.get("description"),
                "language": repository_data.get("language"),
                "default_branch": repository_data.get("default_branch", "main"),
            }
            
            new_repo = await database_service.create_repository(db, repo_payload)
            return new_repo.id
            
        except Exception as e:
            logger.error(f"Failed to sync repository {repository_data.get('name')}: {str(e)}")
            return None
    
    async def _sync_branch(
        self,
        repo_id: int,
        push_data: dict,
        db: AsyncSession
    ) -> int:
        """Sync branch - returns branch_id."""
        try:
            branch_name = push_data["branch_name"]
            after_sha = push_data["after_sha"]
            
            # Get latest commit info
            commit_info = await self._get_latest_commit_info(push_data)
            
            # Get or create branch
            existing_branch = await database_service.get_branch_by_repo_id_and_name(db, repo_id, branch_name)
            
            if existing_branch:
                # Update existing branch
                await database_service.update_branch(
                    db, existing_branch.id, {
                        "commit_sha": after_sha,
                        "commit_message": commit_info.get("message", ""),
                        "commit_author": commit_info.get("author", ""),
                        "last_synced_at": datetime.utcnow()
                    }
                )
                return existing_branch.id
            
            # Create new branch
            branch_payload = {
                "repository_id": repo_id,
                "name": branch_name,
                "commit_sha": after_sha,
                "commit_message": commit_info.get("message", ""),
                "commit_author": commit_info.get("author", ""),
                "is_protected": False,
                "last_synced_at": datetime.utcnow()
            }
            
            new_branch = await database_service.create_branch(db, branch_payload)
            return new_branch.id
            
        except Exception as e:
            logger.error(f"Failed to sync branch {branch_name}: {str(e)}")
            return None
    
    async def _get_latest_commit_info(
        self,
        push_data: dict
    ) -> dict:
        """Get latest commit information from push data."""
        commits = push_data.get("commits", [])
        if commits:
            latest_commit = commits[-1]
            return {
                "message": latest_commit.get("message", ""),
                "author": latest_commit.get("author", {}).get("name", ""),
                "timestamp": latest_commit.get("timestamp", "")
            }
        return {"message": "", "author": "", "timestamp": ""}
    
    async def _update_webhook_branch(
        self,
        webhook_id: int,
        branch_id: int,
        db: AsyncSession
    ) -> None:
        """Update webhook with branch_id."""
        await database_service.update_webhook_branch(db, webhook_id, branch_id)
    
    async def _download_branch_archive(
        self,
        repo_id: int,
        branch_id: int,
        push_data: dict,
        db: AsyncSession
    ) -> None:
        """Download and store branch archive."""
        try:
            repo = await database_service.get_repository_by_id(db, repo_id)
            branch = await database_service.get_branch_by_id(db, branch_id)
            
            github_client = GitHubClient(push_data["installation_id"])
            token = await github_client._get_installation_token()
            
            # Download branch archive
            archive_url = f"https://api.github.com/repos/{repo.full_name}/zipball/{branch.name}"
            
            async with httpx.AsyncClient() as client:
                headers = {
                    "Authorization": f"token {token}",
                    "Accept": "application/vnd.github.v3+json",
                    "User-Agent": github_client.app_name
                }
                
                response = await client.get(archive_url, headers=headers, follow_redirects=True)
                response.raise_for_status()
                
                # Create archive data
                archive_data = io.BytesIO(response.content)
                filename = f"archaea_user_id-{push_data['user_id']}_repo_id-{repo.id}_branch_id-{branch_id}_branch_name-{branch.name}_code.zip"
                
                # Upload to storage
                storage_result = await storage_service.upload_repo_archive(
                    repo_name=repo.name,
                    archive_data=archive_data,
                    content_type="application/zip",
                    metadata={
                        "repo_id": str(repo.id),
                        "repo_full_name": repo.full_name,
                        "user_id": push_data["user_id"],
                        "branch_id": branch_id,
                        "branch_name": branch.name,
                        "downloaded_at": datetime.now().isoformat(),
                        "sync_type": "push_event"
                    },
                    custom_filename=filename
                )
                
                logger.info(f"Downloaded and stored branch archive: {storage_result}")
                
        except Exception as e:
            logger.error(f"Failed to download branch archive for repo {repo_id}, branch {branch_id}: {str(e)}")
    
    async def _process_changed_files(
        self,
        repo_id: int,
        branch_id: int,
        push_data: dict,
        db: AsyncSession
    ) -> None:
        """Build the structured diff for this push and persist it.

        Nothing is stored when the push lists no commits, or when there is no
        usable "before" SHA (missing or all zeros). This is a best-effort
        step: any failure is logged and swallowed.

        Args:
            repo_id: Database id of the repository.
            branch_id: Database id of the branch.
            push_data: Output of ``_extract_push_data``; reads ``commits``,
                ``before_sha``, ``after_sha``, ``installation_id``, ``user_id``.
            db: Async database session.
        """
        # All-zero SHA; the original code treats it as "no previous commit".
        zero_sha = "0000000000000000000000000000000000000000"
        try:
            repo = await database_service.get_repository_by_id(db, repo_id)
            branch = await database_service.get_branch_by_id(db, branch_id)

            if not push_data.get("commits", []):
                logger.info(f"No commits found in push event for {repo.full_name}")
                return

            before_sha = push_data["before_sha"]
            after_sha = push_data["after_sha"]

            if (not before_sha) or before_sha == zero_sha:
                logger.info(f"Initial commit or no previous SHA for {repo.full_name}, skipping diff")
                return

            logger.info(f"Getting diff between {before_sha} and {after_sha} for {repo.full_name}")

            github_client = GitHubClient(push_data["installation_id"])

            # Commit metadata first, then the before...after diff.
            commit_data = await self._get_commit_details(repo.full_name, after_sha, github_client)
            structured_diff = await self._get_structured_diff(
                repo.full_name, before_sha, after_sha, github_client
            )

            changes_data = self._create_new_format_response(
                repo_full_name=repo.full_name,
                branch_name=branch.name,
                commit_data=commit_data,
                structured_diff=structured_diff
            )

            await self._store_changes_data_postgres(
                user_id=push_data["user_id"],
                repo_id=repo.id,
                repo_name=repo.name,
                branch_name=branch.name,
                commit_sha=after_sha,
                changes_data=changes_data,
                db=db,
                commit_data=commit_data,
                github_account_id=repo.github_account_id
            )

        except Exception as e:
            logger.error(f"Error processing changed files for repo {repo_id}: {str(e)}")
    async def _get_commit_details(self, repo_full_name: str, commit_sha: str, github_client: GitHubClient) -> Dict[str, Any]:
        """Get detailed commit information."""
        try:
            commit_data = await github_client._make_request("GET", f"/repos/{repo_full_name}/commits/{commit_sha}")
            return {
                "hash": commit_data["sha"],
                "author": commit_data["commit"]["author"]["name"],
                "timestamp": commit_data["commit"]["author"]["date"],
                "message": commit_data["commit"]["message"]
            }
        except Exception as e:
            logger.error(f"Error getting commit details for {commit_sha}: {e}")
            raise
    
    async def _get_structured_diff(self, repo_full_name: str, before_sha: str, after_sha: str, github_client: GitHubClient) -> List[Dict[str, Any]]:
        """Get structured diff between two commits."""
        try:
            diff_content = await github_client._make_request_get_text(
                "GET", 
                f"/repos/{repo_full_name}/compare/{before_sha}...{after_sha}",
                headers={"Accept": "application/vnd.github.v3.diff"}
            )
            return await self._parse_diff_to_new_format(diff_content)
        except Exception as e:
            logger.error(f"Error getting structured diff for {repo_full_name}: {e}")
            raise
    
    async def _parse_diff_to_new_format(self, diff_content: str) -> List[Dict[str, Any]]:
        """Parse unified diff format into the new structured format."""
        changes = []
        lines = diff_content.split('\n')
        
        current_file = None
        current_change = None
        
        i = 0
        while i < len(lines):
            line = lines[i]
            
            if line.startswith('diff --git'):
                # Save previous file change if exists
                if current_change:
                    changes.append(current_change)
                
                # Extract file paths
                parts = line.split()
                if len(parts) >= 3:
                    old_file = parts[2].replace('a/', '')
                    new_file = parts[3].replace('b/', '')
                    current_file = new_file if new_file != '/dev/null' else old_file
                
                # Determine change type
                change_type = "modified"
                i += 1
                
                # Check for file mode changes or new/deleted files
                while i < len(lines) and not lines[i].startswith('@@'):
                    if lines[i].startswith('new file'):
                        change_type = "added"
                    elif lines[i].startswith('deleted file'):
                        change_type = "deleted"
                    i += 1
                
                # Create new change object
                current_change = {
                    "file_name": current_file.split('/')[-1] if current_file else "unknown",
                    "file_path": current_file,
                    "change_type": change_type,
                    "diffs": []
                }
                
            elif line.startswith('@@') and current_change:
                hunk_header = line[2:].strip()
                hunk_info = self._parse_hunk_header(hunk_header)
                
                current_hunk = {
                    "change_from_line_start": hunk_info['old_start'],
                    "change_to_line_end": hunk_info['old_end'],
                    "change_new_line_start": hunk_info['new_start'],
                    "change_new_line_end": hunk_info['new_end'],
                    "old_lines": [],
                    "new_lines": []
                }
                
                i += 1
                
                # Process hunk content
                while i < len(lines) and not lines[i].startswith('@@') and not lines[i].startswith('diff --git'):
                    content_line = lines[i]
                    
                    if content_line.startswith(' '):
                        current_hunk["old_lines"].append(content_line[1:])
                        current_hunk["new_lines"].append(content_line[1:])
                    elif content_line.startswith('-'):
                        current_hunk["old_lines"].append(content_line[1:])
                    elif content_line.startswith('+'):
                        current_hunk["new_lines"].append(content_line[1:])
                    elif content_line.startswith('\\'):
                        pass
                    else:
                        break
                    i += 1
                
                # Clean up empty lines and validate
                if current_hunk["old_lines"] or current_hunk["new_lines"]:
                    current_hunk["old_lines"] = [line for line in current_hunk["old_lines"] if line.strip()]
                    current_hunk["new_lines"] = [line for line in current_hunk["new_lines"] if line.strip()]
                    
                    if current_change["change_type"] == "added":
                        current_hunk["change_from_line_start"] = 0
                        current_hunk["change_to_line_end"] = 0
                    elif current_change["change_type"] == "deleted":
                        current_hunk["change_new_line_start"] = 0
                        current_hunk["change_new_line_end"] = 0
                    
                    current_change["diffs"].append(current_hunk)
                
            else:
                i += 1
        
        if current_change:
            changes.append(current_change)
        return changes
    
    def _create_new_format_response(self, repo_full_name: str, branch_name: str, commit_data: Dict[str, Any], structured_diff: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Create the new format response."""
        return {
            "repo": f"github.com/{repo_full_name}",
            "branch": branch_name,
            "commit": commit_data,
            "changes": structured_diff
        }
    
    async def _store_changes_data_postgres(self, user_id: str, repo_id: int, repo_name: str, branch_name: str, commit_sha: str, changes_data: Dict[str, Any], db: AsyncSession, commit_data: Dict[str, Any], github_account_id: int):
        """Store the changes data in PostgreSQL."""
        try:
            user = await database_service.get_user_by_id(db, user_id)
            if not user:
                logger.error(f"User not found for user_id: {user_id}")
                return

            repository = await database_service.get_repository_by_id(db, repo_id)
            if not repository:
                logger.error(f"Repository not found for repo_id: {repo_id}")
                return

            branches = await database_service.get_branches_by_repository(db, repository.id)
            branch = next((b for b in branches if b.name == branch_name), None)
            
            if not branch:
                logger.error(f"Branch not found: {branch_name}")
                return
            
            # Create sync job
            job_data = {
                "user_id": user.id,
                "branch_id": branch.id,
                "job_type": "push_sync",
                "status": SyncJobStatus.COMPLETED,
                "trigger_event": "webhook",
                "changes_summary": {
                    "total_files_changed": len(changes_data["changes"]),
                    "files_added": len([c for c in changes_data["changes"] if c["change_type"] == "added"]),
                    "files_modified": len([c for c in changes_data["changes"] if c["change_type"] == "modified"]),
                    "files_deleted": len([c for c in changes_data["changes"] if c["change_type"] == "deleted"])
                }
            }
            sync_job = await database_service.create_sync_job(db, job_data)

            # Create code change record
            change_data = {
                "sync_job_id": sync_job.id,
                "repo": changes_data["repo"],
                "branch": branch_name,
                "commit_hash": commit_sha,
                "commit_author": commit_data["author"],
                "commit_timestamp": datetime.fromisoformat(commit_data["timestamp"].replace('Z', '+00:00')),
                "commit_message": commit_data["message"],
                "changes_data": changes_data,
                "total_files_changed": len(changes_data["changes"]),
                "files_added": len([c for c in changes_data["changes"] if c["change_type"] == "added"]),
                "files_modified": len([c for c in changes_data["changes"] if c["change_type"] == "modified"]),
                "files_deleted": len([c for c in changes_data["changes"] if c["change_type"] == "deleted"])
            }
            response = await database_service.create_code_change(db, change_data)
            logger.info(f"Successfully stored changes data codechangeId: {response.id} in PostgreSQL for {user_id}/{repo_name}/{branch_name}")
            
        except Exception as e:
            logger.error(f"Error storing changes data in PostgreSQL: {e}")
            raise
    
    def _parse_hunk_header(self, hunk_header: str) -> Dict[str, int]:
        """Parse hunk header like @@ -1,5 +1,6 @@ into correct line numbers."""
        pattern = r'@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@'
        match = re.match(pattern, hunk_header)
        
        if match:
            old_start = int(match.group(1))
            old_count = int(match.group(2)) if match.group(2) else 1
            new_start = int(match.group(3))
            new_count = int(match.group(4)) if match.group(4) else 1
            
            return {
                'old_start': old_start,
                'old_count': old_count,
                'old_end': old_start + old_count - 1,
                'new_start': new_start,
                'new_count': new_count,
                'new_end': new_start + new_count - 1
            }
        
        return self._parse_hunk_header_manual(hunk_header)
    
    def _parse_hunk_header_manual(self, hunk_header: str) -> Dict[str, int]:
        """Manual parsing for hunk headers as fallback."""
        content = hunk_header.strip('@ ')
        parts = content.split()
        
        if len(parts) < 2:
            return self._get_default_hunk_info()
        
        old_range = parts[0]
        new_range = parts[1]
        
        old_parts = old_range.lstrip('-').split(',')
        old_start = int(old_parts[0])
        old_count = int(old_parts[1]) if len(old_parts) > 1 else 1
        
        new_parts = new_range.lstrip('+').split(',')
        new_start = int(new_parts[0])
        new_count = int(new_parts[1]) if len(new_parts) > 1 else 1
        
        return {
            'old_start': old_start,
            'old_count': old_count,
            'old_end': old_start + old_count - 1,
            'new_start': new_start,
            'new_count': new_count,
            'new_end': new_start + new_count - 1
        }
    
    def _get_default_hunk_info(self) -> Dict[str, int]:
        """Return default hunk info when parsing fails."""
        return {
            'old_start': 1,
            'old_count': 0,
            'old_end': 0,
            'new_start': 1,
            'new_count': 0,
            'new_end': 0
        }
    
    async def _sync_branch_files(
        self,
        branch_id: int,
        user_id: int,
        push_data: dict,
        db: AsyncSession
    ) -> List[int]:
        """Sync only changed files from push event - returns list of file_ids."""
        try:
            branch = await database_service.get_branch_by_id(db, branch_id)
            if not branch:
                return []
            
            repo = await database_service.get_repository_by_id(db, branch.repository_id)
            if not repo:
                return []
            # Get changed files from push commits
            changed_files = await self._get_changed_files_from_push(push_data)
            if not changed_files:
                logger.info(f"No changed files found in push event for branch {branch_id}")
                return []
            
            github_client = GitHubClient(push_data["installation_id"])
            file_ids = []
            
            for file_path, change_type in changed_files.items():
                try:
                    if change_type == "removed":
                        # Mark file as deleted in database
                        file_id = await self._mark_file_as_deleted(branch_id, file_path, db)
                        if file_id:
                            logger.debug(f"Marked file as deleted: {file_path}")
                    else:
                        # Get file content from GitHub for added/modified files
                        file_content = await github_client.get_file_content(repo.full_name, file_path, branch.name)
                        if file_content:
                            file_data = {
                                "path": file_path,
                                "name": os.path.basename(file_path),
                                "size": len(file_content),
                                "sha": self._generate_sha(file_content),
                                "content": file_content,
                                "type": "file",
                                "change_type": change_type
                            }
                            
                            file_id = await self._sync_single_file(branch_id, file_data, user_id, db)
                            if file_id:
                                file_ids.append(file_id)
                                logger.debug(f"Synced changed file: {file_path} ({change_type})")
                    
                except Exception as e:
                    logger.warning(f"Failed to sync changed file {file_path}: {str(e)}")
                    continue
            
            logger.info(f"Processed {len(changed_files)} changed files for branch {branch_id}")
            return file_ids
            
        except Exception as e:
            logger.error(f"File sync failed for branch {branch_id}: {str(e)}")
            return []
    def _generate_sha(self, content: str) -> str:
        """Generate SHA hash for file content."""
        import hashlib
        return hashlib.sha1(content.encode()).hexdigest()
    
    async def _mark_file_as_deleted(self, branch_id: int, file_path: str, db: AsyncSession) -> Optional[int]:
        """Mark file as deleted in database."""
        try:
            file_id = self._generate_file_id(branch_id, file_path)
            existing_file = await database_service.get_file_by_file_id(db, file_id)
            
            if existing_file:
                # Update file to mark as deleted
                await database_service.update_file(db, existing_file.id, {
                    "is_deleted": True,
                    "deleted_at": datetime.utcnow()
                })
                return existing_file.id
            return None
            
        except Exception as e:
            logger.error(f"Failed to mark file as deleted {file_path}: {str(e)}")
            return None
    async def _get_changed_files_from_push(self, push_data: dict) -> Dict[str, str]:
        """Extract changed files from push event commits."""
        changed_files = {}
        
        try:
            commits = push_data.get("commits", [])
            for commit in commits:
                # Added files
                for added_file in commit.get("added", []):
                    changed_files[added_file] = "added"
                
                # Modified files
                for modified_file in commit.get("modified", []):
                    changed_files[modified_file] = "modified"
                
                # Removed files
                for removed_file in commit.get("removed", []):
                    changed_files[removed_file] = "removed"
            
            return changed_files
            
        except Exception as e:
            logger.error(f"Failed to extract changed files from push: {str(e)}")
            return {}
    async def _sync_single_file(
        self,
        branch_id: int,
        file_data: dict,
        user_id:int,
        db: AsyncSession
    ) -> int:
        """Sync single file - returns file_id. Uses upsert logic."""
        try:
            branch = await database_service.get_branch_by_id(db, branch_id)
            
            # Extract file_name from file_path
            file_path = file_data["path"]
            file_name = os.path.basename(file_path)
            
            # Generate file_id based on repository, branch, and file path
            file_id = self._generate_file_id(branch.repository_id, branch_id, file_path)
            
            # Check if file already exists
            existing_file = await database_service.get_file_by_file_id(db, file_id)
            
            file_payload = {
                "file_id": file_id,
                "repository_id": branch.repository_id,
                "user_id":user_id,
                "branch_id": branch_id,
                "file_path": file_path,
                "file_name": file_name,
                "file_extension": self._get_file_extension(file_name),
                "file_size": file_data.get("size"),
                "sha": file_data.get("sha"),
                "last_modified": datetime.utcnow()  # Always update timestamp
            }
            
            if existing_file:
                # Update existing file
                updated_file = await database_service.update_file(db, existing_file.id, file_payload)
                logger.debug(f"📝 Updated file: {file_path} (ID: {updated_file.id})")
                return updated_file.id
            else:
                # Create new file
                new_file = await database_service.create_file(db, file_payload)
                logger.debug(f"📄 Created file: {file_path} (ID: {new_file.id})")
                return new_file.id
                
        except Exception as e:
            logger.error(f"Failed to sync file {file_data.get('path')}: {str(e)}")
            return None
    
    def _generate_file_id(self, repository_id: int, branch_id: int, file_path: str) -> str:
        """Generate unique file_id."""
        import hashlib
        unique_string = f"{repository_id}:{branch_id}:{file_path}"
        return hashlib.sha256(unique_string.encode()).hexdigest()[:32]
    
    def _get_file_extension(self, filename: str) -> str:
        """Extract file extension from filename."""
        if "." in filename:
            return filename.split(".")[-1].lower()
        return ""