import subprocess
import argparse
import sys
import ast
import os
import re

# Function to run the 'git diff' command and capture its output.
# Update the git command to be more specific if needed,
# e.g. change args to look like the line below to only include Java files:
#   ['git', 'diff', commit1, commit2, '--name-status', '--', '*.java']
def run_git_diff(commit1, commit2):
    """Run ``git diff --name-status`` between two revisions.

    Args:
        commit1: Tag, branch or commit to compare from.
        commit2: Tag, branch or commit to compare to.

    Returns:
        Git's standard output with trailing line breaks removed, or the
        integer ``1`` when Git could not be started or exited with a
        non-zero status (details are printed to stderr).
    """
    print(f'Executing git diff for \'{commit1}\'...\'{commit2}\'')
    args = ['git', 'diff', commit1, commit2, '--name-status']

    try:
        result = subprocess.run(args,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE,
                                text=True)
    except (OSError, ValueError, subprocess.SubprocessError) as e:
        # OSError: git missing or not executable.
        # ValueError: invalid arguments or undecodable output.
        print("An error occurred:", e, file=sys.stderr)
        return 1

    # The exit status, not the presence of stderr text, decides success:
    # Git may print warnings on stderr while still producing a valid diff.
    if result.returncode != 0:
        print("Error:", result.stderr, file=sys.stderr)
        return 1

    if result.stderr:
        print("Warning:", result.stderr, file=sys.stderr)

    # Strip only line breaks so a path ending in a space is not truncated.
    return result.stdout.rstrip('\r\n')

# Function to build the absolute path for a file
# The path is quoted and escaped only if it contains spaces or quotes
def format_file_name(cwd, file):
    """Join *file* onto *cwd* and quote the result when necessary.

    A joined path containing a space or a double quote is wrapped in
    double quotes, with every embedded double quote backslash-escaped.
    Any other path is returned as joined.
    """
    full_path = os.path.join(cwd, file)

    needs_quoting = any(ch in full_path for ch in (' ', '"'))
    if not needs_quoting:
        return full_path

    escaped = full_path.replace('"', '\\"')
    return f'"{escaped}"'

# Function to parse git diff output
# Consumes each relevant line from Git and produces the appropriate override line
# Note that all paths are transformed from a relative to an absolute path
def _unquote_git_path(path):
    """Undo Git's C-style quoting of a path; unquoted paths pass through."""
    if not path.startswith('"'):
        return path

    if path.isascii():
        # Git writes non-ASCII bytes as octal escapes of the raw (normally
        # UTF-8) bytes, so evaluate as a bytes literal and decode, instead
        # of letting each escaped byte become its own code point.
        raw = ast.literal_eval('b' + path)
        try:
            return raw.decode('utf-8')
        except UnicodeDecodeError:
            # Not UTF-8: keep the one-code-point-per-byte interpretation.
            return raw.decode('latin-1')

    # Raw non-ASCII characters are present (e.g. core.quotePath=false), so
    # only control characters and quotes are escaped; a str literal is right.
    return ast.literal_eval(path)


def parse_git_diff_output(diff_output):
    """Parse ``git diff --name-status`` output into override entries.

    Only added (``A``), deleted (``D``) and renamed (``R...``) lines are
    consumed; every other line is ignored. Each path is unquoted, made
    absolute against the current working directory and passed through
    ``format_file_name``.

    Args:
        diff_output: Text produced by ``git diff --name-status``.

    Returns:
        A tuple ``(added_files, deleted_files, renamed_files)`` where the
        first two are lists of formatted paths and the third is a list of
        ``(old_path, new_path)`` tuples.

    Raises:
        ValueError: If a rename line does not hold exactly two
            tab-separated paths.
    """
    print('Parsing git output')
    cwd = os.getcwd()
    added_files = []
    deleted_files = []
    renamed_files = []

    for line in re.split(r'\r?\n', diff_output):
        # Files marked as added or deleted carry a single path
        if line.startswith(('A\t', 'D\t')):
            status, path = line.split('\t', 1)
            target = added_files if status == 'A' else deleted_files
            target.append(format_file_name(cwd, _unquote_git_path(path)))

        # Files flagged as a move
        # Git reports a similarity score for renames that could be used,
        # e.g. R074 file1.java file2.java where it is a 74% match.
        elif line.startswith('R'):
            _, paths_info = line.split('\t', 1)
            old_path, new_path = paths_info.split('\t')
            renamed_files.append((
                format_file_name(cwd, _unquote_git_path(old_path)),
                format_file_name(cwd, _unquote_git_path(new_path)),
            ))

    return added_files, deleted_files, renamed_files

# This must be run from inside the Git repository.
# To run this multiple times for different repositories, the override files should be concatenated.
if __name__ == "__main__":
    # Command line: two revisions to compare and the override file to write
    cli = argparse.ArgumentParser(description="Using Git generate an Validate override file")
    for arg_name, arg_help in (
        ("commit1", "A tag, branch or commit to compare from"),
        ("commit2", "A tag, branch or commit to compare to"),
        ("output", "The output override file"),
    ):
        cli.add_argument(arg_name, help=arg_help)
    options = cli.parse_args()

    # run_git_diff signals failure by returning 1 instead of the diff text
    diff_text = run_git_diff(options.commit1, options.commit2)
    if diff_text == 1:
        sys.exit(1)

    added, deleted, renamed = parse_git_diff_output(diff_text)

    print('Writing output file')
    print(f'  added:   {len(added)}')
    print(f'  deleted: {len(deleted)}')
    print(f'  renamed: {len(renamed)}')

    # One override line per entry: additions, then deletions, then renames
    override_lines = [f'A {entry}\n' for entry in added]
    override_lines += [f'D {entry}\n' for entry in deleted]
    override_lines += [f"R {source} {target}\n" for source, target in renamed]

    with open(options.output, "w") as out:
        out.writelines(override_lines)

    print('Finished')
