MCP/mcp.sh

#!/bin/bash

# MCP - Master Control Process for Discord bot instances
# Uses INI-style configuration file
#
# Program Flow:
# 1. Check dependencies and validate arguments
# 2. Load and validate configuration from INI file
# 3. Ensure only one MCP instance is running
# 4. Set up signal handlers for graceful shutdown and control
# 5. Clean up any stale PID files from previous runs
# 6. Validate all client configurations
# 7. Enter main control loop:
#    - Check for crashed clients and restart them
#    - Periodically check for git updates (selective updates)
#    - Monitor configuration file changes
#    - Sleep and repeat

# Strict mode: exit on an unhandled command failure (-e), treat expansion of
# unset variables as an error (-u), and make a pipeline fail when any stage
# of it fails (pipefail).
set -euo pipefail

#=============================================================================
# DEPENDENCY CHECKING
#=============================================================================

# Verify that every external tool MCP relies on can be found on PATH.
# All missing tools are collected first so they are reported together.
# Exits with status 1 (message on stderr) if at least one tool is missing.
check_dependencies() {
    local tool
    local missing_deps=()

    for tool in git awk grep sed stat; do
        if ! command -v "$tool" >/dev/null; then
            missing_deps+=("$tool")
        fi
    done

    # Nothing missing: carry on with startup.
    if (( ${#missing_deps[@]} == 0 )); then
        return 0
    fi

    echo "Error: Missing required dependencies: ${missing_deps[*]}" >&2
    echo "Please install the missing tools and try again." >&2
    exit 1
}

# Run dependency check immediately, before any argument handling or
# configuration loading, so a missing tool is reported up front.
check_dependencies

#=============================================================================
# ARGUMENT HANDLING
#=============================================================================

# Handle built-in commands before normal startup.
# help/-h/--help print the usage text and exit 0. "status" is only
# acknowledged here; it is acted on once the configuration has been loaded.
# Any other (or no) argument falls through to normal startup.
case "${1:-}" in
    help|-h|--help)
        printf '%s\n' \
            "MCP - Master Control Process for Discord bot instances" \
            "" \
            "Usage: $0 [command]" \
            "" \
            "Commands:" \
            "  status    Show current MCP and client status" \
            "  help      Show this help message" \
            "" \
            "Configuration:" \
            "  Set MCP_CONFIG_FILE environment variable to use custom config file" \
            "  Default: ./mcp.conf" \
            "" \
            "Signals:" \
            "  TERM/INT/QUIT  Graceful shutdown" \
            "  HUP            Reload configuration (restart required)" \
            "  USR1           Show status in logs" \
            "  USR2           Toggle git updates on/off"
        exit 0
        ;;
    status)
        # Status command will be handled after configuration loading
        ;;
esac

#=============================================================================
# CONFIGURATION LOADING
#=============================================================================

# Resolve the absolute path of the directory that contains this script.
# The default configuration file is looked up relative to it.
# Exits with status 1 if the directory cannot be determined.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" || {
    echo "Error: Cannot determine script directory" >&2
    exit 1
}
readonly SCRIPT_DIR

# Configuration file path: the MCP_CONFIG_FILE environment variable wins
# when set and non-empty, otherwise mcp.conf inside the script directory.
CONFIG_FILE="${MCP_CONFIG_FILE:-${SCRIPT_DIR}/mcp.conf}" || {
    echo "Error: Cannot set configuration file path" >&2
    exit 1
}
readonly CONFIG_FILE

# Parse the INI-style configuration file and print one value.
# awk scans for the "[section]" header, then for the first "key = value"
# line inside that section. Surrounding whitespace (including a trailing
# carriage return) is ignored around headers, keys and values. The key is
# compared literally, and the value is everything after the first "=", so
# values may themselves contain "=".
# Parameters:
#   $1 - section: INI section name (without brackets)
#   $2 - key: Configuration key name
#   $3 - default: Value printed when the key is absent (default: empty)
# Globals: CONFIG_FILE (read)
# Outputs: the configured value, or the default when the section/key is not
#          found or the file cannot be read. A key that is present with an
#          empty value yields an empty line, not the default.
# Returns: 0
get_config_value() {
    local section="$1"
    local key="$2"
    local default="${3:-}"

    # awk exits non-zero when the key was not found, which is what triggers
    # the fallback to the default value.
    awk -v section="[$section]" -v key="$key" '
        {
            line = $0
            gsub(/^[[:space:]]+|[[:space:]]+$/, "", line)
        }
        line == section { in_section = 1; next }
        line ~ /^\[/ { in_section = 0; next }
        in_section {
            sep = index(line, "=")
            if (sep == 0) next
            name = substr(line, 1, sep - 1)
            gsub(/[[:space:]]+$/, "", name)
            if (name != key) next
            value = substr(line, sep + 1)
            gsub(/^[[:space:]]+/, "", value)
            print value
            found = 1
            exit
        }
        END { exit(found ? 0 : 1) }
    ' "$CONFIG_FILE" || printf '%s\n' "$default"
}

# List the names of all [client.*] sections in the configuration file.
# A header only counts when "[client." starts at the first column of a line;
# the name is everything up to the closing bracket.
# Globals: CONFIG_FILE (read)
# Outputs: one client name per line (without the "[client." prefix and "]")
# Returns: 0, also when no client section exists (that is not an error).
get_client_sections() {
    local header_re='^\[client\.([^]]*)\]'
    local line

    while IFS= read -r line || [[ -n "$line" ]]; do
        if [[ "$line" =~ $header_re ]]; then
            printf '%s\n' "${BASH_REMATCH[1]}"
        fi
    done < "$CONFIG_FILE" || true
}

# Check that the configuration file exists and is readable.
# Exits with status 1 if it is missing (or not a regular file) or if the
# current user cannot read it.
if [[ ! -f "$CONFIG_FILE" ]]; then
    echo "Error: Configuration file not found: $CONFIG_FILE" >&2
    exit 1
fi

if [[ ! -r "$CONFIG_FILE" ]]; then
    echo "Error: Configuration file is not readable: $CONFIG_FILE" >&2
    exit 1
fi

# Load the core MCP settings into readonly global variables.
# Every setting is described by one table row:
#   VARIABLE|section|key|default|label used in the error message
# Sets: MCP_PID_FILE, MCP_INTERVAL, MCP_RESTART_DELAY, MCP_LOG_LEVEL,
#       MCP_GIT_CHECK_INTERVAL, MCP_DEFAULT_GIT_BRANCH,
#       MCP_GIT_UPDATES_DEFAULT, BOT_DEFAULT_SCRIPT_NAME,
#       BOT_DEFAULT_PID_FILENAME, BOT_DEFAULT_START_STAGGER
# Exits with status 1 if get_config_value fails for any setting.
load_mcp_config() {
    local -a settings=(
        "MCP_PID_FILE|mcp|pid_file|./mcp.pid|MCP PID file"
        "MCP_INTERVAL|mcp|interval|30|MCP interval"
        "MCP_RESTART_DELAY|mcp|restart_delay|5|MCP restart delay"
        "MCP_LOG_LEVEL|mcp|log_level|INFO|MCP log level"
        "MCP_GIT_CHECK_INTERVAL|git.default|check_interval|300|git check interval"
        "MCP_DEFAULT_GIT_BRANCH|git.default|branch|main|default git branch"
        "MCP_GIT_UPDATES_DEFAULT|git.default|enabled|true|git updates enabled"
        "BOT_DEFAULT_SCRIPT_NAME|bot.default|script_name|discord-bot.js|default bot script name"
        "BOT_DEFAULT_PID_FILENAME|bot.default|pid_filename|bot.pid|default bot PID filename"
        "BOT_DEFAULT_START_STAGGER|bot.default|start_stagger|2|default bot start stagger"
    )
    local row var_name section key fallback label value

    for row in "${settings[@]}"; do
        IFS='|' read -r var_name section key fallback label <<< "$row"

        if ! value=$(get_config_value "$section" "$key" "$fallback"); then
            echo "Error: Failed to read ${label} setting" >&2
            exit 1
        fi

        # readonly (unlike declare -r) creates the variable at global scope.
        readonly "${var_name}=${value}"
    done
}

# Per-client settings: one associative array per setting, keyed by client name.
declare -A CLIENT_DIRECTORY CLIENT_SCRIPT_NAME CLIENT_PID_FILENAME \
    CLIENT_START_STAGGER CLIENT_GIT_BRANCH CLIENT_ENABLED

# Load the configuration of every [client.*] section into the CLIENT_*
# associative arrays (keyed by client name).
# Populates: CLIENT_DIRECTORY, CLIENT_SCRIPT_NAME, CLIENT_PID_FILENAME,
#            CLIENT_START_STAGGER, CLIENT_GIT_BRANCH, CLIENT_ENABLED
# Fallbacks passed to get_config_value: directory "" (none), script_name
# BOT_DEFAULT_SCRIPT_NAME, pid_filename BOT_DEFAULT_PID_FILENAME,
# start_stagger BOT_DEFAULT_START_STAGGER, git_branch MCP_DEFAULT_GIT_BRANCH,
# enabled "true".
# Client names are read one per line, so names are never word-split or
# glob-expanded; empty names are skipped because they cannot be used as an
# array key.
# Exits with status 1 if any configuration lookup fails.
load_client_configs() {
    local client_names
    local client_name
    local temp_value

    if ! client_names=$(get_client_sections); then
        echo "Error: Failed to get client sections from config" >&2
        exit 1
    fi

    while IFS= read -r client_name; do
        [[ -n "$client_name" ]] || continue

        if ! temp_value=$(get_config_value "client.$client_name" "directory" ""); then
            echo "Error: Failed to read directory for client $client_name" >&2
            exit 1
        fi
        CLIENT_DIRECTORY["$client_name"]="$temp_value"

        if ! temp_value=$(get_config_value "client.$client_name" "script_name" "$BOT_DEFAULT_SCRIPT_NAME"); then
            echo "Error: Failed to read script name for client $client_name" >&2
            exit 1
        fi
        CLIENT_SCRIPT_NAME["$client_name"]="$temp_value"

        if ! temp_value=$(get_config_value "client.$client_name" "pid_filename" "$BOT_DEFAULT_PID_FILENAME"); then
            echo "Error: Failed to read PID filename for client $client_name" >&2
            exit 1
        fi
        CLIENT_PID_FILENAME["$client_name"]="$temp_value"

        if ! temp_value=$(get_config_value "client.$client_name" "start_stagger" "$BOT_DEFAULT_START_STAGGER"); then
            echo "Error: Failed to read start stagger for client $client_name" >&2
            exit 1
        fi
        CLIENT_START_STAGGER["$client_name"]="$temp_value"

        if ! temp_value=$(get_config_value "client.$client_name" "git_branch" "$MCP_DEFAULT_GIT_BRANCH"); then
            echo "Error: Failed to read git branch for client $client_name" >&2
            exit 1
        fi
        CLIENT_GIT_BRANCH["$client_name"]="$temp_value"

        if ! temp_value=$(get_config_value "client.$client_name" "enabled" "true"); then
            echo "Error: Failed to read enabled status for client $client_name" >&2
            exit 1
        fi
        CLIENT_ENABLED["$client_name"]="$temp_value"
    done <<< "$client_names"
}

# Validate the core configuration values.
# Checks:
#   - MCP_INTERVAL is an integer >= 1
#   - MCP_RESTART_DELAY and BOT_DEFAULT_START_STAGGER are non-negative integers
#   - MCP_GIT_CHECK_INTERVAL is an integer >= 60
#   - MCP_LOG_LEVEL is one of DEBUG, INFO, WARN, ERROR
#   - the directory of MCP_PID_FILE exists and is writable
# Numeric comparisons force base 10 (10#...), so values written with leading
# zeros such as "08" or "070" are read as decimal instead of octal.
# Exits with status 1 (message on stderr) on the first failed check.
validate_config_values() {
    local uint_re='^[0-9]+$'

    # Validate numeric values. The regex runs first, so the arithmetic test
    # only ever sees plain digit strings.
    if [[ ! "$MCP_INTERVAL" =~ $uint_re ]] || ((10#$MCP_INTERVAL < 1)); then
        echo "Error: MCP interval must be a positive integer: $MCP_INTERVAL" >&2
        exit 1
    fi

    if [[ ! "$MCP_RESTART_DELAY" =~ $uint_re ]]; then
        echo "Error: MCP restart delay must be a non-negative integer: $MCP_RESTART_DELAY" >&2
        exit 1
    fi

    if [[ ! "$MCP_GIT_CHECK_INTERVAL" =~ $uint_re ]] || ((10#$MCP_GIT_CHECK_INTERVAL < 60)); then
        echo "Error: Git check interval must be at least 60 seconds: $MCP_GIT_CHECK_INTERVAL" >&2
        exit 1
    fi

    if [[ ! "$BOT_DEFAULT_START_STAGGER" =~ $uint_re ]]; then
        echo "Error: Default start stagger must be a non-negative integer: $BOT_DEFAULT_START_STAGGER" >&2
        exit 1
    fi

    # Validate log level
    case "$MCP_LOG_LEVEL" in
        DEBUG|INFO|WARN|ERROR) ;;
        *)
            echo "Error: Invalid log level '$MCP_LOG_LEVEL'. Must be: DEBUG, INFO, WARN, ERROR" >&2
            exit 1
            ;;
    esac

    # Validate PID file directory exists and is writable
    local pid_dir
    pid_dir=$(dirname "$MCP_PID_FILE")
    if [[ ! -d "$pid_dir" ]]; then
        echo "Error: PID file directory does not exist: $pid_dir" >&2
        exit 1
    fi

    if [[ ! -w "$pid_dir" ]]; then
        echo "Error: PID file directory is not writable: $pid_dir" >&2
        exit 1
    fi
}

# Load all configuration at startup. Order matters: load_client_configs
# passes the BOT_DEFAULT_* and MCP_DEFAULT_GIT_BRANCH values set by
# load_mcp_config as fallbacks, and validate_config_values checks the
# variables that load_mcp_config populates.
load_mcp_config
load_client_configs
validate_config_values

# Global variable for git updates toggle (can be changed by signal).
# Seeded from the git.default "enabled" setting loaded above.
GIT_UPDATES_ENABLED="$MCP_GIT_UPDATES_DEFAULT"

#=============================================================================
# LOGGING
#=============================================================================

# Output a timestamped debug message to stdout if MCP_LOG_LEVEL is DEBUG.
# Format: [YYYY-MM-DD HH:MM:SS] [DEBUG] message
# Parameters: All parameters are concatenated as the message.
# Returns: 0 also when the message is suppressed, so that a bare
#          "log_debug ..." statement can never abort a caller under set -e.
log_debug() {
    if [[ "$MCP_LOG_LEVEL" == "DEBUG" ]]; then
        echo "[$(date '+%Y-%m-%d %H:%M:%S')] [DEBUG] $*"
    fi
}

# Print a timestamped informational message on stdout.
# Only emitted when MCP_LOG_LEVEL is DEBUG or INFO.
# Format: [YYYY-MM-DD HH:MM:SS] [MCP] message
# Parameters: All parameters are joined into the message.
log_info() {
    if [[ "$MCP_LOG_LEVEL" == "DEBUG" || "$MCP_LOG_LEVEL" == "INFO" ]]; then
        printf '[%s] [MCP] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$*"
    fi
}

# Backward-compatible alias: forwards every argument unchanged to log_info.
log() {
    log_info "$@"
}

# Print a timestamped warning message on stdout.
# Suppressed only when MCP_LOG_LEVEL is something other than DEBUG, INFO
# or WARN.
# Format: [YYYY-MM-DD HH:MM:SS] [WARN] message
# Parameters: All parameters are joined into the warning message.
log_warn() {
    if [[ "$MCP_LOG_LEVEL" == "DEBUG" || "$MCP_LOG_LEVEL" == "INFO" || "$MCP_LOG_LEVEL" == "WARN" ]]; then
        printf '[%s] [WARN] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$*"
    fi
}

# Print a timestamped error message on stderr, regardless of log level.
# Format: [YYYY-MM-DD HH:MM:SS] [ERROR] message
# Parameters: All parameters are joined into the error message.
log_error() {
    printf '[%s] [ERROR] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$*" >&2
}

#=============================================================================
# ATOMIC FILE OPERATIONS
#=============================================================================

# Replace a file's content via a temporary file plus rename.
# The content is first written to "<target>.tmp.<MCP pid>" next to the
# target and then moved over the target.
# printf is used instead of echo so that content such as "-n" or "-e" is
# written literally rather than being taken as an echo option.
# Parameters:
#   $1 - target_file: File to update
#   $2 - content: Content to write (a trailing newline is appended)
# Returns: 0 on success, 1 on failure (the temporary file is removed)
atomic_file_update() {
    local target_file="$1"
    local content="$2"
    local temp_file="${target_file}.tmp.$$"

    if printf '%s\n' "$content" > "$temp_file" && mv -- "$temp_file" "$target_file"; then
        return 0
    fi

    # Best-effort cleanup; the temp file may never have been created.
    rm -f -- "$temp_file" 2>/dev/null || true
    return 1
}

# Store a timestamp in a file using atomic_file_update.
# Parameters:
#   $1 - timestamp_file: File that receives the timestamp
#   $2 - current_time: Timestamp value to write
# Returns: 0 on success, 1 on failure (an error is logged)
update_timestamp_file() {
    local timestamp_file="$1"
    local current_time="$2"

    if atomic_file_update "$timestamp_file" "$current_time"; then
        return 0
    fi

    log_error "Failed to update timestamp file: $timestamp_file"
    return 1
}

#=============================================================================
# SINGLE INSTANCE ENFORCEMENT
#=============================================================================

# Make sure this is the only MCP instance, using the MCP PID file.
# Steps:
# 1. If the PID file exists, read the PID stored in it
# 2. If a process with that PID can be signalled, another MCP is active:
#    log an error and exit 1
# 3. Otherwise the file is stale and gets removed
# 4. Write this process's PID to the PID file
# Exits with status 1 if the PID file cannot be read or written, or if
# another MCP instance is already running.
check_mcp_instance() {
    local existing_pid

    if [[ -f "$MCP_PID_FILE" ]]; then
        existing_pid=$(cat "$MCP_PID_FILE") || {
            log_error "Cannot read MCP PID file: $MCP_PID_FILE"
            exit 1
        }

        if ! kill -0 "$existing_pid" 2>/dev/null; then
            log_info "Removing stale MCP PID file"
            rm -f "$MCP_PID_FILE"
        else
            log_error "MCP is already running (PID: $existing_pid)"
            exit 1
        fi
    fi

    atomic_file_update "$MCP_PID_FILE" "$$" || {
        log_error "Failed to create MCP PID file: $MCP_PID_FILE"
        exit 1
    }

    log_info "MCP started (PID: $$)"
}

# Graceful shutdown handler for the MCP process.
# Process:
# 1. Log shutdown message
# 2. Remove the MCP PID file (rm -f: a missing file is not an error)
# 3. Exit with status 0
# Installed by setup_signal_handlers for TERM, INT and QUIT.
# Does not return: it always terminates the script.
cleanup_mcp() {
    log_info "MCP shutting down"
    rm -f "$MCP_PID_FILE"
    exit 0
}

#=============================================================================
# CONFIGURATION MONITORING
#=============================================================================

# Monitor the configuration file for changes.
# The file's modification time is compared with the value remembered in
# ${SCRIPT_DIR}/.config_mtime by the previous call; a difference logs a
# warning so operators know a restart may be needed. The remembered value is
# then refreshed.
# The mtime is read with GNU "stat -c %Y", falling back to BSD/macOS
# "stat -f %m"; a non-numeric result is treated as a stat failure.
# Globals: SCRIPT_DIR, CONFIG_FILE (read)
# Returns: 1 if the configuration file cannot be stat'ed, 0 otherwise.
#          Failing to store the new mtime is non-critical and still returns 0.
check_config_changes() {
    local config_mtime_file="${SCRIPT_DIR}/.config_mtime"
    local config_mtime=""
    local last_mtime

    if ! config_mtime=$(stat -c %Y "$CONFIG_FILE" 2>/dev/null); then
        config_mtime=$(stat -f %m "$CONFIG_FILE" 2>/dev/null) || config_mtime=""
    fi

    if [[ ! "$config_mtime" =~ ^[0-9]+$ ]]; then
        log_warn "Cannot stat configuration file: $CONFIG_FILE"
        return 1
    fi

    if [[ -f "$config_mtime_file" ]]; then
        if last_mtime=$(cat "$config_mtime_file") && [[ "$config_mtime" != "$last_mtime" ]]; then
            log_warn "Configuration file has been modified - restart recommended"
        fi
    fi

    if ! atomic_file_update "$config_mtime_file" "$config_mtime"; then
        # Never let the status of the debug logger leak out as our own.
        log_debug "Failed to update config mtime file (non-critical)" || true
    fi

    return 0
}

#=============================================================================
# STATUS REPORTING
#=============================================================================

# Show current MCP and client status on stdout.
# Prints the configuration path, log level and git-update state, then the
# state of the MCP process (from MCP_PID_FILE), then one line per configured
# client: disabled, stopped (no PID file), running, or crashed (PID file
# present but no such process).
# The client PID file path is built here from CLIENT_DIRECTORY and
# CLIENT_PID_FILENAME instead of through a helper function, because the
# status command invokes this function before the client-management helpers
# further down in the file have been defined.
# Globals: CONFIG_FILE, MCP_LOG_LEVEL, GIT_UPDATES_ENABLED, MCP_PID_FILE,
#          CLIENT_ENABLED, CLIENT_DIRECTORY, CLIENT_PID_FILENAME (all read)
show_status() {
    local client_name
    local git_state="disabled"
    local mcp_pid
    local found_clients=false
    local status_icon status_text pid_info pid_file client_pid

    if [[ "$GIT_UPDATES_ENABLED" == "true" ]]; then
        git_state="enabled"
    fi

    echo "=== MCP Status ==="
    echo "Configuration: $CONFIG_FILE"
    echo "Log level: $MCP_LOG_LEVEL"
    echo "Git updates: $git_state"
    echo ""

    # MCP status
    if [[ -f "$MCP_PID_FILE" ]]; then
        if mcp_pid=$(cat "$MCP_PID_FILE") && kill -0 "$mcp_pid" 2>/dev/null; then
            echo "✓ MCP running (PID: $mcp_pid)"
        else
            echo "✗ MCP PID file exists but process is dead"
        fi
    else
        echo "✗ MCP not running"
    fi

    echo ""
    echo "=== Client Status ==="

    for client_name in "${!CLIENT_ENABLED[@]}"; do
        found_clients=true
        status_icon="○"
        status_text="stopped"
        pid_info=""

        if [[ "${CLIENT_ENABLED[$client_name]}" != "true" ]]; then
            status_icon="⊝"
            status_text="disabled"
        else
            pid_file="${CLIENT_DIRECTORY[$client_name]}/${CLIENT_PID_FILENAME[$client_name]}"

            if [[ -f "$pid_file" ]]; then
                client_pid=""
                if client_pid=$(cat "$pid_file") && kill -0 "$client_pid" 2>/dev/null; then
                    status_icon="✓"
                    status_text="running"
                    pid_info=" (PID: $client_pid)"
                else
                    status_icon="✗"
                    status_text="crashed"
                    pid_info=" (stale PID: $client_pid)"
                fi
            fi
        fi

        printf "%s %-20s %s%s - %s\n" \
            "$status_icon" \
            "$client_name" \
            "$status_text" \
            "$pid_info" \
            "${CLIENT_DIRECTORY[$client_name]}"
    done

    if [[ "$found_clients" == false ]]; then
        echo "No clients configured"
    fi
}

# Handle status command if requested: print the status report and exit 0
# without installing signal handlers or entering the control loop.
# NOTE(review): this runs before the functions defined further down in the
# file exist, so show_status can only rely on functions defined above
# this point.
if [[ "${1:-}" == "status" ]]; then
    show_status
    exit 0
fi

#=============================================================================
# SIGNAL HANDLERS
#=============================================================================

# HUP handler: placeholder for configuration reload.
# Nothing is re-read; it only logs a warning telling the operator that MCP
# must be restarted for configuration changes to take effect.
reload_config() {
    log_warn "Configuration reload requested - restart MCP to apply changes"
}

# Write a short status summary to the log (USR1 handler).
# Logs whether git updates are enabled and how many of the enabled clients
# are currently running.
# The counters are advanced with $((...)) assignments rather than ((x++)):
# a post-increment of 0 evaluates to 0, which makes (( )) return status 1
# and would terminate the script under set -e.
# The loop variable is local so that a signal arriving in the middle of
# another loop over clients cannot overwrite that loop's variable.
log_status() {
    local client_name
    local enabled_count=0
    local running_count=0
    local git_state="disabled"

    if [[ "$GIT_UPDATES_ENABLED" == "true" ]]; then
        git_state="enabled"
    fi

    log_info "=== Status Check Requested ==="
    log_info "Git updates: $git_state"

    for client_name in "${!CLIENT_ENABLED[@]}"; do
        [[ "${CLIENT_ENABLED[$client_name]}" == "true" ]] || continue

        enabled_count=$((enabled_count + 1))
        if is_client_running "$client_name"; then
            running_count=$((running_count + 1))
        fi
    done

    log_info "Clients: $running_count/$enabled_count running"
}

# Flip the git update switch (USR2 handler).
# "true" becomes "false"; any other value becomes "true". The new state is
# logged.
toggle_git_updates() {
    case "$GIT_UPDATES_ENABLED" in
        true)
            GIT_UPDATES_ENABLED="false"
            log_info "Git updates disabled by signal"
            ;;
        *)
            GIT_UPDATES_ENABLED="true"
            log_info "Git updates enabled by signal"
            ;;
    esac
}

# Install the traps MCP uses for shutdown and runtime control.
# Handlers:
#   TERM/INT/QUIT: cleanup_mcp (graceful shutdown)
#   HUP:           reload_config (restart required)
#   USR1:          log_status (status summary in the log)
#   USR2:          toggle_git_updates (git updates on/off)
#   ERR:           log an error with the line number of the failing command
setup_signal_handlers() {
    local shutdown_signal

    for shutdown_signal in TERM INT QUIT; do
        trap cleanup_mcp "$shutdown_signal"
    done

    trap reload_config HUP
    trap log_status USR1
    trap toggle_git_updates USR2
    trap 'log_error "MCP crashed on line $LINENO"' ERR
}

#=============================================================================
# CLIENT MANAGEMENT FUNCTIONS
#=============================================================================

# Print the path of a client's PID file: "<client directory>/<pid filename>".
# Parameters: $1 - client_name: Name of the client
# Globals: CLIENT_DIRECTORY, CLIENT_PID_FILENAME (read)
# Outputs: the path on stdout (e.g., /path/to/client/bot.pid)
get_client_pid_file() {
    local client_name="$1"

    printf '%s/%s\n' \
        "${CLIENT_DIRECTORY[$client_name]}" \
        "${CLIENT_PID_FILENAME[$client_name]}"
}

# Print the path of a client's executable: "<client directory>/<script name>".
# Parameters: $1 - client_name: Name of the client
# Globals: CLIENT_DIRECTORY, CLIENT_SCRIPT_NAME (read)
# Outputs: the path on stdout (e.g., /path/to/client/discord-bot.js)
get_client_script() {
    local client_name="$1"

    printf '%s/%s\n' \
        "${CLIENT_DIRECTORY[$client_name]}" \
        "${CLIENT_SCRIPT_NAME[$client_name]}"
}

# Report whether a client process is alive.
# The PID is taken from the client's PID file and probed with signal 0.
# A missing PID file counts as "not running".
# Parameters: $1 - client_name: Name of the client
# Returns: 0 if running, 1 if not running or the PID file cannot be resolved
#          or read
# Side effects: Logs an error when the PID file path or content is unavailable.
is_client_running() {
    local client_name="$1"
    local pid_path
    local recorded_pid

    pid_path=$(get_client_pid_file "$client_name") || {
        log_error "Failed to get PID file path for client $client_name"
        return 1
    }

    [[ -f "$pid_path" ]] || return 1

    recorded_pid=$(cat "$pid_path") || {
        log_error "Cannot read PID file for client $client_name: $pid_path"
        return 1
    }

    if kill -0 "$recorded_pid" 2>/dev/null; then
        return 0
    fi
    return 1
}

# Clean up stale PID files of all enabled clients.
# A PID file is stale when it can be read but no process with the recorded
# PID can be signalled; such files are removed. Disabled clients are skipped.
# All loop state is local, and the debug log call cannot abort the function
# under set -e even if the logger returns non-zero.
# Returns: 0
cleanup_stale_pid_files() {
    local client_name
    local pid_file
    local pid

    log_debug "Cleaning up stale PID files..." || true

    for client_name in "${!CLIENT_ENABLED[@]}"; do
        [[ "${CLIENT_ENABLED[$client_name]}" == "true" ]] || continue

        if ! pid_file=$(get_client_pid_file "$client_name"); then
            continue
        fi

        if [[ -f "$pid_file" ]]; then
            if pid=$(cat "$pid_file") && ! kill -0 "$pid" 2>/dev/null; then
                log_info "Removing stale PID file for client $client_name"
                rm -f -- "$pid_file"
            fi
        fi
    done

    return 0
}

# Check that a client can be started from its configuration.
# The checks run in order and stop at the first problem:
# 1. A directory is configured
# 2. The directory exists
# 3. The script path can be resolved
# 4. The script exists as a regular file
# 5. The script is executable
# Parameters: $1 - client_name: Name of the client
# Returns: 0 if valid, 1 if invalid
# Side effects: Logs the specific problem found.
validate_client_config() {
    local client_name="$1"
    local client_dir="${CLIENT_DIRECTORY[$client_name]}"
    local client_script=""
    local problem=""

    if [[ -z "$client_dir" ]]; then
        problem="No directory specified"
    elif [[ ! -d "$client_dir" ]]; then
        problem="Directory does not exist: $client_dir"
    elif ! client_script=$(get_client_script "$client_name"); then
        problem="Failed to get script path"
    elif [[ ! -f "$client_script" ]]; then
        problem="Script not found: $client_script"
    elif [[ ! -x "$client_script" ]]; then
        problem="Script not executable: $client_script"
    fi

    if [[ -n "$problem" ]]; then
        log_error "Client $client_name: $problem"
        return 1
    fi

    return 0
}

# Start a client process in its configured directory.
# Process:
# 1. Log startup message
# 2. Validate client configuration
# 3. In a subshell: change to the client directory and launch the client
#    script in the background (the subshell keeps MCP's own working
#    directory unchanged)
# 4. Wait for the client's stagger delay
# 5. Log completion
# A launch subshell that fails (the directory cannot be entered) is reported
# as an error and returned as failure, instead of being ignored or aborting
# MCP under set -e.
# Parameters: $1 - client_name: Name of the client
# Returns: 0 once the launch was initiated, 1 on validation or launch failure
# Note: Client is responsible for writing its own PID file.
start_client_instance() {
    local client_name="$1"
    local client_dir="${CLIENT_DIRECTORY[$client_name]}"
    local script_name="${CLIENT_SCRIPT_NAME[$client_name]}"
    local start_stagger="${CLIENT_START_STAGGER[$client_name]}"

    log_info "Starting client: $client_name in $client_dir"

    if ! validate_client_config "$client_name"; then
        return 1
    fi

    if ! (
        cd "$client_dir" || exit 1
        "./$script_name" &
    ); then
        log_error "Client $client_name: Cannot enter directory: $client_dir"
        return 1
    fi

    sleep "$start_stagger"
    log_info "Client $client_name startup initiated"
}

# Stop a client process, gracefully if possible.
# Steps:
# 1. Resolve the client's PID file; without a PID file there is nothing to do
# 2. Read the PID and log the stop request
# 3. If the process is alive, send TERM
# 4. Poll once per second, for at most 10 seconds, until it is gone
# 5. If it is still alive after that, log a warning and send KILL
# Parameters: $1 - client_name: Name of the client
# Returns: 1 if the PID file path or content cannot be obtained, else 0
# Side effects: Stops the process but leaves the PID file in place (the
#               client is expected to clean it up).
stop_client_instance() {
    local client_name="$1"
    local pid_path
    local target_pid
    local remaining

    pid_path=$(get_client_pid_file "$client_name") || {
        log_error "Failed to get PID file path for client $client_name"
        return 1
    }

    # No PID file: nothing to stop.
    [[ -f "$pid_path" ]] || return 0

    target_pid=$(cat "$pid_path") || {
        log_error "Cannot read PID file for client $client_name: $pid_path"
        return 1
    }

    log_info "Stopping client: $client_name (PID: $target_pid)"

    # Process already gone: nothing to signal.
    kill -0 "$target_pid" 2>/dev/null || return 0

    kill -TERM "$target_pid"

    for ((remaining = 10; remaining > 0; remaining--)); do
        kill -0 "$target_pid" 2>/dev/null || break
        sleep 1
    done

    if kill -0 "$target_pid" 2>/dev/null; then
        log_warn "Force killing client: $client_name"
        kill -KILL "$target_pid" 2>/dev/null || true
    fi
}

# Print the names of all clients whose "enabled" setting is exactly "true".
# Used to filter which clients should be managed by MCP.
# Globals: CLIENT_ENABLED (read)
# Outputs: one client name per line, in no particular order
# Returns: 0
# The loop variable is local so that calling this function in the current
# shell never overwrites a caller's client_name.
get_enabled_clients() {
    local client_name

    for client_name in "${!CLIENT_ENABLED[@]}"; do
        if [[ "${CLIENT_ENABLED[$client_name]}" == "true" ]]; then
            printf '%s\n' "$client_name"
        fi
    done
}

# Main client management function - checks all enabled clients.
# Process for each enabled client:
# 1. Resolve the client's PID file
# 2. No PID file: the client is intentionally stopped - ignore it
# 3. PID file present but process not running: the client crashed, so wait
#    MCP_RESTART_DELAY seconds and start it again
# A restart that fails is logged and the remaining clients are still
# processed; one broken client must not take the supervisor down under
# set -e.
# Called every MCP_INTERVAL seconds from main loop.
# Returns: 1 if the list of enabled clients cannot be obtained, else 0
manage_client_instances() {
    local enabled_clients
    local client_name
    local pid_file

    if ! enabled_clients=$(get_enabled_clients); then
        log_error "Failed to get enabled clients list"
        return 1
    fi

    while IFS= read -r client_name; do
        [[ -n "$client_name" ]] || continue

        if ! pid_file=$(get_client_pid_file "$client_name"); then
            log_error "Failed to get PID file for client $client_name"
            continue
        fi

        [[ -f "$pid_file" ]] || continue

        if ! is_client_running "$client_name"; then
            log_info "Client $client_name crashed, restarting..."
            sleep "$MCP_RESTART_DELAY"
            if ! start_client_instance "$client_name"; then
                log_error "Failed to restart client $client_name"
            fi
        fi
    done <<< "$enabled_clients"

    return 0
}

#=============================================================================
# GIT UPDATE FUNCTIONS
#=============================================================================

# Periodically check all client repositories for git updates.
# Process:
# 1. Return immediately if git updates are disabled
# 2. Read the time of the last check from ${SCRIPT_DIR}/.last_git_check and
#    return early if fewer than MCP_GIT_CHECK_INTERVAL seconds have passed
# 3. Record the current time as the new last-check time
# 4. Ask check_client_for_updates about every enabled client
# 5. Hand the clients with pending updates to perform_selective_update
# The stored timestamp is only used when it is a plain digit string; an
# empty or corrupt file is ignored (with a warning) instead of being fed into
# an arithmetic expression. Numbers are read as base 10 even with leading
# zeros. Debug log calls cannot abort the function under set -e.
# Called every MCP_INTERVAL seconds from main loop.
# Returns: 0 when nothing had to be done; 1 if the clock, the timestamp file
#          or the client list is unavailable; otherwise the status of
#          perform_selective_update.
check_for_updates() {
    local client_name
    local current_time
    local last_check
    local enabled_clients
    local last_check_file="${SCRIPT_DIR}/.last_git_check"
    local clients_needing_updates=()

    # Skip if git updates are disabled
    if [[ "$GIT_UPDATES_ENABLED" != "true" ]]; then
        log_debug "Git updates disabled, skipping check" || true
        return 0
    fi

    if ! current_time=$(date +%s); then
        log_error "Failed to get current timestamp"
        return 1
    fi

    if [[ -f "$last_check_file" ]]; then
        if ! last_check=$(cat "$last_check_file"); then
            log_error "Cannot read last git check file: $last_check_file"
            return 1
        fi

        if [[ "$last_check" =~ ^[0-9]+$ ]]; then
            if ((current_time - 10#$last_check < 10#$MCP_GIT_CHECK_INTERVAL)); then
                return 0
            fi
        else
            log_warn "Ignoring invalid timestamp in $last_check_file"
        fi
    fi

    if ! update_timestamp_file "$last_check_file" "$current_time"; then
        return 1
    fi

    log_info "Checking for git updates..."

    if ! enabled_clients=$(get_enabled_clients); then
        log_error "Failed to get enabled clients for git update check"
        return 1
    fi

    while IFS= read -r client_name; do
        [[ -n "$client_name" ]] || continue

        if check_client_for_updates "$client_name"; then
            clients_needing_updates+=("$client_name")
        fi
    done <<< "$enabled_clients"

    if [[ ${#clients_needing_updates[@]} -eq 0 ]]; then
        log_debug "No git updates needed" || true
        return 0
    fi

    log_info "Updates detected for clients: ${clients_needing_updates[*]}"
    perform_selective_update "${clients_needing_updates[@]}"
}

# Check a single client repository for available updates.
# Process:
# 1. Verify the client directory contains a .git directory and is accessible
# 2. Fetch the latest changes from origin
# 3. Compare local HEAD with the head of origin/<configured branch>
# 4. Report success if the two commits differ
# git is run with "-C <client directory>" rather than after a cd, so the
# working directory of the MCP process itself never changes (relative paths
# such as ./mcp.pid keep pointing at the same file).
# Parameters: $1 - client_name: Name of the client
# Returns: 0 if updates available, 1 if no updates or error
# Side effects: Logs update availability and errors.
check_client_for_updates() {
    local client_name="$1"
    local client_dir="${CLIENT_DIRECTORY[$client_name]}"
    local git_branch="${CLIENT_GIT_BRANCH[$client_name]}"
    local local_commit
    local remote_commit

    if [[ ! -d "$client_dir/.git" ]]; then
        log_debug "Client $client_name: Not a git repository" || true
        return 1
    fi

    if [[ ! -x "$client_dir" ]]; then
        log_error "Cannot access client directory for git check: $client_dir"
        return 1
    fi

    if ! git -C "$client_dir" fetch origin 2>/dev/null; then
        log_error "Git fetch failed for client $client_name"
        return 1
    fi

    if ! local_commit=$(git -C "$client_dir" rev-parse HEAD 2>/dev/null); then
        log_error "Failed to get local commit for client $client_name"
        return 1
    fi

    if ! remote_commit=$(git -C "$client_dir" rev-parse "origin/$git_branch" 2>/dev/null); then
        log_error "Failed to get remote commit for client $client_name (branch: $git_branch)"
        return 1
    fi

    if [[ -n "$local_commit" && -n "$remote_commit" && "$local_commit" != "$remote_commit" ]]; then
        log_info "Updates available for client $client_name (branch: $git_branch)"
        return 0
    fi

    return 1
}

# Perform selective updates on only the clients that need them.
# This is more efficient than the old approach of stopping all clients.
# Process:
# 1. For each client needing updates:
#    a. Stop the client if its PID file exists
#    b. Pull the configured branch from origin (via `git -C`, so the
#       working directory of the calling shell is never changed)
#    c. Restart the client if it was stopped in step (a), even when the
#       update itself failed, so a failed update never leaves it down
# 2. Track success/failure for each client
# 3. Log overall results
# Globals: CLIENT_DIRECTORY (read), CLIENT_GIT_BRANCH (read)
# Parameters: List of client names that need updates
# Returns: 0 if every update succeeded, 1 if any client failed to update
perform_selective_update() {
    local clients_to_update=("$@")
    local update_success=true
    # Keep all per-client state local so the caller's variables (notably
    # its own client_name loop variable) are not overwritten.
    local client_name client_dir git_branch was_running pid_file

    log_info "Performing selective update on ${#clients_to_update[@]} client(s)"

    for client_name in "${clients_to_update[@]}"; do
        client_dir="${CLIENT_DIRECTORY[$client_name]}"
        git_branch="${CLIENT_GIT_BRANCH[$client_name]}"
        was_running=false

        # A present PID file is treated as "client was running".
        if pid_file=$(get_client_pid_file "$client_name") && [[ -f "$pid_file" ]]; then
            was_running=true
            log_info "Stopping client $client_name for update"
            stop_client_instance "$client_name"
            sleep 2  # Brief pause for clean shutdown
        fi

        # Perform git update
        log_info "Updating client $client_name (branch: $git_branch)"

        if [[ ! -d "$client_dir" || ! -x "$client_dir" ]]; then
            log_error "Cannot access client directory for update: $client_dir"
            update_success=false
        elif ! git -C "$client_dir" pull origin "$git_branch"; then
            log_error "Git pull failed for client $client_name"
            update_success=false
        fi

        # Restart regardless of the update outcome: the client was stopped
        # above and must not stay down because its update failed.
        if [[ "$was_running" == true ]]; then
            log_info "Restarting client $client_name after update"
            start_client_instance "$client_name"
        fi
    done

    if [[ "$update_success" == true ]]; then
        log_info "All selective updates completed successfully"
    else
        log_error "Some selective updates failed - check logs for details"
        return 1
    fi
}

#=============================================================================
# MAIN CONTROL LOOP
#=============================================================================

# Primary entry point and control loop for MCP.
# Initialization:
# 1. Log startup banner with configuration summary
# 2. Enforce single instance (check_mcp_instance)
# 3. Set up signal handlers
# 4. Clean up any stale PID files
# 5. Validate all enabled client configurations
# 6. Exit if any validation failures
#
# Main Loop (runs indefinitely):
# 1. Check and restart any crashed clients (manage_client_instances)
# 2. Check for and apply git updates (check_for_updates); a failure here is
#    logged as a warning and does NOT stop the loop
# 3. Monitor configuration file changes (check_config_changes)
# 4. Every 20 loops, log "all systems operational" heartbeat
# 5. Sleep for MCP_INTERVAL seconds
# 6. Repeat
#
# The loop continues until:
# - Signal received (TERM/INT/QUIT) triggers cleanup_mcp()
# - Fatal error occurs (script exits due to set -e)
# - Manual termination
main() {
    local git_updates_state="disabled"
    if [[ "$GIT_UPDATES_ENABLED" == "true" ]]; then
        git_updates_state="enabled"
    fi

    log_info "=== MCP (Master Control Process) Starting ==="
    log_info "Configuration file: $CONFIG_FILE"
    log_info "MCP interval: ${MCP_INTERVAL}s"
    log_info "Git check interval: ${MCP_GIT_CHECK_INTERVAL}s"
    log_info "Default script: $BOT_DEFAULT_SCRIPT_NAME"
    log_info "Default PID file: $BOT_DEFAULT_PID_FILENAME"
    log_info "Log level: $MCP_LOG_LEVEL"
    log_info "Git updates: $git_updates_state"

    check_mcp_instance
    setup_signal_handlers
    cleanup_stale_pid_files

    log_info "Validating client configurations..."
    local validation_failed=false
    local enabled_clients
    local client_name

    if ! enabled_clients=$(get_enabled_clients); then
        log_error "Failed to get enabled clients for validation"
        exit 1
    fi

    if [[ -z "$enabled_clients" ]]; then
        log_warn "No enabled clients found in configuration"
    fi

    while IFS= read -r client_name; do
        [[ -z "$client_name" ]] && continue

        if validate_client_config "$client_name"; then
            log_info "Client $client_name validated: ${CLIENT_DIRECTORY[$client_name]}"
        else
            validation_failed=true
        fi
    done <<< "$enabled_clients"

    if [[ "$validation_failed" == true ]]; then
        log_error "Some client configurations failed validation"
        exit 1
    fi

    # Initialize config monitoring
    check_config_changes

    log_info "MCP initialization complete, entering main loop"

    local loop_count=0
    while true; do
        # Plain assignment instead of ((loop_count++)): the arithmetic command
        # returns status 1 when its value is 0 (the first post-increment),
        # which would terminate the script under `set -e`.
        loop_count=$((loop_count + 1))

        manage_client_instances

        # A failed update check (e.g. a failed pull for one client) must not
        # take the supervisor down; report it and keep supervising.
        check_for_updates || log_warn "Update check reported a failure; continuing main loop"

        check_config_changes

        if ((loop_count % 20 == 0)); then
            log_info "MCP loop $loop_count - all systems operational"
        fi

        sleep "$MCP_INTERVAL"
    done
}

# Script entry point.
# When the file is executed directly, $0 and BASH_SOURCE[0] name the same
# file and main() is invoked with the script's arguments. When the file is
# sourced, the two differ and main() is not invoked here.
if [[ "$0" == "${BASH_SOURCE[0]}" ]]; then
    main "$@"
fi