# Notebook setup: imports, warning policy, and project path configuration.
# Statement order matters here: the sys.path entry must be added before the
# project-local modules further down are imported.

# Standard library imports
import sys
import os
from pathlib import Path
import warnings
# Silences every warning for the rest of the session.
# NOTE(review): this also hides any warnings emitted by the libraries and model
# fits used later — consider narrowing the filter to specific categories.
warnings.filterwarnings('ignore')

# Add parent directory to path
# project_root is the parent of the current working directory, so it only points
# at the project when the notebook is launched from a direct subdirectory of it.
project_root = Path.cwd().parent
sys.path.insert(0, str(project_root))

# Scientific computing
import numpy as np
import pandas as pd
from scipy import stats

# Plotting
import matplotlib.pyplot as plt
import seaborn as sns
plt.style.use('seaborn-v0_8-darkgrid')

# Custom modules
# Presumably resolved through the sys.path entry inserted above — verify if the
# notebook is moved to another directory.
from regime_detector import MSGARCHDetector
from model_selection import ModelSelector
from visualizations import plot_regime_persistence, plot_model_comparison
from utils import calculate_regime_statistics

# Confirmation banner so a failed import is obvious when the cell is run
print("✅ Libraries imported successfully")
print(f"📁 Project root: {project_root}")

✅ Libraries imported successfully
📁 Project root: /home/jaden/Documents/projects/trade-matrix-mvp/src/trade-matrix/research/ms-garch

# Load the weekly Kraken price history for both assets
data_path = project_root / 'data' / 'historical'

# Both CSV exports are read the same way: the 'timestamp' column is parsed as
# dates and becomes the DataFrame index.
weekly_csv_options = {'parse_dates': ['timestamp'], 'index_col': 'timestamp'}

# Bitcoin weekly data (2018-2025)
btc_weekly = pd.read_csv(
    data_path / 'kraken_btcusd_1w_20180104_20251009.csv',
    **weekly_csv_options
)

# Ethereum weekly data (2018-2025)
eth_weekly = pd.read_csv(
    data_path / 'kraken_ethusd_1w_20180104_20251009.csv',
    **weekly_csv_options
)

# Row count and covered date range per asset
print(f"📊 BTC Weekly Data: {len(btc_weekly)} weeks ({btc_weekly.index[0]} to {btc_weekly.index[-1]})")
print(f"📊 ETH Weekly Data: {len(eth_weekly)} weeks ({eth_weekly.index[0]} to {eth_weekly.index[-1]})")

# Descriptive statistics, one bannered section per asset (BTC first, then ETH)
summary_rule = "=" * 50
summary_sections = (
    ("BTC Weekly Data Summary:", btc_weekly),
    ("ETH Weekly Data Summary:", eth_weekly),
)
for section_heading, weekly_frame in summary_sections:
    print("\n" + summary_rule)
    print(section_heading)
    print(summary_rule)
    print(weekly_frame.describe())

📊 BTC Weekly Data: 406 weeks (2018-01-04 00:00:00 to 2025-10-09 00:00:00)
📊 ETH Weekly Data: 406 weeks (2018-01-04 00:00:00 to 2025-10-09 00:00:00)

==================================================
BTC Weekly Data Summary:
==================================================
                open           high            low          close  \
count     406.000000     406.000000     406.000000     406.000000   
mean    35659.536946   37765.050739   33414.781281   35894.940887   
std     31173.574159   32484.848168   29664.024492   31379.392530   
min      3367.200000    3494.100000    3120.000000    3367.400000   
25%      9322.725000    9969.000000    8907.525000    9322.425000   
50%     26642.850000   28217.450000   25351.150000   27001.650000   
75%     53431.575000   58173.850000   48984.500000   54628.275000   
max    123354.300000  126198.100000  118344.400000  123354.300000   

              volume      turnover  
count     406.000000  4.060000e+02  
mean    30961.960070  7.819907e+08  
std     21452.753455  6.056266e+08  
min      5418.176330  6.090860e+07  
25%     16988.674085  3.464914e+08  
50%     24967.703906  6.391746e+08  
75%     37189.289324  1.003603e+09  
max    171377.098861  3.832182e+09  

==================================================
ETH Weekly Data Summary:
==================================================
              open         high          low        close        volume  \
count   406.000000   406.000000   406.000000   406.000000  4.060000e+02   
mean   1663.832857  1794.019581  1510.673153  1670.871281  3.675601e+05   
std    1294.231876  1381.353286  1181.204058  1298.104969  3.101622e+05   
min      89.110000   103.900000    80.560000    89.140000  5.282686e+04   
25%     318.612500   364.737500   273.700000   318.665000  1.687158e+05   
50%    1642.500000  1751.600000  1534.435000  1645.545000  2.766826e+05   
75%    2649.592500  2813.650000  2373.895000  2662.160000  4.538328e+05   
max    4750.830000  4956.010000  4341.390000  4750.830000  2.075632e+06   

           turnover  
count  4.060000e+02  
mean   4.448646e+08  
std    5.018038e+08  
min    1.918252e+07  
25%    1.380444e+08  
50%    2.721722e+08  
75%    5.825032e+08  
max    4.215843e+09

# Weekly log returns, ln(close_t / close_{t-1}), added as a new column on each
# price frame; these are the inputs for MS-GARCH fitting.
for price_frame in (btc_weekly, eth_weekly):
    close_prices = price_frame['close']
    price_frame['log_returns'] = np.log(close_prices / close_prices.shift(1))

# The first row has no previous close, so its return is NaN and is dropped
btc_returns = btc_weekly['log_returns'].dropna()
eth_returns = eth_weekly['log_returns'].dropna()

# Express returns in percent (done for better numerical stability during fitting)
btc_returns_pct = btc_returns * 100
eth_returns_pct = eth_returns * 100

print(f"✅ BTC Weekly Returns: {len(btc_returns_pct)} observations")
print(f"   Mean: {btc_returns_pct.mean():.4f}%, Std: {btc_returns_pct.std():.4f}%")
print(f"   Min: {btc_returns_pct.min():.4f}%, Max: {btc_returns_pct.max():.4f}%")

print(f"\n✅ ETH Weekly Returns: {len(eth_returns_pct)} observations")
print(f"   Mean: {eth_returns_pct.mean():.4f}%, Std: {eth_returns_pct.std():.4f}%")
print(f"   Min: {eth_returns_pct.min():.4f}%, Max: {eth_returns_pct.max():.4f}%")

# Side-by-side histograms of the percentage returns
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# (axis, data, title, extra hist styling); BTC keeps the default bar colour
histogram_panels = (
    (axes[0], btc_returns_pct, 'BTC Weekly Returns Distribution', {}),
    (axes[1], eth_returns_pct, 'ETH Weekly Returns Distribution', {'color': 'orange'}),
)
for panel, panel_returns, panel_title, extra_style in histogram_panels:
    panel.hist(panel_returns, bins=50, alpha=0.7, edgecolor='black', **extra_style)
    panel.set_title(panel_title, fontsize=12, fontweight='bold')
    panel.set_xlabel('Weekly Return (%)')
    panel.set_ylabel('Frequency')
    # Reference line separating negative from positive weeks
    panel.axvline(0, color='red', linestyle='--', linewidth=2, label='Zero')
    panel.legend()

plt.tight_layout()
plt.show()

✅ BTC Weekly Returns: 405 observations
   Mean: 0.4950%, Std: 9.2312%
   Min: -38.3119%, Max: 33.2287%

✅ ETH Weekly Returns: 405 observations
   Mean: 0.2747%, Std: 12.2271%
   Min: -49.6184%, Max: 47.6502%

# Fit and compare 2-, 3- and 4-regime MS-GARCH models on BTC weekly returns.
# Fitting parameters are reduced for faster execution (n_starts=3, max_iter=200).
print("="*60)
print("Fitting BTC MS-GARCH Models (Weekly Data)")
print("="*60)

import time
btc_start_time = time.time()

# Create ModelSelector with the reduced fitting parameters
btc_selector = ModelSelector(
    max_iter=200,    # Reduced from 500 to shorten runtime
    tol=1e-3,
    n_starts=3,      # Reduced from 10 to shorten runtime; fewer starts raise the risk of a local optimum
    verbose=True
)

# Fit 2/3/4-regime models in one call using compare_regime_counts()
print("\nFitting 2, 3, 4-regime MS-GARCH models...")
btc_comparison = btc_selector.compare_regime_counts(
    returns=btc_returns_pct.values,
    n_regimes_list=[2, 3, 4],
    garch_type='gjrGARCH',
    distribution='normal',
    bar_frequency='1W'  # Weekly frequency for duration conversion
)

btc_elapsed = time.time() - btc_start_time

# The comparison table carries a per-model 'converged' flag. Report the models
# that hit the iteration limit instead of unconditionally claiming success.
btc_unconverged = btc_comparison.loc[
    ~btc_comparison['converged'].astype(bool), 'n_regimes'
].tolist()

print("\n" + "="*60)
if btc_unconverged:
    print(f"BTC models fitted in {btc_elapsed:.1f} seconds; did NOT converge for n_regimes={btc_unconverged}")
else:
    print(f"All BTC models fitted successfully in {btc_elapsed:.1f} seconds")
print("="*60)

============================================================
Fitting BTC MS-GARCH Models (Weekly Data)
============================================================

Fitting 2, 3, 4-regime MS-GARCH models...
======================================================================
MS-GARCH MODEL SELECTION: REGIME COUNT COMPARISON
======================================================================
GARCH Type: gjrGARCH
Distribution: normal
Comparing: [2, 3, 4] regimes
======================================================================

======================================================================
Fitting 2-regime model...
======================================================================
======================================================================
MS-GARCH Model Estimation
======================================================================
Specification: 2-regime gjrGARCH
Distribution: normal
Observations: 405
Random starts: 3
======================================================================

Random start 1/3...

  Converged at iteration 17
  ✓ New best log-likelihood: -1372.06

Random start 2/3...

  Converged at iteration 17

Random start 3/3...

  Converged at iteration 17

======================================================================
ESTIMATION COMPLETE
======================================================================
Final log-likelihood: -1372.06
AIC: 2766.12
BIC: 2810.17
Converged: True
======================================================================

✓ 2-regime model complete:
  Log-Likelihood: -1372.06
  AIC: 2766.12
  BIC: 2810.17
  Converged: True
  Avg Duration: 21.13 days (3.02 bars @ 1W)
  Unique Regimes: 2/2

======================================================================
Fitting 3-regime model...
======================================================================
======================================================================
MS-GARCH Model Estimation
======================================================================
Specification: 3-regime gjrGARCH
Distribution: normal
Observations: 405
Random starts: 3
======================================================================

Random start 1/3...

  Converged at iteration 72
  ✓ New best log-likelihood: -1372.11

Random start 2/3...

  Converged at iteration 72

Random start 3/3...

  Converged at iteration 72

======================================================================
ESTIMATION COMPLETE
======================================================================
Final log-likelihood: -1372.11
AIC: 2784.21
BIC: 2864.29
Converged: True
======================================================================

✓ 3-regime model complete:
  Log-Likelihood: -1372.11
  AIC: 2784.21
  BIC: 2864.29
  Converged: True
  Avg Duration: 16.38 days (2.34 bars @ 1W)
  Unique Regimes: 3/3

======================================================================
Fitting 4-regime model...
======================================================================
======================================================================
MS-GARCH Model Estimation
======================================================================
Specification: 4-regime gjrGARCH
Distribution: normal
Observations: 405
Random starts: 3
======================================================================

Random start 1/3...

  ✓ New best log-likelihood: -1371.86

Random start 2/3...

Random start 3/3...

======================================================================
ESTIMATION COMPLETE
======================================================================
Final log-likelihood: -1371.86
AIC: 2805.72
BIC: 2929.84
Converged: False
======================================================================

✓ 4-regime model complete:
  Log-Likelihood: -1371.86
  AIC: 2805.72
  BIC: 2929.84
  Converged: False
  Avg Duration: 13.87 days (1.98 bars @ 1W)
  Unique Regimes: 4/4

======================================================================
COMPARISON SUMMARY
======================================================================

Information Criteria (lower is better):
 n_regimes         AIC         BIC        HQIC  converged
         2 2766.124572 2810.167330 2783.557528       True
         3 2784.214841 2864.292582 2815.911125       True
         4 2805.722710 2929.843209 2854.851950      False

Regime Persistence:
 n_regimes  avg_duration_days  min_duration_days  max_duration_days
         2          21.129390           9.695428          32.563353
         3          16.379723           7.258271          32.545372
         4          13.869466           7.007005          32.829027

Model Quality:
 n_regimes  unique_regimes  has_degenerate  converged
         2               2           False       True
         3               3           False       True
         4               4           False      False

Best by criterion:
  AIC: 2 regimes (AIC=2766.12)
  BIC: 2 regimes (BIC=2810.17)
  HQIC: 2 regimes (HQIC=2783.56)

============================================================
All BTC models fitted successfully in 165.8 seconds
============================================================

# Fit and compare 2-, 3- and 4-regime MS-GARCH models on ETH weekly returns.
# Fitting parameters are reduced for faster execution (n_starts=3, max_iter=200).
print("="*60)
print("Fitting ETH MS-GARCH Models (Weekly Data)")
print("="*60)

# Imported here as well so this cell does not depend on another cell having run
import time
eth_start_time = time.time()

# Create ModelSelector with the reduced fitting parameters
eth_selector = ModelSelector(
    max_iter=200,    # Reduced from 500 to shorten runtime
    tol=1e-3,
    n_starts=3,      # Reduced from 10 to shorten runtime; fewer starts raise the risk of a local optimum
    verbose=True
)

# Fit 2/3/4-regime models in one call using compare_regime_counts()
print("\nFitting 2, 3, 4-regime MS-GARCH models...")
eth_comparison = eth_selector.compare_regime_counts(
    returns=eth_returns_pct.values,
    n_regimes_list=[2, 3, 4],
    garch_type='gjrGARCH',
    distribution='normal',
    bar_frequency='1W'  # Weekly frequency for duration conversion
)

eth_elapsed = time.time() - eth_start_time

# The comparison table carries a per-model 'converged' flag. Report the models
# that hit the iteration limit instead of unconditionally claiming success.
eth_unconverged = eth_comparison.loc[
    ~eth_comparison['converged'].astype(bool), 'n_regimes'
].tolist()

print("\n" + "="*60)
if eth_unconverged:
    print(f"ETH models fitted in {eth_elapsed:.1f} seconds; did NOT converge for n_regimes={eth_unconverged}")
else:
    print(f"All ETH models fitted successfully in {eth_elapsed:.1f} seconds")
print("="*60)

============================================================
Fitting ETH MS-GARCH Models (Weekly Data)
============================================================

Fitting 2, 3, 4-regime MS-GARCH models...
======================================================================
MS-GARCH MODEL SELECTION: REGIME COUNT COMPARISON
======================================================================
GARCH Type: gjrGARCH
Distribution: normal
Comparing: [2, 3, 4] regimes
======================================================================

======================================================================
Fitting 2-regime model...
======================================================================
======================================================================
MS-GARCH Model Estimation
======================================================================
Specification: 2-regime gjrGARCH
Distribution: normal
Observations: 405
Random starts: 3
======================================================================

Random start 1/3...

  Converged at iteration 42
  ✓ New best log-likelihood: -1498.72

Random start 2/3...

  Converged at iteration 42

Random start 3/3...

  Converged at iteration 42

======================================================================
ESTIMATION COMPLETE
======================================================================
Final log-likelihood: -1498.72
AIC: 3019.44
BIC: 3063.48
Converged: True
======================================================================

✓ 2-regime model complete:
  Log-Likelihood: -1498.72
  AIC: 3019.44
  BIC: 3063.48
  Converged: True
  Avg Duration: 17.00 days (2.43 bars @ 1W)
  Unique Regimes: 2/2

======================================================================
Fitting 3-regime model...
======================================================================
======================================================================
MS-GARCH Model Estimation
======================================================================
Specification: 3-regime gjrGARCH
Distribution: normal
Observations: 405
Random starts: 3
======================================================================

Random start 1/3...

  ✓ New best log-likelihood: -1497.80

Random start 2/3...

Random start 3/3...

======================================================================
ESTIMATION COMPLETE
======================================================================
Final log-likelihood: -1497.80
AIC: 3035.60
BIC: 3115.68
Converged: False
======================================================================

✓ 3-regime model complete:
  Log-Likelihood: -1497.80
  AIC: 3035.60
  BIC: 3115.68
  Converged: False
  Avg Duration: 13.43 days (1.92 bars @ 1W)
  Unique Regimes: 3/3

======================================================================
Fitting 4-regime model...
======================================================================
======================================================================
MS-GARCH Model Estimation
======================================================================
Specification: 4-regime gjrGARCH
Distribution: normal
Observations: 405
Random starts: 3
======================================================================

Random start 1/3...

  ✓ New best log-likelihood: -1497.68

Random start 2/3...

Random start 3/3...

======================================================================
ESTIMATION COMPLETE
======================================================================
Final log-likelihood: -1497.68
AIC: 3057.37
BIC: 3181.49
Converged: False
======================================================================

✓ 4-regime model complete:
  Log-Likelihood: -1497.68
  AIC: 3057.37
  BIC: 3181.49
  Converged: False
  Avg Duration: 11.84 days (1.69 bars @ 1W)
  Unique Regimes: 4/4

======================================================================
COMPARISON SUMMARY
======================================================================

Information Criteria (lower is better):
 n_regimes         AIC         BIC        HQIC  converged
         2 3019.439210 3063.481968 3036.872166       True
         3 3035.602134 3115.679875 3067.298418      False
         4 3057.366926 3181.487425 3106.496167      False

Regime Persistence:
 n_regimes  avg_duration_days  min_duration_days  max_duration_days
         2          16.999872           7.641350          26.358394
         3          13.434662           7.007006          26.168461
         4          11.835371           7.007006          26.259787

Model Quality:
 n_regimes  unique_regimes  has_degenerate  converged
         2               2           False       True
         3               3           False      False
         4               4           False      False

Best by criterion:
  AIC: 2 regimes (AIC=3019.44)
  BIC: 2 regimes (BIC=3063.48)
  HQIC: 2 regimes (HQIC=3036.87)

============================================================
All ETH models fitted successfully in 222.4 seconds
============================================================

# BTC model selection: show the comparison table, pick the BIC-best model and
# print per-regime statistics for it.
print("="*60)
print("BTC Model Selection Results (Weekly Data)")
print("="*60)

# btc_comparison is already returned from compare_regime_counts() above
print(btc_comparison.to_string())

# select_best() yields a (specification, fitted model) pair for the chosen criterion
btc_best_spec, btc_best_model = btc_selector.select_best(criterion='BIC')
btc_best_k = int(btc_best_spec['n_regimes'])
print(f"\nBest BTC Model: {btc_best_k}-regime (by BIC)")

# Get regime statistics using the utility function
btc_regime_stats = calculate_regime_statistics(
    btc_best_model,
    frequency='1W'
)

print("\n" + "="*60)
print("BTC Regime Statistics (Weekly Data):")
print("="*60)
# The loop variable is deliberately NOT called `stats`: that name is bound to
# the scipy.stats module at the top of the notebook and must not be overwritten.
for regime_id, regime_stats in btc_regime_stats.items():
    print(f"\nRegime {regime_id}:")
    print(f"  Frequency: {regime_stats['frequency']:.2%}")
    print(f"  Average Duration: {regime_stats['avg_duration_days']:.2f} days ({regime_stats['avg_duration_weeks']:.2f} weeks)")
    print(f"  Unconditional Vol: {regime_stats['unconditional_vol']:.4f}%")

============================================================
BTC Model Selection Results (Weekly Data)
============================================================
   n_regimes garch_type distribution  log_likelihood  n_params          AIC          BIC         HQIC  converged  avg_persistence  min_duration_bars  max_duration_bars  avg_duration_bars  min_duration_days  max_duration_days  avg_duration_days  unique_regimes  has_degenerate bar_frequency
0          2   gjrGARCH       normal    -1372.062286        11  2766.124572  2810.167330  2783.557528       True         0.531522           1.385061           4.651908           3.018484           9.695428          32.563353          21.129390               2           False            1W
1          3   gjrGARCH       normal    -1372.107420        20  2784.214841  2864.292582  2815.911125       True         0.356892           1.036896           4.649339           2.339960           7.258271          32.545372          16.379723               3           False            1W
2          4   gjrGARCH       normal    -1371.861355        31  2805.722710  2929.843209  2854.851950      False         0.244526           1.001001           4.689861           1.981352           7.007005          32.829027          13.869466               4           False            1W

======================================================================
BEST MODEL (by BIC)
======================================================================
Regimes: 2
GARCH: gjrGARCH
Distribution: normal
Log-Likelihood: -1372.06
BIC: 2810.17
Avg Duration: 21.13 days
Converged: True
Unique Regimes: 2/2
======================================================================

Best BTC Model: 2-regime (by BIC)

============================================================
BTC Regime Statistics (Weekly Data):
============================================================

Regime 0:
  Frequency: 77.06%
  Average Duration: 32.56 days (4.65 weeks)
  Unconditional Vol: 0.0000%

Regime 1:
  Frequency: 22.94%
  Average Duration: 9.70 days (1.39 weeks)
  Unconditional Vol: 0.0000%

# ETH model selection: show the comparison table, pick the BIC-best model and
# print per-regime statistics for it.
print("="*60)
print("ETH Model Selection Results (Weekly Data)")
print("="*60)

# eth_comparison is already returned from compare_regime_counts() above
print(eth_comparison.to_string())

# select_best() yields a (specification, fitted model) pair for the chosen criterion
eth_best_spec, eth_best_model = eth_selector.select_best(criterion='BIC')
eth_best_k = int(eth_best_spec['n_regimes'])
print(f"\nBest ETH Model: {eth_best_k}-regime (by BIC)")

# Get regime statistics using the utility function
eth_regime_stats = calculate_regime_statistics(
    eth_best_model,
    frequency='1W'
)

print("\n" + "="*60)
print("ETH Regime Statistics (Weekly Data):")
print("="*60)
# The loop variable is deliberately NOT called `stats`: that name is bound to
# the scipy.stats module at the top of the notebook and must not be overwritten.
for regime_id, regime_stats in eth_regime_stats.items():
    print(f"\nRegime {regime_id}:")
    print(f"  Frequency: {regime_stats['frequency']:.2%}")
    print(f"  Average Duration: {regime_stats['avg_duration_days']:.2f} days ({regime_stats['avg_duration_weeks']:.2f} weeks)")
    print(f"  Unconditional Vol: {regime_stats['unconditional_vol']:.4f}%")

============================================================
ETH Model Selection Results (Weekly Data)
============================================================
   n_regimes garch_type distribution  log_likelihood  n_params          AIC          BIC         HQIC  converged  avg_persistence  min_duration_bars  max_duration_bars  avg_duration_bars  min_duration_days  max_duration_days  avg_duration_days  unique_regimes  has_degenerate bar_frequency
0          2   gjrGARCH       normal    -1498.719605        11  3019.439210  3063.481968  3036.872166       True         0.409181           1.091621           3.765485           2.428553           7.641350          26.358394          16.999872               2           False            1W
1          3   gjrGARCH       normal    -1497.801067        20  3035.602134  3115.679875  3067.298418      False         0.250510           1.001001           3.738352           1.919237           7.007006          26.168461          13.434662               3           False            1W
2          4   gjrGARCH       normal    -1497.683463        31  3057.366926  3181.487425  3106.496167      False         0.186252           1.001001           3.751398           1.690767           7.007006          26.259787          11.835371               4           False            1W

======================================================================
BEST MODEL (by BIC)
======================================================================
Regimes: 2
GARCH: gjrGARCH
Distribution: normal
Log-Likelihood: -1498.72
BIC: 3063.48
Avg Duration: 17.00 days
Converged: True
Unique Regimes: 2/2
======================================================================

Best ETH Model: 2-regime (by BIC)

============================================================
ETH Regime Statistics (Weekly Data):
============================================================

Regime 0:
  Frequency: 77.53%
  Average Duration: 26.36 days (3.77 weeks)
  Unconditional Vol: 0.0000%

Regime 1:
  Frequency: 22.47%
  Average Duration: 7.64 days (1.09 weeks)
  Unconditional Vol: 0.0000%

# Visualize model comparison: grouped bars of AIC/BIC/HQIC per regime count,
# one panel per asset, saved as a PNG and shown inline.
fig, axes = plt.subplots(2, 1, figsize=(12, 10))

criteria_columns = ['AIC', 'BIC', 'HQIC']

# BTC comparison
# Index by n_regimes so the x tick labels show the regime counts rather than
# the table's positional row index, matching the 'Number of Regimes' axis label.
btc_comparison_plot = btc_comparison.set_index('n_regimes')[criteria_columns]
btc_comparison_plot.plot(kind='bar', ax=axes[0], width=0.8)
axes[0].set_title('BTC MS-GARCH Model Comparison (Weekly Data)', fontsize=14, fontweight='bold')
axes[0].set_xlabel('Number of Regimes')
axes[0].set_ylabel('Information Criterion (lower is better)')
axes[0].legend()
axes[0].grid(True, alpha=0.3)

# ETH comparison
eth_comparison_plot = eth_comparison.set_index('n_regimes')[criteria_columns]
eth_comparison_plot.plot(kind='bar', ax=axes[1], width=0.8, color=['orange', 'red', 'purple'])
axes[1].set_title('ETH MS-GARCH Model Comparison (Weekly Data)', fontsize=14, fontweight='bold')
axes[1].set_xlabel('Number of Regimes')
axes[1].set_ylabel('Information Criterion (lower is better)')
axes[1].legend()
axes[1].grid(True, alpha=0.3)

plt.tight_layout()

# savefig() does not create missing directories, so make sure the target exists
# before writing the figure.
figure_dir = project_root / 'outputs' / 'weekly_model_results'
figure_dir.mkdir(parents=True, exist_ok=True)
plt.savefig(figure_dir / 'model_comparison_weekly.png', dpi=300, bbox_inches='tight')
plt.show()

# Persistence of each regime in the selected BTC model
print("="*60)
print("📈 BTC Regime Persistence Analysis (Weekly Data)")
print("="*60)

# Transition matrix of the fitted model, labelled per regime for display
btc_transition_matrix = btc_best_model.transition_matrix_
btc_regime_labels = [f'Regime {i}' for i in range(btc_best_k)]
btc_transition_table = pd.DataFrame(
    btc_transition_matrix,
    index=btc_regime_labels,
    columns=btc_regime_labels
)
print("\nTransition Matrix (P[i,j] = probability of transitioning from regime i to regime j):")
print(btc_transition_table.to_string())

# Expected duration of regime i in bars is 1 / (1 - P[i,i]); a regime that is
# never left (P[i,i] >= 1) is reported as infinite.
print("\nExpected Regime Durations:")
for i in range(btc_best_k):
    stay_prob = btc_transition_matrix[i, i]
    exit_prob = 1 - stay_prob
    if stay_prob < 1:
        expected_duration_weeks = 1 / exit_prob
    else:
        expected_duration_weeks = np.inf
    expected_duration_days = expected_duration_weeks * 7  # one bar is one week

    print(f"  Regime {i}: {expected_duration_days:.2f} days ({expected_duration_weeks:.2f} weeks)")
    print(f"    Stay Probability: {stay_prob:.4f}")
    print(f"    Exit Probability: {exit_prob:.4f}")

============================================================
📈 BTC Regime Persistence Analysis (Weekly Data)
============================================================

Transition Matrix (P[i,j] = probability of transitioning from regime i to regime j):
          Regime 0  Regime 1
Regime 0  0.785034  0.214966
Regime 1  0.721990  0.278010

Expected Regime Durations:
  Regime 0: 32.56 days (4.65 weeks)
    Stay Probability: 0.7850
    Exit Probability: 0.2150
  Regime 1: 9.70 days (1.39 weeks)
    Stay Probability: 0.2780
    Exit Probability: 0.7220

# Persistence of each regime in the selected ETH model
print("="*60)
print("📈 ETH Regime Persistence Analysis (Weekly Data)")
print("="*60)

# Transition matrix of the fitted model, labelled per regime for display
eth_transition_matrix = eth_best_model.transition_matrix_
eth_regime_labels = [f'Regime {i}' for i in range(eth_best_k)]
eth_transition_table = pd.DataFrame(
    eth_transition_matrix,
    index=eth_regime_labels,
    columns=eth_regime_labels
)
print("\nTransition Matrix (P[i,j] = probability of transitioning from regime i to regime j):")
print(eth_transition_table.to_string())

# Expected duration of regime i in bars is 1 / (1 - P[i,i]); a regime that is
# never left (P[i,i] >= 1) is reported as infinite.
print("\nExpected Regime Durations:")
for i in range(eth_best_k):
    stay_prob = eth_transition_matrix[i, i]
    exit_prob = 1 - stay_prob
    if stay_prob < 1:
        expected_duration_weeks = 1 / exit_prob
    else:
        expected_duration_weeks = np.inf
    expected_duration_days = expected_duration_weeks * 7  # one bar is one week

    print(f"  Regime {i}: {expected_duration_days:.2f} days ({expected_duration_weeks:.2f} weeks)")
    print(f"    Stay Probability: {stay_prob:.4f}")
    print(f"    Exit Probability: {exit_prob:.4f}")

============================================================
📈 ETH Regime Persistence Analysis (Weekly Data)
============================================================

Transition Matrix (P[i,j] = probability of transitioning from regime i to regime j):
          Regime 0  Regime 1
Regime 0  0.734430  0.265570
Regime 1  0.916069  0.083931

Expected Regime Durations:
  Regime 0: 26.36 days (3.77 weeks)
    Stay Probability: 0.7344
    Exit Probability: 0.2656
  Regime 1: 7.64 days (1.09 weeks)
    Stay Probability: 0.0839
    Exit Probability: 0.9161

# Economic interpretation: Rank regimes by volatility
def _report_vol_ranking(asset, regime_stats_by_id, n_regimes):
    """Print an asset's regimes ordered by unconditional volatility.

    Args:
        asset: Label used in the section heading (e.g. 'BTC').
        regime_stats_by_id: Mapping of regime id to a statistics mapping that
            provides 'unconditional_vol', 'frequency' and 'avg_duration_days'.
        n_regimes: Number of regimes; ranks up to n_regimes // 2 are labelled
            "Low-Vol", the remaining ranks "High-Vol".

    Returns:
        List of (regime_id, unconditional_vol) tuples in ascending volatility.
    """
    ranking = sorted(
        [
            (regime_id, regime_stats['unconditional_vol'])
            for regime_id, regime_stats in regime_stats_by_id.items()
        ],
        key=lambda pair: pair[1]
    )

    print(f"\n{asset} Regimes (Ranked by Volatility):")

    # When every regime reports the same volatility the sort cannot separate
    # them, so the labels merely follow the original regime order. Say so
    # instead of presenting the labels as meaningful.
    if len(ranking) > 1 and np.isclose(ranking[0][1], ranking[-1][1]):
        print("   ⚠️  All regimes report (near-)identical unconditional volatility; "
              "the Low-Vol/High-Vol labels below are arbitrary.")

    for rank, (regime_id, vol) in enumerate(ranking, 1):
        # Local name avoids rebinding `stats`, which is the scipy.stats module
        # imported at the top of the notebook.
        regime_stats = regime_stats_by_id[regime_id]
        label = "Low-Vol" if rank <= n_regimes // 2 else "High-Vol"
        print(f"\n{rank}. Regime {regime_id} ({label}):")
        print(f"   Unconditional Vol: {vol:.4f}%")
        print(f"   Frequency: {regime_stats['frequency']:.2%}")
        print(f"   Duration: {regime_stats['avg_duration_days']:.2f} days")

    return ranking


print("="*60)
print("💡 Economic Interpretation of Regimes")
print("="*60)

btc_vol_ranking = _report_vol_ranking('BTC', btc_regime_stats, btc_best_k)

print("\n" + "="*60)
eth_vol_ranking = _report_vol_ranking('ETH', eth_regime_stats, eth_best_k)

============================================================
💡 Economic Interpretation of Regimes
============================================================

BTC Regimes (Ranked by Volatility):

1. Regime 0 (Low-Vol):
   Unconditional Vol: 0.0000%
   Frequency: 77.06%
   Duration: 32.56 days

2. Regime 1 (High-Vol):
   Unconditional Vol: 0.0000%
   Frequency: 22.94%
   Duration: 9.70 days

============================================================

ETH Regimes (Ranked by Volatility):

1. Regime 0 (Low-Vol):
   Unconditional Vol: 0.0000%
   Frequency: 77.53%
   Duration: 26.36 days

2. Regime 1 (High-Vol):
   Unconditional Vol: 0.0000%
   Frequency: 22.47%
   Duration: 7.64 days

# Average regime duration per asset: each regime's duration weighted by its
# 'frequency' entry.
# NOTE(review): if 'frequency' is the share of time spent in a regime, this is a
# length-biased mean (long regimes count more) — confirm it matches how the
# daily-baseline figure was derived before comparing switch counts.
btc_avg_duration_days = sum(
    regime['avg_duration_days'] * regime['frequency']
    for regime in btc_regime_stats.values()
)
eth_avg_duration_days = sum(
    regime['avg_duration_days'] * regime['frequency']
    for regime in eth_regime_stats.values()
)

# Reference values shared by the table and the success criteria
days_per_year = 365
baseline_duration_days = 3.26   # average regime duration quoted for the daily BTC baseline
round_trip_cost_pct = 0.12      # cost per round-trip in percent, as stated in the metric label

# Switches per year, the resulting cost drag, and duration relative to the baseline
btc_annual_switches = days_per_year / btc_avg_duration_days
eth_annual_switches = days_per_year / eth_avg_duration_days

btc_cost_drag_pct = btc_annual_switches * round_trip_cost_pct
eth_cost_drag_pct = eth_annual_switches * round_trip_cost_pct

btc_duration_ratio = btc_avg_duration_days / baseline_duration_days
eth_duration_ratio = eth_avg_duration_days / baseline_duration_days

# Comparison table, assembled column by column (row order follows metric_names)
metric_names = [
    'Data Frequency',
    'Number of Regimes',
    'Average Regime Duration (days)',
    'Annual Transaction Switches',
    'Transaction Cost Drag (@ 0.12% per round-trip)',
    'Improvement vs Daily Baseline'
]
daily_baseline_column = ['1D', '2', '3.26', '112', '13.4%', '-']
btc_weekly_column = [
    '1W',
    f'{btc_best_k}',
    f'{btc_avg_duration_days:.2f}',
    f'{btc_annual_switches:.0f}',
    f'{btc_cost_drag_pct:.1f}%',
    f'{(btc_duration_ratio - 1) * 100:.1f}%'
]
eth_weekly_column = [
    '1W',
    f'{eth_best_k}',
    f'{eth_avg_duration_days:.2f}',
    f'{eth_annual_switches:.0f}',
    f'{eth_cost_drag_pct:.1f}%',
    f'{(eth_duration_ratio - 1) * 100:.1f}%'
]

comparison_data = {
    'Metric': metric_names,
    'Daily Baseline (BTC)': daily_baseline_column,
    'Weekly Data (BTC)': btc_weekly_column,
    'Weekly Data (ETH)': eth_weekly_column
}

comparison_df = pd.DataFrame(comparison_data)

table_rule = "=" * 80
print(table_rule)
print("📊 Weekly vs Daily Data Comparison")
print(table_rule)
print(comparison_df.to_string(index=False))

# Success criteria evaluation (BTC figures only)
print("\n" + table_rule)
print("✅ Success Criteria Evaluation")
print(table_rule)

# Each entry: (description, pass/fail outcome, measured value shown next to it)
criteria = [
    ('Regime Duration ≥ 7 days', btc_avg_duration_days >= 7, f'BTC: {btc_avg_duration_days:.2f} days'),
    ('Annual Switches ≤ 60', btc_annual_switches <= 60, f'BTC: {btc_annual_switches:.0f} switches'),
    ('Cost Drag < 10%', btc_cost_drag_pct < 10, f'BTC: {btc_cost_drag_pct:.1f}%'),
    ('Duration Improvement ≥ 2x', btc_duration_ratio >= 2, f'BTC: {btc_duration_ratio:.2f}x')
]

status_by_outcome = {True: '✅ PASS', False: '❌ FAIL'}
for criterion, passed, detail in criteria:
    status = status_by_outcome[bool(passed)]
    print(f"{status}: {criterion} - {detail}")

================================================================================
📊 Weekly vs Daily Data Comparison
================================================================================
                                        Metric Daily Baseline (BTC) Weekly Data (BTC) Weekly Data (ETH)
                                Data Frequency                   1D                1W                1W
                             Number of Regimes                    2                 2                 2
                Average Regime Duration (days)                 3.26             27.32             22.15
                   Annual Transaction Switches                  112                13                16
Transaction Cost Drag (@ 0.12% per round-trip)                13.4%              1.6%              2.0%
                 Improvement vs Daily Baseline                    -            737.9%            579.5%

================================================================================
✅ Success Criteria Evaluation
================================================================================
✅ PASS: Regime Duration ≥ 7 days - BTC: 27.32 days
✅ PASS: Annual Switches ≤ 60 - BTC: 13 switches
✅ PASS: Cost Drag < 10% - BTC: 1.6%
✅ PASS: Duration Improvement ≥ 2x - BTC: 8.38x

# Persist the selected models and their regime statistics for Week 2 backtesting
import json
import pickle
from pathlib import Path

output_dir = project_root / 'outputs' / 'weekly_model_results'
output_dir.mkdir(parents=True, exist_ok=True)

# BTC model -> pickle named after its regime count.
# NOTE: pickle files execute code on load; only unpickle these from trusted locations.
btc_model_path = output_dir / f'best_model_{btc_best_k}regime_weekly_btc.pkl'
with btc_model_path.open('wb') as f:
    pickle.dump(btc_best_model, f)
print(f"✅ Saved BTC {btc_best_k}-regime model to: {btc_model_path}")

# ETH model -> pickle named after its regime count
eth_model_path = output_dir / f'best_model_{eth_best_k}regime_weekly_eth.pkl'
with eth_model_path.open('wb') as f:
    pickle.dump(eth_best_model, f)
print(f"✅ Saved ETH {eth_best_k}-regime model to: {eth_model_path}")

# Regime statistics plus the derived duration/switch figures -> one JSON file.
# default=str turns any value json cannot encode natively into its string form.
stats_path = output_dir / 'regime_statistics_weekly.json'
statistics_payload = {
    'btc': btc_regime_stats,
    'eth': eth_regime_stats,
    'btc_avg_duration_days': float(btc_avg_duration_days),
    'eth_avg_duration_days': float(eth_avg_duration_days),
    'btc_annual_switches': float(btc_annual_switches),
    'eth_annual_switches': float(eth_annual_switches)
}
with stats_path.open('w') as f:
    json.dump(statistics_payload, f, indent=2, default=str)
print(f"✅ Saved regime statistics to: {stats_path}")

completion_rule = "=" * 60
print("\n" + completion_rule)
print("✅ Week 1 Complete: Models saved and ready for Week 2 backtesting")
print(completion_rule)

✅ Saved BTC 2-regime model to: /home/jaden/Documents/projects/trade-matrix-mvp/src/trade-matrix/research/ms-garch/outputs/weekly_model_results/best_model_2regime_weekly_btc.pkl
✅ Saved ETH 2-regime model to: /home/jaden/Documents/projects/trade-matrix-mvp/src/trade-matrix/research/ms-garch/outputs/weekly_model_results/best_model_2regime_weekly_eth.pkl
✅ Saved regime statistics to: /home/jaden/Documents/projects/trade-matrix-mvp/src/trade-matrix/research/ms-garch/outputs/weekly_model_results/regime_statistics_weekly.json

============================================================
✅ Week 1 Complete: Models saved and ready for Week 2 backtesting
============================================================

#	Notebook	Article	Focus
1	01_data_exploration	Data Exploration	CRISP-DM methodology
2	02_model_development	Model Development	2-regime GJR-GARCH
3	03_backtesting	Backtesting	Walk-forward validation
4	04_weekly_data_research (this notebook)	Weekly Optimization	Frequency analysis

📚 Appendix: Portfolio Article

Published Article

Main Reference

Phase 1: Weekly Data MS-GARCH Research

Notebook Structure

1. Setup & Data Loading

2. Model Fitting: 2/3/4-Regime MS-GARCH

3. Model Selection: AIC/BIC/HQIC Comparison

4. Regime Analysis: Duration, Persistence, Economic Interpretation

5. Comparison: Weekly vs Daily Baseline

6. Conclusions & Week 2 Preparation

📝 Research Notes

Metric	Daily Baseline	Weekly BTC	Weekly ETH
Regime Duration	3.26 days	27.32 days	22.15 days
Annual Switches	112	13	16
Transaction Cost Drag	13.4%	1.6%	2.0%
Improvement	-	+738%	+580%

📚 Appendix: Portfolio Article

Published Article

Related Research in This Series

Main Reference

Phase 1: Weekly Data MS-GARCH Research

Notebook Structure

1. Setup & Data Loading

2. Model Fitting: 2/3/4-Regime MS-GARCH

3. Model Selection: AIC/BIC/HQIC Comparison

4. Regime Analysis: Duration, Persistence, Economic Interpretation

5. Comparison: Weekly vs Daily Baseline

6. Conclusions & Week 2 Preparation

📝 Research Notes