#!/usr/bin/env python3
"""测试GPU可用性和配置"""
import torch
import sys


def test_gpu():
    """Report PyTorch/CUDA status and smoke-test a matmul on every visible GPU.

    Returns:
        bool: True iff ``torch.cuda.is_available()``.
    """
    print("=== GPU测试 ===")
    print(f"PyTorch版本: {torch.__version__}")
    print(f"CUDA可用: {torch.cuda.is_available()}")

    if torch.cuda.is_available():
        print(f"CUDA版本: {torch.version.cuda}")
        print(f"GPU数量: {torch.cuda.device_count()}")

        for i in range(torch.cuda.device_count()):
            print(f"\nGPU {i}:")
            print(f" 名称: {torch.cuda.get_device_name(i)}")
            print(f" 内存总量: {torch.cuda.get_device_properties(i).total_memory / 1024**3:.2f} GB")

            # Tiny 3x3 matmul on this device to confirm kernels actually run.
            try:
                x = torch.randn(3, 3).cuda(i)
                y = torch.randn(3, 3).cuda(i)
                z = torch.matmul(x, y)  # result unused; the call itself is the test
                print(f" GPU计算测试: 成功")
                print(f" 当前内存使用: {torch.cuda.memory_allocated(i) / 1024**2:.2f} MB")
                print(f" 最大内存使用: {torch.cuda.max_memory_allocated(i) / 1024**2:.2f} MB")
            except Exception as e:
                print(f" GPU计算测试: 失败 - {e}")
    else:
        print("警告: CUDA不可用,将使用CPU运行")

    return torch.cuda.is_available()


def test_device_resolution():
    """Exercise the device-selection logic for the 'auto'/'cpu'/'cuda' modes."""
    print("\n=== 设备解析测试 ===")

    def resolve_device(mode: str) -> str:
        # Explicit "cpu" always honored; explicit "cuda" is a hard error when
        # CUDA is missing; anything else ("auto") prefers GPU when present.
        mode = mode.lower()
        if mode == "cpu":
            return "cpu"
        if mode == "cuda":
            if not torch.cuda.is_available():
                raise SystemExit("device set to cuda but CUDA is not available")
            return "cuda"
        return "cuda" if torch.cuda.is_available() else "cpu"

    test_cases = [
        ("auto", "应该自动选择GPU(如果可用)"),
        ("cpu", "应该强制使用CPU"),
        ("cuda", "应该使用GPU(如果可用)"),
    ]

    for mode, description in test_cases:
        try:
            device = resolve_device(mode)
            print(f"模式 '{mode}': {device} - {description}")
        except SystemExit as e:
            print(f"模式 '{mode}': 错误 - {e}")
        except Exception as e:
            print(f"模式 '{mode}': 异常 - {e}")


def test_training_components():
    """Import the project model and run a single forward pass (GPU if available).

    Failures (including a missing ``hybrid_diffusion`` module) are reported,
    not raised.
    """
    print("\n=== 训练组件测试 ===")

    try:
        # cosine_beta_schedule is imported on purpose: its presence is part of
        # the "module imports cleanly" check even though it is not called here.
        from hybrid_diffusion import HybridDiffusionModel, cosine_beta_schedule
        print("✓ hybrid_diffusion 模块导入成功")

        disc_vocab_sizes = [5, 3, 2]
        model = HybridDiffusionModel(cont_dim=10, disc_vocab_sizes=disc_vocab_sizes)
        if torch.cuda.is_available():
            model = model.cuda()
            print("✓ 模型可以移动到GPU")
        else:
            print("✓ 模型在CPU上")

        batch_size = 2
        seq_len = 16
        x_cont = torch.randn(batch_size, seq_len, 10)
        # BUG FIX: sample each discrete feature within its OWN vocabulary.
        # The original used randint(0, 5) for all three features, producing
        # out-of-range indices for the vocab-3 and vocab-2 features.
        x_disc = torch.stack(
            [torch.randint(0, v, (batch_size, seq_len)) for v in disc_vocab_sizes],
            dim=-1,
        )
        # NOTE(review): assumes the model's diffusion horizon is >= 100 steps
        # — confirm against hybrid_diffusion's configuration.
        t = torch.randint(0, 100, (batch_size,))

        if torch.cuda.is_available():
            # model is already on GPU from above; only the inputs move here.
            x_cont = x_cont.cuda()
            x_disc = x_disc.cuda()
            t = t.cuda()

        eps_pred, logits = model(x_cont, x_disc, t)
        print(f"✓ 前向传播成功")
        print(f" 连续输出形状: {eps_pred.shape}")
        print(f" 离散输出数量: {len(logits)}")

    except ImportError as e:
        print(f"✗ 模块导入失败: {e}")
    except Exception as e:
        print(f"✗ 测试失败: {e}")


def main():
    """Run all checks and print usage advice.

    Returns:
        bool: True iff a CUDA device was detected (drives the exit code).
    """
    print("开始GPU和训练配置测试...")

    gpu_available = test_gpu()
    test_device_resolution()
    test_training_components()

    print("\n=== 使用建议 ===")
    if gpu_available:
        print("1. 使用GPU运行: python run_pipeline.py --device cuda")
        print("2. 或自动选择: python run_pipeline.py --device auto")
        print("3. 单独训练: python train.py --device cuda")
        print("4. 单独采样: python export_samples.py --device cuda")
    else:
        print("1. 只能使用CPU运行: python run_pipeline.py --device cpu")
        print("2. 检查CUDA和PyTorch安装")
        print("3. 确保有NVIDIA GPU和正确的驱动程序")

    return gpu_available


if __name__ == "__main__":
    success = main()
    sys.exit(0 if success else 1)