#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import os
import json

def analyze_training_data_simple():
    """Summarize the training set using only the standard library.

    Loads the class mapping from ``../image_data/Butterfly20_dict.json`` and
    prints how many entries it has, then treats every sub-directory of
    ``Butterfly20`` as one class, counts the image files directly inside it,
    and prints the per-class counts and the overall total. Both paths are
    resolved against the current working directory.

    Returns:
        A ``(class_counts, total_images)`` tuple. ``class_counts`` maps each
        class folder name to the number of image files it contains, and
        ``total_images`` is the sum of those counts.

    Raises:
        OSError: If the mapping file or the ``Butterfly20`` directory cannot
            be read.
        json.JSONDecodeError: If the mapping file is not valid JSON.
    """
    print("开始分析训练数据（简化版）...")

    # Load the class mapping. The encoding is pinned to UTF-8 so decoding does
    # not depend on the platform's locale default.
    with open('../image_data/Butterfly20_dict.json', 'r', encoding='utf-8') as f:
        class_dict = json.load(f)

    print(f"总类别数: {len(class_dict)}")

    # Count the images in every class folder.
    # NOTE(review): the mapping is read from ../image_data/ while the images
    # are read from ./Butterfly20 - confirm the intended working directory.
    butterfly_dir = 'Butterfly20'
    image_suffixes = ('.jpg', '.jpeg', '.png')
    class_counts = {}

    for class_folder in sorted(os.listdir(butterfly_dir)):
        class_path = os.path.join(butterfly_dir, class_folder)
        if not os.path.isdir(class_path):
            # Stray files next to the class folders are not classes.
            continue
        # Compare lower-cased names so files such as "x.JPG" are counted too.
        class_counts[class_folder] = sum(
            1
            for file_name in os.listdir(class_path)
            if file_name.lower().endswith(image_suffixes)
        )

    print("\n每个类别的图片数量:")
    for class_name, count in sorted(class_counts.items()):
        print(f"{class_name}: {count} 张图片")

    total_images = sum(class_counts.values())
    print(f"\n总图片数量: {total_images}")

    return class_counts, total_images

def analyze_test_data_simple():
    """Summarize the test set stored in ``data/Butterfly20_test``.

    Prints how many image files the directory contains. For ``.jpg`` files
    whose name before the first dot is an integer, it also prints the range
    of those ids and reports which ids in 1..200 are absent. The directory
    path is resolved against the current working directory.

    Returns:
        The number of image files found (``.jpg``, ``.jpeg`` or ``.png``,
        matched case-insensitively).

    Raises:
        OSError: If the test directory cannot be listed.
    """
    print("\n分析测试数据...")

    test_dir = 'data/Butterfly20_test'
    # Compare lower-cased names so files such as "12.JPG" are not skipped.
    test_images = [
        file_name
        for file_name in os.listdir(test_dir)
        if file_name.lower().endswith(('.jpg', '.jpeg', '.png'))
    ]

    print(f"测试图片数量: {len(test_images)}")

    # Collect the numeric ids encoded in the .jpg file names. A set gives
    # constant-time membership tests for the completeness check below.
    test_ids = set()
    for img_name in test_images:
        if not img_name.lower().endswith('.jpg'):
            continue
        try:
            test_ids.add(int(img_name.split('.')[0]))
        except ValueError:
            # Non-numeric names (e.g. "cover.jpg") carry no id; skip them.
            continue

    if test_ids:
        print(f"测试图片ID范围: {min(test_ids)} - {max(test_ids)}")
        # Ids 1..200 are the ones the check expects to be present.
        missing_ids = [i for i in range(1, 201) if i not in test_ids]
        if missing_ids:
            print(f"缺失的图片ID: {missing_ids}")
        else:
            print("测试图片ID完整 (1-200)")

    return len(test_images)

def check_required_files():
    """Print, for each expected dataset path, whether it exists.

    The paths are resolved against the current working directory. Nothing is
    returned and no exception is raised for a missing path; the result is
    only reported on standard output.
    """
    print("\n检查必要文件:")

    expected_paths = (
        '../image_data/Butterfly20_dict.json',
        '../image_data/genus.txt',
        '../image_data/species.txt',
        '../image_data/Butterfly20/',
    )

    for path in expected_paths:
        # One line per path: the path followed by a present/missing marker.
        marker = "✓ 存在" if os.path.exists(path) else "✗ 缺失"
        print(f"{path}: {marker}")

if __name__ == "__main__":
    # Script entry point: check the expected files, analyze the training and
    # test sets in that order, then print a short summary of both totals.
    separator = "=" * 60

    print("蝴蝶图像分类数据分析（简化版）")
    print(separator)

    check_required_files()
    class_counts, total_train = analyze_training_data_simple()
    test_count = analyze_test_data_simple()

    print(f"\n{separator}")
    print("数据分析完成!")
    for summary_line in (
        f"训练图片总数: {total_train}",
        f"测试图片总数: {test_count}",
    ):
        print(summary_line)
