import json
import random
import datetime
from pathlib import Path

def load_disease_database():
    """Read and parse ``disease_symptom_database.json``.

    The path is relative, so the file is resolved against the current
    working directory. It is decoded as UTF-8.

    Returns:
        The decoded JSON document.
    """
    raw_text = Path('disease_symptom_database.json').read_text(encoding='utf-8')
    return json.loads(raw_text)

def get_random_diseases(disease_db, num_diseases):
    """Pick up to ``num_diseases`` distinct diseases at random.

    Args:
        disease_db: Mapping whose ``'疾病库'`` entry is the list of disease
            records; a missing entry is treated as an empty list.
        num_diseases: Requested number of diseases; capped at the number of
            records available.

    Returns:
        A list of dicts with the keys ``疾病ID``, ``疾病名称`` and ``疾病解释``.
        ``疾病解释`` is an empty string when the record has no explanation.
    """
    pool = disease_db.get('疾病库', [])
    sample_size = min(num_diseases, len(pool))
    return [
        {
            "疾病ID": record["疾病ID"],
            "疾病名称": record["疾病名称"],
            "疾病解释": record.get("疾病解释", ""),
        }
        for record in random.sample(pool, sample_size)
    ]

def generate_random_time(base_date, hours_range=(0, 23), minutes_range=(0, 59), seconds_range=(0, 59)):
    """Return ``base_date``'s calendar date combined with a random clock time.

    Args:
        base_date: Object providing ``strftime`` (date or datetime); only its
            year, month and day are used.
        hours_range: Inclusive ``(low, high)`` bounds for the hour.
        minutes_range: Inclusive ``(low, high)`` bounds for the minute.
        seconds_range: Inclusive ``(low, high)`` bounds for the second.

    Returns:
        A string shaped like ``YYYY-MM-DD HH:MM:SS``.
    """
    # Hour, minute and second are drawn in that order, one randint call each.
    hour, minute, second = (
        random.randint(*bounds)
        for bounds in (hours_range, minutes_range, seconds_range)
    )
    date_part = base_date.strftime("%Y-%m-%d")
    return f"{date_part} {hour:02d}:{minute:02d}:{second:02d}"

def generate_random_measurement(indicator_name):
    """Generate a random measurement value chosen by keywords in the name.

    The name is matched against keyword groups in a fixed priority order and
    the first matching group decides the value range:

    1. temperature (温度 / 体温 / 发热): float in [35.0, 42.0], 1 decimal
    2. pressure (血压 / 压): int in [60, 200]
    3. heart rate (心率 / 脉搏): int in [40, 180]
    4. antigen (抗原): float in [0, 100], 2 decimals
    5. sugar (糖 / 葡萄糖): float in [3.0, 20.0], 1 decimal
    6. protein (蛋白): float in [0, 100], 2 decimals
    7. enzyme (酶 / 脱氢酶): float in [0, 500], 2 decimals
    8. anything else: float in [10, 100], 2 decimals

    Args:
        indicator_name: Indicator display name (a string).

    Returns:
        An int for pressure and heart-rate indicators, otherwise a rounded
        float.
    """
    name_lower = indicator_name.lower()

    def has_any(*keywords):
        return any(keyword in name_lower for keyword in keywords)

    if has_any('温度', '体温', '发热'):
        return round(random.uniform(35.0, 42.0), 1)
    if has_any('血压', '压'):
        return random.randint(60, 200)
    if has_any('心率', '脉搏'):
        return random.randint(40, 180)
    # Antigen is tested before sugar: names such as "糖类抗原199" contain
    # '糖' but are antigen markers, and would otherwise be given the
    # blood-glucose range.
    if has_any('抗原'):
        return round(random.uniform(0, 100), 2)
    if has_any('糖', '葡萄糖'):
        return round(random.uniform(3.0, 20.0), 1)
    if has_any('蛋白'):
        return round(random.uniform(0, 100), 2)
    if has_any('酶', '脱氢酶'):
        return round(random.uniform(0, 500), 2)
    return round(random.uniform(10, 100), 2)

def generate_severity_description():
    """Return one severity label picked uniformly at random.

    The candidate labels run from "无" through "危重".
    """
    severity_levels = [
        "无",
        "轻微",
        "轻度",
        "中等",
        "中度",
        "重度",
        "严重",
        "极重",
        "危重",
    ]
    picked = random.choice(severity_levels)
    return picked

def determine_indicator_type(indicator_id):
    """Report whether ``indicator_id`` denotes a symptom.

    Returns:
        True when the ID begins with an uppercase 'S' (symptom), otherwise
        False (treated as a measurement, 'I', by the caller in this file).
    """
    symptom_prefix = 'S'
    return indicator_id.startswith(symptom_prefix)

def generate_time_series(start_date, num_points, months_range=(1, 12)):
    """Build ``num_points`` timestamp strings at randomly spaced dates.

    The first timestamp falls on ``start_date``. Each later one is moved
    forward by a random whole number of "months", where a month is
    approximated as exactly 30 days.

    Args:
        start_date: Date of the first point; must support adding a
            ``datetime.timedelta``.
        num_points: Number of timestamps to produce.
        months_range: Inclusive ``(low, high)`` bounds for the gap, in
            30-day months, between consecutive points.

    Returns:
        A list of timestamp strings as produced by ``generate_random_time``.
    """
    thirty_days = datetime.timedelta(days=30)
    timestamps = []
    cursor = start_date

    for _ in range(num_points):
        timestamps.append(generate_random_time(cursor))
        # A gap is drawn after every point, including the last one.
        cursor = cursor + thirty_days * random.randint(*months_range)

    return timestamps

def generate_symptoms_from_disease_db(disease_db, num_symptoms_range=(2, 8)):
    """Sample a random set of distinct symptoms from the disease database.

    Symptoms are pooled from the ``'症状列表'`` list of every disease under
    ``'疾病库'`` and de-duplicated by ``symptom_id``; the first record seen for
    an ID is kept. Keying on the ID (rather than comparing whole records)
    guarantees a patient never receives the same 症状ID twice, even when two
    diseases list the same symptom with different auxiliary fields, and it
    keeps pooling linear in the number of records.

    Args:
        disease_db: Mapping with an optional ``'疾病库'`` list of disease
            records, each with an optional ``'症状列表'`` list of symptom
            records.
        num_symptoms_range: Inclusive ``(low, high)`` bounds for how many
            symptoms to pick; the pick is capped at the number of distinct
            symptoms available.

    Returns:
        A list of dicts with the keys ``症状ID`` and ``症状名称``; empty when
        the database contains no symptoms.

    Raises:
        KeyError: If a symptom record lacks ``symptom_id``, or a selected
            record lacks ``symptom_name``.
    """
    # Drawn first so the random stream is consumed even for an empty database.
    num_symptoms = random.randint(*num_symptoms_range)

    # Dicts preserve insertion order, so the pool keeps first-seen order.
    unique_symptoms = {}
    for disease in disease_db.get('疾病库', []):
        for symptom in disease.get('症状列表', []):
            unique_symptoms.setdefault(symptom["symptom_id"], symptom)

    if not unique_symptoms:
        return []

    pool = list(unique_symptoms.values())
    selected = random.sample(pool, min(num_symptoms, len(pool)))
    return [
        {"症状ID": symptom["symptom_id"], "症状名称": symptom["symptom_name"]}
        for symptom in selected
    ]

# Fixed catalogue of standard medical indicators to sample from. The "type"
# field is informational only and is not copied into the generated output.
_STANDARD_INDICATORS = (
    {"id": "I048124", "name": "糖类抗原199正常", "type": "protein"},
    {"id": "I053357", "name": "低密度脂蛋白胆固醇正常", "type": "protein"},
    {"id": "I027623", "name": "中性粒细胞百分比轻度升高", "type": "cell_count"},
    {"id": "I073173", "name": "羟丁酸脱氢酶升高", "type": "enzyme"},
    {"id": "I066774", "name": "甲胎蛋白正常", "type": "protein"},
    {"id": "I020660", "name": "尿酸显著降低", "type": "metabolite"},
)


def generate_indicators_from_disease_db(disease_db, num_indicators_range=(3, 12)):
    """Sample a random set of distinct standard medical indicators.

    Indicators come from a fixed built-in catalogue; the disease database is
    not consulted. An earlier version walked every symptom of every disease
    on each call without using the result, which has been removed.

    Args:
        disease_db: Accepted for interface compatibility; not used.
        num_indicators_range: Inclusive ``(low, high)`` bounds for how many
            indicators to pick; the pick is capped at the catalogue size.

    Returns:
        A list of dicts with the keys ``指标ID`` and ``指标名称``.
    """
    num_indicators = random.randint(*num_indicators_range)
    sample_size = min(num_indicators, len(_STANDARD_INDICATORS))
    return [
        {"指标ID": indicator["id"], "指标名称": indicator["name"]}
        for indicator in random.sample(_STANDARD_INDICATORS, sample_size)
    ]

def generate_patient_data(patient_id, disease_db):
    """Generate the full synthetic record for a single patient.

    For each fixed section, a random number (at least 2) of distinct
    indicator names is drawn and a time series is generated for each one.
    Names are sampled without replacement so no record is silently
    overwritten by a repeated pick, and ``total_indicators`` equals the number
    of records actually stored.

    Args:
        patient_id: Identifier stored under ``patient_info["id"]``.
        disease_db: Disease database used to pick the patient's diseases and
            database-derived symptoms.

    Returns:
        A dict mapping each section name to ``{indicator_name: record}``, plus
        a ``"patient_info"`` entry. Each record holds ``"id"``, ``"时间序列"``
        and either ``"严重程度"`` (symptom IDs) or ``"测量值"`` (measurement
        IDs).
    """
    # Candidate indicator names for each section of the record.
    sections = {
        "基础体征": {
            "indicators": ["发热", "嗜睡", "甲胎蛋白正常", "肢体麻木", "黄疸", "肌肉痛", "呕血", "视力障碍", "寒战", "头痛", "鼻出血"]
        },
        "血压血糖": {
            "indicators": ["尿酸显著降低", "甲胎蛋白正常", "鼻出血", "头痛", "寒战"]
        },
        "健康建议": {
            "indicators": ["吞咽困难", "嗜睡", "黄疸", "不育", "平衡障碍"]
        }
    }

    patient_data = {}
    total_indicators = 0

    for section_name, section_info in sections.items():
        candidates = section_info["indicators"]
        num_indicators = random.randint(2, len(candidates))
        section_data = {}

        # Sampling without replacement yields exactly num_indicators distinct
        # names, so every generated record is kept.
        for indicator_name in random.sample(candidates, num_indicators):
            # The ID kind (symptom "S…" vs. measurement "I…") is a coin flip
            # and is independent of the indicator name.
            # NOTE(review): generated "I" IDs have 5 digits while the standard
            # indicator IDs elsewhere in this file have 6 — confirm intended.
            if random.choice([True, False]):
                indicator_id = f"S{random.randint(100000, 999999):06d}_{random.randint(1, 20):03d}"
            else:
                indicator_id = f"I{random.randint(10000, 99999):05d}"

            # Day is capped at 28 so the date is valid in every month.
            num_time_points = random.randint(3, 15)
            start_date = datetime.datetime(2024 + random.randint(0, 2), random.randint(1, 12), random.randint(1, 28))
            time_series = generate_time_series(start_date, num_time_points)

            # One value per time point: a severity label for symptoms, a
            # numeric reading for measurements.
            if determine_indicator_type(indicator_id):
                values_key = "严重程度"
                values = [generate_severity_description() for _ in range(num_time_points)]
            else:
                values_key = "测量值"
                values = [generate_random_measurement(indicator_name) for _ in range(num_time_points)]

            section_data[indicator_name] = {
                "id": indicator_id,
                "时间序列": time_series,
                values_key: values
            }

        patient_data[section_name] = section_data
        total_indicators += len(section_data)

    # Diseases, symptoms and indicators attached to the patient summary.
    num_diseases = random.randint(1, 4)
    diseases_with_codes = get_random_diseases(disease_db, num_diseases)
    symptoms_from_db = generate_symptoms_from_disease_db(disease_db, (3, 8))
    indicators_from_db = generate_indicators_from_disease_db(disease_db, (2, 6))

    patient_data["patient_info"] = {
        "id": patient_id,
        "diseases": diseases_with_codes,
        "symptoms_from_database": symptoms_from_db,
        "indicators_from_database": indicators_from_db,
        "total_indicators": total_indicators
    }

    return patient_data

def main():
    """Regenerate one JSON file per patient under ``patient_data/``.

    Loads the disease database, creates the output directory if needed, and
    writes ``patient_0001.json`` through ``patient_8934.json``. Progress and
    errors are printed. If the database cannot be loaded the function returns
    without writing anything; a failure for one patient is reported and the
    run continues with the next.
    """
    print("🔄 重新生成患者数据...")

    # Nothing can be generated without the disease database, so stop early.
    try:
        disease_db = load_disease_database()
        disease_count = len(disease_db.get('疾病库', []))
        print(f"✅ 已加载疾病库，包含 {disease_count} 种疾病")
    except Exception as load_error:
        print(f"❌ 加载疾病库失败: {load_error}")
        return

    output_dir = Path("patient_data")
    output_dir.mkdir(exist_ok=True)

    first_patient, last_patient = 1, 8934
    for number in range(first_patient, last_patient + 1):
        patient_id = "patient_{:04d}".format(number)
        print(f"生成 {patient_id}...")

        try:
            record = generate_patient_data(patient_id, disease_db)

            # ensure_ascii=False keeps the Chinese text readable in the file.
            target_path = output_dir / f"{patient_id}.json"
            with open(target_path, 'w', encoding='utf-8') as handle:
                json.dump(record, handle, ensure_ascii=False, indent=2)

            print(f"✅ 已生成 {patient_id}")

        except Exception as patient_error:
            print(f"❌ 生成 {patient_id} 时出错: {patient_error}")

# Run the generator only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
