import re


def parse_term(term_str):
    """
    Convert a single term of a two-variable polynomial to the C_ E_ E_ format.

    The result is "C<coeff> E<exponent of x0> E<exponent of x1>". A term
    without an explicit coefficient gets 1 (or -1 after a bare minus sign),
    a variable written without "^" has exponent 1, and a variable that does
    not occur has exponent 0.

    Whitespace anywhere in the term is ignored, so "- 10*x0" is read the
    same way as "-10*x0".

    Example: "-10*x0^9*x1^11" -> "C-10 E9 E11"
             "-x0^9*x1" -> "C-1 E9 E1"
             "18*x0^8" -> "C18 E8 E0"
             "-9*x0" -> "C-9 E1 E0"
             "30" -> "C30 E0 E0"
             "- 10*x0^8*x1^2" -> "C-10 E8 E2"

    Args:
        term_str: Text of one term, optionally starting with "+" or "-".

    Returns:
        The formatted term, or "" if term_str is empty or only whitespace.
    """
    # Remove *all* whitespace, not only the surrounding one: a gap between
    # the sign and the digits ("- 10*x0") would otherwise hide the digits
    # from the coefficient pattern and the term would be read as -1.
    term_str = "".join(term_str.split())
    if not term_str:
        return ""

    # Coefficient: an optionally signed integer at the very start. Without
    # digits the coefficient is implicit, and only a leading "-" changes it.
    match_coeff = re.match(r"[+-]?\d+", term_str)
    if match_coeff:
        coeff = int(match_coeff.group(0))
    elif term_str.startswith("-"):
        coeff = -1
    else:
        coeff = 1

    # Exponents: each variable is searched independently in the whole term.
    # The coefficient consists of sign and digits only, and "x0" can never
    # match inside "x1^..." (or vice versa), so nothing has to be cut out of
    # the string between the searches.
    exponents = []
    for variable in ("x0", "x1"):
        match_var = re.search(variable + r"(?:\^(\d+))?", term_str)
        if not match_var:
            exponents.append(0)  # variable absent, e.g. a constant term
        elif match_var.group(1):
            exponents.append(int(match_var.group(1)))
        else:
            exponents.append(1)  # bare variable such as "x0"

    exp0, exp1 = exponents
    return f"C{coeff} E{exp0} E{exp1}"


def process_polynomial(poly_str):
    """
    Processes a polynomial string into a sequence of C_ E_ E_ terms.
    Example: "-x0^9*x1 - 10*x0^8*x1^2" -> "C-1 E9 E1 C-10 E8 E2"

    The polynomial is cut into terms at every "+" and "-" sign and each term
    is converted with parse_term. Every "-" is treated as the start of a new
    term, so negative exponents such as "x0^-3" are not supported.

    Args:
        poly_str: Polynomial text; whitespace of any kind is ignored.

    Returns:
        The converted terms joined by single spaces, or "" if poly_str is
        empty or contains only whitespace.
    """
    # Drop every whitespace character (spaces, tabs, ...). Removing only " "
    # would leave e.g. a tab between a sign and its coefficient, and the term
    # would then reach parse_term in a form it cannot read reliably.
    poly_str = "".join(poly_str.split())
    if not poly_str:
        return ""

    # Turn subtraction into addition of a negative term so that a single
    # split on "+" separates all terms while each keeps its own sign.
    # A leading "-" produces an empty first piece, which is skipped below.
    pieces = poly_str.replace("-", "+-").split("+")

    converted = (parse_term(piece) for piece in pieces if piece)
    return " ".join(filter(None, converted))


def main(
    input_file_path="data/data/small/gcd/ZZ_n=2_terms=4/train_raw.txt",
    output_file_path="data/data/small/gcd/ZZ_n=2_terms=4/data.train",
):
    """
    Convert a raw polynomial dataset file into the C_ E_ E_ token format.

    Every non-empty input line is expected to look like
    "poly1 | poly2 # target". The three polynomials are converted with
    process_polynomial and written as
    "<poly1> [SEP] <poly2> : <target>", one line per input line.
    Lines missing the "#" or "|" separator are reported and skipped.

    The output file is written only after the whole input was read
    successfully, so a failure while reading never leaves a partial file.
    Errors are reported on stdout instead of being raised.

    Args:
        input_file_path: Path of the raw text file to read.
        output_file_path: Path of the converted file to write.
    """
    processed_lines = []
    try:
        with open(input_file_path, "r", encoding="utf-8") as f_in:
            for line_num, line in enumerate(f_in, start=1):
                line = line.strip()
                if not line:
                    continue

                # Split by '#' into main and target polynomials
                parts = line.split("#", 1)
                if len(parts) != 2:
                    print(f"Warning: Line {line_num} skipped, missing '#' separator: {line[:100]}...")
                    continue

                main_part, target_part = parts[0].strip(), parts[1].strip()

                # Split main_part by '|' into two polynomials
                poly_parts = main_part.split("|", 1)
                if len(poly_parts) != 2:
                    print(
                        f"Warning: Line {line_num} skipped, missing '|' separator in main part: {main_part[:100]}..."
                    )
                    continue

                processed_poly1 = process_polynomial(poly_parts[0])
                processed_poly2 = process_polynomial(poly_parts[1])
                processed_target_poly = process_polynomial(target_part)

                # Format: processed_poly1 [SEP] processed_poly2 : processed_target_poly
                processed_lines.append(f"{processed_poly1} [SEP] {processed_poly2} : {processed_target_poly}")

    except FileNotFoundError:
        print(f"Error: Input file not found at {input_file_path}")
        return
    except Exception as e:
        # Top-level boundary of the script: report and stop without writing.
        print(f"An error occurred during processing: {e}")
        return

    try:
        with open(output_file_path, "w", encoding="utf-8") as f_out:
            f_out.writelines(f"{processed_line}\n" for processed_line in processed_lines)
    except OSError:
        print(f"Error: Could not write to output file {output_file_path}")
    else:
        print(f"Successfully processed data and saved to {output_file_path}")
        print(f"A total of {len(processed_lines)} lines were processed.")


# Run the conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
