import argparse
import logging
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pyBigWig

# Module-level logger named after this module.
_logger = logging.getLogger(__name__)


def plot_scores(
    bigwig_file: str, bed: pd.DataFrame, file_path: str, verbose: bool = True
):
    """Plot binned conservation scores for every chromosome listed in ``bed``.

    For each row of ``bed`` the range ``[0, end)`` of the chromosome is cut
    into 1,000 bp bins. Each bin's score is the unweighted mean of the values
    of the bigWig interval records overlapping that bin (0 when the bigWig
    has no records there). One scatter plot per chromosome is written to
    ``file_path`` as ``conservation_chr<name>.png``.

    Args:
        bigwig_file: Path of the bigWig file to read scores from.
        bed: DataFrame with at least the columns ``"chrom"`` and ``"end"``;
            ``"end"`` is used as the chromosome length.
        file_path: Directory the PNG files are written to. A trailing path
            separator is optional.
        verbose: When true, log progress at INFO level.
    """
    bin_size = 1000

    bw = pyBigWig.open(bigwig_file)
    try:
        for _, row in bed.iterrows():
            # Coerce to plain Python types: the chromosome column may have
            # been parsed as integers, and the end coordinate may be a numpy
            # scalar.
            chrom = str(row["chrom"])
            size = int(row["end"])
            # Strip a leading "chr" only when present, so names without the
            # prefix do not raise.
            chrom_num = chrom[3:] if chrom.startswith("chr") else chrom

            if verbose:
                _logger.info(
                    "Processing chromosome: %s, chromosome number: %s",
                    chrom,
                    chrom_num,
                )

            bins = int(np.ceil(size / bin_size))
            scores = []
            for i in range(bins):
                start = i * bin_size
                # The last bin is truncated at the chromosome end.
                end = min(start + bin_size, size)
                intervals = bw.intervals(chrom, start, end)
                if intervals is not None:
                    # interval = (start, end, value); average the values.
                    scores.append(np.mean([interval[2] for interval in intervals]))
                else:
                    # No records in this bin: plot it as 0.
                    scores.append(0)

            plt.scatter(range(bins), scores, s=1)
            plt.title(f"Conservation Scores for Chromosome {chrom_num}")
            # Fixed y-range so plots of different chromosomes are comparable.
            plt.ylim(-20, 9)
            plt.xlim(0, bins)
            plt.xlabel("1,000bp Bin")
            plt.ylabel("Conservation Score")
            # os.path.join keeps the directory and file name separate even
            # when file_path has no trailing separator.
            plt.savefig(
                os.path.join(file_path, f"conservation_chr{chrom_num}.png")
            )
            plt.close()
    finally:
        # Release the bigWig handle even if reading or plotting fails.
        bw.close()

    if verbose:
        _logger.info("Saved conservation scores to %s", file_path)


def main():
    """Parse command-line arguments, load the BED file and plot the scores."""
    parser = argparse.ArgumentParser(
        description="Plot the conservation scores in a bigwig file"
    )
    parser.add_argument(
        "--bigwig_file",
        type=str,
        default="/home/name/gamba/data_processing/data/240-mammalian/241-mammalian-2020v2.bigWig",
        help="Path to the bigwig file with phyloP scores",
    )
    parser.add_argument(
        "--bed_file",
        type=str,
        default="/home/name/gamba/data_processing/data/240-mammalian/hg38.bed",
        help="File name of the bed file",
    )
    parser.add_argument(
        "--file_path",
        type=str,
        default="/home/name/gamba/data_processing/data/240-mammalian/data_vis/",
        help="Directory to save the plotted scores",
    )
    args = parser.parse_args()

    # plot_scores reports progress at INFO level; without a configured
    # handler those records are dropped. basicConfig does nothing if the
    # root logger already has handlers.
    logging.basicConfig(level=logging.INFO)

    # Read only the first three columns. With more columns than names,
    # pandas would otherwise use the leading columns as the index and
    # mis-assign chrom/start/end.
    bed = pd.read_csv(
        args.bed_file,
        sep="\t",
        header=None,
        names=["chrom", "start", "end"],
        usecols=[0, 1, 2],
    )

    plot_scores(args.bigwig_file, bed, args.file_path)


# Run the command-line entry point only when executed as a script.
if __name__ == "__main__":
    main()
