# Load required libraries
library(dplyr)
library(plotly)
library(readr)

# Read the two result tables that this script summarises and plots
go_results_path <- "/workdir/execution_outputs/go_bp_enrichment_results.csv"
cancer_related_path <- "/workdir/execution_outputs/cancer_related_pathways.csv"

go_results <- read_csv(file = go_results_path)
cancer_related <- read_csv(file = cancer_related_path)

cat("Creating simple but effective final analysis...\n")

# Prepare plot data: the ten most significant rows of `cancer_related`.

# Extract one side of a "count/total" GeneRatio string as a number.
#   ratio: vector of ratio strings (coerced to character before splitting)
#   i:     1 for the part before "/", 2 for the part after it
# Returns a numeric vector the same length as `ratio`; entries that are
# missing or have no i-th part come back as NA.
gene_ratio_part <- function(ratio, i) {
  pieces <- strsplit(as.character(ratio), "/", fixed = TRUE)
  # vapply() keeps the result a plain numeric vector even for zero rows
  vapply(pieces, function(parts) as.numeric(parts[i]), numeric(1))
}

# Sort by p-value BEFORE truncating so the "top 10" does not depend on the row
# order of the input CSV. arrange() keeps tied rows in their original order and
# places missing p-values last.
plot_data <- cancer_related %>%
  arrange(pvalue) %>%
  head(10) %>%
  mutate(
    neg_log_pvalue = -log10(pvalue),
    gene_count = gene_ratio_part(GeneRatio, 1),
    total_genes = gene_ratio_part(GeneRatio, 2),
    # Labels longer than 45 characters are cut to 42 characters plus "..."
    Description_short = ifelse(nchar(Description) > 45,
                               paste0(substr(Description, 1, 42), "..."),
                               Description)
  ) %>%
  arrange(desc(neg_log_pvalue))

# Horizontal bar chart of the prepared pathways, drawn in one fixed colour

# Bar fill and outline
bar_marker <- list(
  color = "rgba(50, 171, 96, 0.7)",
  line = list(color = "rgba(50, 171, 96, 1.0)", width = 2)
)

# Two-line chart title (the <sub> part renders as a subtitle)
chart_title <- list(
  text = "Top 10 Most Enriched Cancer-Associated Pathways<br><sub>Ranked by Enrichment Strength (-log10 p-value)</sub>",
  font = list(size = 16)
)

x_axis_spec <- list(
  title = "-log10(p-value) [Enrichment Strength]",
  titlefont = list(size = 14)
)
y_axis_spec <- list(title = "", titlefont = list(size = 12))

# Wide left margin leaves room for the pathway labels on the y axis
plot_margin <- list(l = 280, r = 100, t = 100, b = 80)

p1 <- plot_ly(
  data = plot_data,
  x = ~neg_log_pvalue,
  # Order the bars by enrichment strength rather than alphabetically
  y = ~reorder(Description_short, neg_log_pvalue),
  type = "bar",
  orientation = "h",
  marker = bar_marker,
  # Hover text only: textposition = "none" keeps it off the bars themselves
  text = ~paste("Pathway:", Description, "<br>",
                "Gene Count:", gene_count, "/", total_genes, "<br>",
                "p-value:", format(pvalue, scientific = TRUE), "<br>",
                "Adjusted p-value:", format(p.adjust, scientific = TRUE)),
  textposition = "none",
  hovertemplate = "%{text}<extra></extra>"
)

p1 <- layout(
  p1,
  title = chart_title,
  xaxis = x_axis_spec,
  yaxis = y_axis_spec,
  margin = plot_margin
)

# Write the interactive widget to an HTML file
htmlwidgets::saveWidget(p1, "/workdir/execution_outputs/cancer_pathway_enrichment_final.html")
cat("Final cancer pathway enrichment plot saved!\n")

# Create comprehensive summary: a two-column (Metric, Value) table that is
# written to CSV and printed to the console.

# Number of rows in the differential-expression gene table
total_genes <- nrow(read_csv("/workdir/top_N_differentially_expressed_genes.csv"))

# Denominator of GeneRatio in the first plotted row. Stored as an integer so it
# is never rendered in scientific notation when coerced to text below.
genes_mapped <- as.integer(plot_data$total_genes[1])

# Locate the most significant cancer pathway by its p-value instead of assuming
# it is the first row of the CSV. which.min() returns the first minimum, so a
# file already sorted by p-value gives the same row as before. An empty or
# all-NA column yields integer(0), which is mapped to NA to keep Value at
# length 8.
top_idx <- which.min(cancer_related$pvalue)
if (length(top_idx) == 0) {
  top_idx <- NA_integer_
}

# Largest gene count among the plotted pathways (the rows of plot_data);
# NA instead of -Inf plus a warning when no count is available.
max_gene_count <- if (any(!is.na(plot_data$gene_count))) {
  as.integer(max(plot_data$gene_count, na.rm = TRUE))
} else {
  NA_integer_
}

results_summary <- data.frame(
  Metric = c("Total genes analyzed", "Genes successfully mapped",
             "Total pathways enriched (p < 0.1)", "Significant pathways (p < 0.05)",
             "Cancer-related pathways identified", "Most enriched cancer pathway",
             "P-value of top cancer pathway", "Maximum genes in a cancer pathway"),
  # Mixing numbers and strings in c() makes the whole column character
  Value = c(total_genes, genes_mapped,
            nrow(go_results),
            # na.rm so a single missing p-value does not turn the count into NA
            sum(go_results$pvalue < 0.05, na.rm = TRUE),
            nrow(cancer_related), cancer_related$Description[top_idx],
            format(cancer_related$pvalue[top_idx], scientific = TRUE),
            max_gene_count)
)

write_csv(results_summary, "/workdir/execution_outputs/final_analysis_summary.csv")

print("=== FINAL PATHWAY ENRICHMENT ANALYSIS SUMMARY ===")
print(results_summary)

cat("\n=== TOP 10 CANCER-ASSOCIATED PATHWAYS ===\n")
# Console table of the plotted pathways: pick and rename the three columns in
# one step, then round the enrichment score to two decimals.
top10_summary <- plot_data %>%
  select(
    `Pathway Description` = Description,
    `Gene Ratio` = GeneRatio,
    `Enrichment Score (-log10 p-value)` = neg_log_pvalue
  ) %>%
  mutate(
    `Enrichment Score (-log10 p-value)` =
      round(`Enrichment Score (-log10 p-value)`, 2)
  )

print(top10_summary)

# Narrative interpretation printed to the console.
# NOTE(review): the gene counts and p-values below are literal text, not values
# computed from the loaded tables - confirm they still match the current data.
insight_lines <- c(
  "\n=== KEY BIOLOGICAL INSIGHTS ===\n",
  "1. TOR SIGNALING: The most enriched pathway involves mTOR regulation\n",
  "   - Critical for cell growth, metabolism, and cancer progression\n",
  "   - 5 genes from our dataset are involved in this pathway\n",
  "   - p-value: 3.5e-04 (highly significant)\n\n",
  "2. AUTOPHAGY REGULATION: Multiple autophagy-related pathways enriched\n",
  "   - regulation of macroautophagy (5 genes, p = 4.5e-04)\n",
  "   - regulation of autophagy (6 genes, p = 1.3e-03)\n",
  "   - Key process in cancer cell survival and drug resistance\n\n",
  "3. HIPPO SIGNALING: Important tumor suppressor pathway\n",
  "   - Controls organ size and tumor suppression\n",
  "   - 2 genes involved (p = 0.016)\n\n"
)

# sep = "" because every element already carries its own line break(s)
cat(insight_lines, sep = "")

# List the output files of the analysis on the console.
cat("Generated output files:\n")

# The pathway and term counts are taken from the tables loaded at the top of
# the script (cancer_related, go_results) rather than typed in by hand, so the
# listing cannot drift from the files it describes.
files_created <- c(
  "1. cancer_pathway_enrichment_final.html - Interactive bar plot visualization",
  paste0("2. cancer_related_pathways.csv - Detailed results for ",
         nrow(cancer_related), " cancer pathways"),
  paste0("3. go_bp_enrichment_results.csv - Complete GO enrichment results (",
         nrow(go_results), " terms)"),
  "4. final_analysis_summary.csv - Summary statistics",
  "5. analysis_summary.csv - Basic analysis overview"
)

# One entry per line. The space before each newline reproduces the output of
# the earlier per-item cat(entry, "\n") calls exactly.
cat(paste0(files_created, " \n"), sep = "")

# Closing remarks printed to the console
conclusion_lines <- c(
  "\nCONCLUSION:\n",
  "While KEGG pathway analysis was not successful due to technical limitations,\n",
  "GO Biological Process enrichment revealed significant cancer-associated pathways.\n",
  "The analysis identified key oncological processes including TOR signaling,\n",
  "autophagy regulation, and Hippo signaling that are enriched in the\n",
  "differentially expressed gene set.\n"
)

# sep = "" because every element already ends with its own newline
cat(conclusion_lines, sep = "")