library(plotly)
library(dplyr)

# Load the enrichment results. Strings are kept as character (not factor) so
# that strsplit() and nchar() below work on any R version.
df <- read.csv("/workdir/cancer_related_pathways.csv", stringsAsFactors = FALSE)

# Fail early, with a readable message, if the input lacks the columns that the
# rest of the script reads, or if it contains no rows to plot.
required_cols <- c("Description", "GeneRatio", "p.adjust", "Count",
                   "FoldEnrichment")
missing_cols <- setdiff(required_cols, names(df))
if (length(missing_cols) > 0) {
  stop("Input is missing required column(s): ",
       paste(missing_cols, collapse = ", "), call. = FALSE)
}
if (nrow(df) == 0) {
  stop("Input contains no pathways to plot.", call. = FALSE)
}

# Convert GeneRatio strings of the form "k/n" into the numeric value k / n.
# A value without a "/" is taken as an already-computed ratio. vapply()
# guarantees a numeric vector with one element per row.
df$GeneRatio_numeric <- vapply(
  strsplit(as.character(df$GeneRatio), "/", fixed = TRUE),
  function(parts) {
    values <- as.numeric(parts)
    if (length(values) >= 2) values[1] / values[2] else values[1]
  },
  numeric(1)
)

# Negative log10 of the adjusted p-value (larger = more significant).
df$neg_log_padj <- -log10(df$p.adjust)

# Sort so that the first row is the most significant pathway.
df <- df %>% arrange(p.adjust)

# Numeric y-axis position for each pathway (avoids factor-ordering issues).
# The y axis increases upwards, so the first (most significant) row gets the
# largest position and is therefore drawn at the top of the plot.
df$y_pos <- rev(seq_len(nrow(df)))

# Truncate pathway names longer than 45 characters to 42 characters plus "..."
# so the axis labels stay readable.
df$Description_short <- ifelse(
  nchar(df$Description) > 45,
  paste0(substr(df$Description, 1, 42), "..."),
  df$Description
)

# Build the enrichment bubble plot.
#   x      : numeric gene ratio
#   y      : numeric row position (df$y_pos); tick labels show pathway names
#   size   : gene count (bubble area)
#   colour : adjusted p-value (darker = smaller p-value)
#
# Bubble size and colour are configured inside `marker`, because `sizeref`,
# `sizemin`, `colorscale` and `reversescale` are marker attributes of a scatter
# trace; passed as top-level plot_ly() arguments they are not applied.
p <- plotly::plot_ly(
  data = df,
  x = ~GeneRatio_numeric,
  y = ~y_pos,
  type = "scatter",
  mode = "markers",
  text = ~paste("Pathway:", Description,
                "<br>Gene Ratio:", GeneRatio,
                "<br>Gene Count:", Count,
                "<br>Adjusted p-value:",
                format(p.adjust, scientific = TRUE, digits = 3),
                "<br>Fold Enrichment:", round(FoldEnrichment, 2)),
  # Show only the custom text; <extra></extra> hides the trace-name box.
  hovertemplate = "%{text}<extra></extra>",
  marker = list(
    size = ~Count,
    # Area scaling: sizeref = 2 * max(size) / (max diameter in px)^2, here
    # targeting a largest bubble of about 50 px.
    sizemode = "area",
    sizeref = 2.0 * max(df$Count, na.rm = TRUE) / (50^2),
    sizemin = 12,
    color = ~p.adjust,
    # Light-to-dark blue scale, reversed below so that the smallest adjusted
    # p-values are drawn in the darkest colour.
    colorscale = list(
      c(0, "#f7fbff"),
      c(0.2, "#deebf7"),
      c(0.4, "#c6dbef"),
      c(0.6, "#9ecae1"),
      c(0.8, "#4292c6"),
      c(1, "#08519c")
    ),
    reversescale = TRUE,
    showscale = TRUE,
    colorbar = list(
      title = list(text = "Adjusted p-value", side = "right"),
      len = 0.8,
      thickness = 15
    ),
    line = list(width = 1, color = "white"),
    opacity = 0.8
  )
) %>%
  layout(
    title = list(
      text = "Cancer-Related Pathways Enrichment Analysis",
      font = list(size = 16, family = "Arial, sans-serif")
    ),
    xaxis = list(
      title = list(text = "Gene Ratio", font = list(size = 14)),
      tickfont = list(size = 12),
      # Start at zero and leave 15% headroom to the right of the largest ratio
      # so the outermost bubble is not clipped.
      range = c(0, max(df$GeneRatio_numeric, na.rm = TRUE) * 1.15),
      tickmode = "linear",
      tick0 = 0,
      dtick = 0.02,
      showgrid = TRUE,
      gridcolor = "lightgray",
      gridwidth = 1,
      zeroline = TRUE,
      zerolinecolor = "gray"
    ),
    yaxis = list(
      title = list(text = "Pathway", font = list(size = 14)),
      tickfont = list(size = 10),
      # Replace the numeric positions with the (truncated) pathway names.
      tickmode = "array",
      tickvals = df$y_pos,
      ticktext = df$Description_short,
      showgrid = TRUE,
      gridcolor = "lightgray",
      gridwidth = 1,
      # Half a unit of padding above and below the first/last pathway.
      range = c(0.5, nrow(df) + 0.5)
    ),
    plot_bgcolor = "white",
    paper_bgcolor = "white",
    # Wide left margin for the pathway labels; right margin for the colorbar.
    margin = list(l = 400, r = 120, t = 80, b = 80)
  )

# Save the plot as an HTML widget. The output directory is created first
# because saveWidget() does not create missing directories.
output_file <- "/workdir/execution_outputs/improved_bubble_plot.html"
dir.create(dirname(output_file), recursive = TRUE, showWarnings = FALSE)
htmlwidgets::saveWidget(p, output_file)

# Report what was produced. writeLines() prints the messages as plain text,
# without the `[1] "..."` decoration that print() adds to character vectors.
x_axis_max <- round(max(df$GeneRatio_numeric, na.rm = TRUE) * 1.15, 3)
writeLines(c(
  "✓ Improved bubble plot created successfully!",
  paste0("  File saved: ", output_file),
  "",
  "=== Key Improvements Made ===",
  paste("• X-axis scale: 0 to", x_axis_max, "with regular 0.02 intervals"),
  "• Bubble sizes: Better scaling with minimum size of 12 pixels",
  "• Color scale: Darker colors for more significant pathways (lower p-values)",
  "• Layout: Pathways ordered by significance (most significant at top)",
  "• Margins: Increased space for pathway names and color legend",
  "",
  "Data summary:",
  paste("• Gene ratio range:",
        round(min(df$GeneRatio_numeric, na.rm = TRUE), 4), "to",
        round(max(df$GeneRatio_numeric, na.rm = TRUE), 4)),
  paste("• Gene count range:", min(df$Count, na.rm = TRUE), "to",
        max(df$Count, na.rm = TRUE), "genes"),
  # which.min()/which.max() pick the row directly, so these lines do not
  # depend on how df happens to be sorted.
  paste("• Most significant pathway:", df$Description[which.min(df$p.adjust)]),
  paste("• Most genes in pathway:", df$Description[which.max(df$Count)])
))