"""Reorder human-evaluation ratings into a fixed method order and tally wins.

Usage: ``python <this script> IX``

For run index ``IX`` the script reads ``rating_{IX}.csv`` and the matching
``method_order_{IX}.txt`` file, maps each row's positional ratings
(``method_0``..``method_2``) back onto the method names listed for that row,
writes the result to ``reordered_ratings_{IX}.csv``, and prints how often each
method received a rating of 1 (rows in which more than one method received a
1 are excluded from that tally).
"""
import sys
import warnings

import pandas as pd

# Fixed output column order for the three compared methods.
METHODS = ['p_direct', 'p_compose', 'proj_0.5']


def load_inputs(ix):
    """Read the ratings table and the per-row method order for run ``ix``.

    Returns:
        ``(ratings_df, method_order_df)``. ``ratings_df`` has its column
        names stripped of surrounding whitespace. ``method_order_df`` holds
        the last three comma-separated fields of every line of the order
        file, in columns ``Method_1``, ``Method_2`` and ``Method_3``.
    """
    ratings_df = pd.read_csv(f'rating_{ix}.csv')
    flip_results_df = pd.read_csv(
        f'../figures/human_eval/figure_rank_three_{ix}/method_order/method_order_{ix}.txt',
        sep='\t',
        header=None,
    )

    # Column names in the rating sheet may carry stray whitespace.
    ratings_df.columns = ratings_df.columns.str.strip()

    # Each line is read as one tab-delimited cell; its last three
    # comma-separated fields are used as the method names for that row.
    split_columns = flip_results_df[0].str.split(',', expand=True)
    method_order_df = split_columns.iloc[:, -3:].copy()
    method_order_df.columns = ['Method_1', 'Method_2', 'Method_3']
    return ratings_df, method_order_df


def reorder_ratings(ratings_df, method_order_df):
    """Map positional ratings onto method names, row by row.

    Args:
        ratings_df: frame with columns ``method_0``, ``method_1``,
            ``method_2`` and ``tied_all_bad``.
        method_order_df: frame with columns ``Method_1``..``Method_3`` giving,
            for the same row, the method name behind each rating position.

    Returns:
        A frame with columns ``p_direct``, ``p_compose``, ``proj_0.5`` and
        ``all_bad``. Rows whose ``tied_all_bad`` is "yes" (case-insensitive,
        surrounding whitespace ignored) carry ``'yes'`` in ``all_bad`` and no
        ratings; all other rows carry the ratings and an empty ``all_bad``.
        Rating columns are numeric, with NaN where no rating applies.
    """
    if len(ratings_df) != len(method_order_df):
        # Rows are paired positionally, so a length mismatch means the tail of
        # the longer input is ignored; surface that instead of hiding it.
        warnings.warn(
            f'ratings has {len(ratings_df)} rows but method order has '
            f'{len(method_order_df)}; extra rows are ignored',
            stacklevel=2,
        )

    rows = []
    for (_, rating), (_, order) in zip(ratings_df.iterrows(),
                                       method_order_df.iterrows()):
        # str() keeps this safe when the column is entirely blank and pandas
        # therefore parsed it as float NaN rather than as strings.
        if str(rating['tied_all_bad']).strip().lower() == 'yes':
            rows.append([None, None, None, 'yes'])
            continue

        by_method = {
            order['Method_1'].strip(): rating['method_0'],
            order['Method_2'].strip(): rating['method_1'],
            order['Method_3'].strip(): rating['method_2'],
        }
        rows.append([by_method.get(name) for name in METHODS] + [None])

    reordered = pd.DataFrame(rows, columns=METHODS + ['all_bad'])
    for name in METHODS:
        # Integer dtype when the column has no gaps, float with NaN otherwise.
        reordered[name] = pd.to_numeric(
            reordered[name], errors='coerce', downcast='integer')
    return reordered


def count_first_places(reordered):
    """Count ratings of 1 per method, skipping rows with tied first places.

    Rows in which more than one method has a rating of 1 are dropped before
    counting, so a tie never counts as a win for several methods at once.

    Returns:
        ``(first_counts, all_bad_count)``: a dict mapping each method name to
        the number of remaining rows where its rating is 1, and the number of
        remaining rows whose ``all_bad`` is ``'yes'``.
    """
    is_first = reordered[METHODS].eq(1)
    filtered = reordered[is_first.sum(axis=1) <= 1]

    # Plain ints keep the printed dict independent of the numpy scalar repr.
    first_counts = {name: int((filtered[name] == 1).sum()) for name in METHODS}
    all_bad_count = int((filtered['all_bad'] == 'yes').sum())
    return first_counts, all_bad_count


def main(argv=None):
    """Run the pipeline for the run index given as the first CLI argument."""
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        sys.exit(f'usage: {argv[0] if argv else "script"} IX')
    ix = int(argv[1])

    ratings_df, method_order_df = load_inputs(ix)
    ratings_df_reordered = reorder_ratings(ratings_df, method_order_df)

    # Missing values are written as empty fields by to_csv.
    ratings_df_reordered.to_csv(f'reordered_ratings_{ix}.csv', index=False)

    filtered_first_counts, all_bad_count_filtered = count_first_places(
        ratings_df_reordered)
    print("Number of times each method is ranked first:", filtered_first_counts)
    print("Number of times the rating is 'all_bad':", all_bad_count_filtered)


if __name__ == '__main__':
    main()



