# %%
import os
import argparse


def get_args():
    """Parse the script's command-line options.

    Recognised options (all strings, all optional):
        --model_name        model identifier to load
        --model_short_name  short tag used in output file names
        --sample_path       directory holding the sample CSV files

    Unrecognised arguments are tolerated (``parse_known_args``), so the
    script can also run inside hosts that inject their own argv entries.

    Returns:
        argparse.Namespace with ``model_name``, ``model_short_name`` and
        ``sample_path`` attributes.
    """
    option_defaults = {
        'model_name': 'qwen/Qwen2.5-1.5B-Instruct',
        'model_short_name': 'qwen_1.5B',
        'sample_path': 'lime_samples',
    }

    cli = argparse.ArgumentParser()
    for option, fallback in option_defaults.items():
        cli.add_argument(f'--{option}', type=str, default=fallback)

    known, _unknown = cli.parse_known_args()
    return known

# Resolve the command-line options once and expose them as module-level names.
args = get_args()
model_name, model_short_name, sample_path = (
    args.model_name,
    args.model_short_name,
    args.sample_path,
)

# Echo the effective configuration, one setting per line.
print(
    f"Model Name: {model_name}",
    f"Model Short Name: {model_short_name}",
    f"Sample Path: {sample_path}",
    sep="\n",
)



# %%
from modelscope import AutoModelForCausalLM, AutoTokenizer,GenerationConfig
import torch
from openai import OpenAI
import numpy as np
import sys
import pandas as pd

# %%
# Line-delimited JSON loaded into a DataFrame (presumably the Natural
# Questions dev split, judging by the path -- confirm).
# NOTE(review): no active code visible in this script reads `data`; confirm
# whether this load is still needed before removing it.
data = pd.read_json("nq/nq-dev.jsonl", lines=True)

# %%
class QwenPredictor:
    """Answer single questions with a causal chat model, sampling disabled.

    The model and tokenizer are loaded once in ``__init__``; ``predict`` then
    wraps one question in a fixed chat prompt and returns the decoded reply.
    """

    def __init__(self, model_name="Qwen/Qwen2.5-1.5B-Instruct", **kwargs):
        """Load the model and its tokenizer.

        Args:
            model_name: Identifier passed to both ``from_pretrained`` calls.
            **kwargs: Forwarded unchanged to the next ``__init__`` in the MRO.
        """
        super().__init__(**kwargs)
        # NOTE(review): `temperature` is given to from_pretrained here, not to
        # generate(); predict() runs with do_sample=False, so it presumably
        # does not influence the outputs -- confirm before removing.
        self.model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype="auto",
            device_map="sequential",
            attn_implementation='flash_attention_2',
            temperature=1e-5,
        )
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)

    @torch.no_grad()
    def predict(self, text, **kwargs):
        """Generate the model's reply to one question.

        Args:
            text: The question, used verbatim as the user message.
            **kwargs: Accepted for call-site compatibility; not used.

        Returns:
            The newly generated text (prompt tokens stripped, special tokens
            skipped) as a single string.
        """
        messages = [
            {"role": "system", "content": "You are a helpful assistant, answer the question briefly within 10 words. You will get penalty if you answer too long."},
            {"role": "user", "content": text},
        ]
        prompt = self.tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )

        # The chat template already rendered the special tokens into the
        # prompt text, so the tokenizer must not add them a second time.
        # Inputs follow the device the loaded model reports instead of a
        # hard-coded 'cuda:0'.
        model_inputs = self.tokenizer(
            prompt, return_tensors="pt", add_special_tokens=False
        ).to(self.model.device)

        # Only `sequences` is read from the result, so per-step logits are not
        # requested (they would be kept for every generated token for nothing).
        # NOTE(review): no max_new_tokens / max_length is set; the output
        # length is governed by GenerationConfig defaults -- confirm intended.
        res = self.model.generate(
            **model_inputs,
            return_dict_in_generate=True,
            pad_token_id=self.tokenizer.eos_token_id,
            generation_config=GenerationConfig(do_sample=False),
        )

        # Drop the echoed prompt tokens and decode only the completion.
        prompt_len = model_inputs["input_ids"].shape[1]
        completion_ids = res.sequences[:, prompt_len:].cpu().numpy()
        return self.tokenizer.batch_decode(completion_ids, skip_special_tokens=True)[0]


# %%
# Load the model once; the single instance is reused for every row.
predictor = QwenPredictor(model_name=model_name)

# %%

# Base table of perturbed questions: tab-separated, empty cells kept as ""
# (not NaN), and `binary_representation` read as text.
samples_df = pd.read_csv(
    f'./{sample_path}/nq_perturb.csv',
    sep='\t',
    index_col=None,
    keep_default_na=False,
    dtype={'binary_representation': str},
)
samples_df

# %%
# Start with an empty answer for every row.
samples_df['Answer'] = ""

# %%
# If this model already has an output file, continue from it instead of the
# blank table; it is read with the same options as the base table.
if os.path.exists(f'./{sample_path}/nq_perturb_{model_short_name}.csv'):
    samples_df = pd.read_csv(
        f'./{sample_path}/nq_perturb_{model_short_name}.csv',
        sep='\t',
        index_col=None,
        keep_default_na=False,
        dtype={'binary_representation': str},
    )

# %%
from tqdm.auto import tqdm

# Per-model output file; it is rewritten in full at every checkpoint and once
# more after the loop.
output_path = f'./{sample_path}/nq_perturb_{model_short_name}.csv'
# A checkpoint is written whenever the row index is a multiple of this value.
checkpoint_every = 10000

for i in tqdm(range(len(samples_df))):
    # A non-empty answer means the row was already processed; skip it.
    if samples_df.at[i, 'Answer'] != "":
        continue

    # Scalar `.at` access avoids building a full row Series on every iteration.
    samples_df.at[i, 'Answer'] = predictor.predict(samples_df.at[i, 'sample_question'])

    if i % checkpoint_every == 0:
        samples_df.to_csv(output_path, sep='\t', index=False, header=True)

# Final write so rows answered after the last checkpoint are persisted.
samples_df.to_csv(output_path, sep='\t', index=False, header=True)

