import torch
import transformers
from transformers import AutoTokenizer
from langchain.chains import LLMChain
from langchain_huggingface import HuggingFacePipeline
from langchain_core.prompts import PromptTemplate
import re
import pandas as pd
import os
import logging
import warnings
# Silence every Python-level warning for the remainder of the run.
warnings.filterwarnings(action='ignore')

# Restrict Hugging Face output to errors: via the environment variable, via
# the "transformers.pipelines" logger, and via the library's own verbosity API.
os.environ["TRANSFORMERS_VERBOSITY"] = "error"
_pipelines_logger = logging.getLogger("transformers.pipelines")
_pipelines_logger.setLevel(logging.ERROR)
transformers.logging.set_verbosity_error()

# Switch off the memory-efficient and flash scaled-dot-product-attention
# backends on CUDA.
# NOTE(review): the reason is not stated in this file — presumably a
# compatibility workaround for the target GPU; confirm before removing.
for _toggle_sdp_backend in (
    torch.backends.cuda.enable_mem_efficient_sdp,
    torch.backends.cuda.enable_flash_sdp,
):
    _toggle_sdp_backend(False)

# Directory that holds the raw review data. It is empty here, so the derived
# path below resolves to "/Reviews.csv".
FR_RAW_DATA_DIR = ''

# How many leading rows of the reviews file are processed.
FOOD_REVIEW_SIZE = 20_000

# Input CSV with the raw reviews.
REVIEWS_PATH = f"{FR_RAW_DATA_DIR}/Reviews.csv"


# Hugging Face model id of the checkpoint used for summarisation.
model = "meta-llama/Llama-2-7b-chat-hf"

# Read the access token from the environment rather than hard-coding it in
# the source. When HF_TOKEN is unset (or empty) None is passed, which lets the
# Hugging Face libraries fall back to the credentials stored by
# `huggingface-cli login`.
_hf_token = os.environ.get("HF_TOKEN") or None

tokenizer = AutoTokenizer.from_pretrained(model, token=_hf_token)

pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    # The pipeline loads the model weights itself, so it needs the token too.
    token=_hf_token,
    torch_dtype=torch.bfloat16,
    # NOTE(review): Llama support ships with transformers, so executing remote
    # code is likely unnecessary here — confirm before removing.
    trust_remote_code=True,
    device_map="auto",
    # Only `max_new_tokens` is set: it overrides `max_length` whenever both
    # are given, so the former `max_length=100` had no effect.
    max_new_tokens=80,
    # Greedy decoding gives deterministic summaries. This replaces the former
    # `model_kwargs={'temperature': 0}` on the LangChain wrapper, which is not
    # forwarded to an already-constructed pipeline and therefore did nothing.
    do_sample=False,
    eos_token_id=tokenizer.eos_token_id,
)

# LangChain wrapper around the pipeline; generation settings live on the
# pipeline above.
llm = HuggingFacePipeline(pipeline=pipeline)

# Prompt used for every review: the review text is substituted for {text}
# between the triple backticks, and the prompt ends with a "SUMMARY:" cue.
template = (
    "Give a concise summary of the review delimited by triple backticks. "
    "```{text}``` SUMMARY:"
)

prompt = PromptTemplate(input_variables=["text"], template=template)

# Compose prompt and model so that invoking the chain formats the prompt and
# passes it on to `llm`.
llm_chain = prompt | llm


def _extract_summary(generated: str) -> str:
    """Return the summary text contained in a raw model generation.

    Everything up to and including the last ``SUMMARY:`` marker is dropped
    (the generation may echo the prompt); when the marker is absent the whole
    text is kept. Runs of whitespace, newlines included, are collapsed to a
    single space so that words on adjacent lines are not fused together.
    """
    _, _, tail = generated.rpartition('SUMMARY:')
    return ' '.join(tail.split())


def _summarize(review: object) -> str:
    """Summarise a single review cell with ``llm_chain``.

    Returns an empty string for cells that are not text.
    """
    # Missing CSV cells are read by pandas as NaN (a float); there is nothing
    # to summarise, so skip the model call instead of prompting with "nan".
    if not isinstance(review, str):
        return ''
    return _extract_summary(llm_chain.invoke({"text": review}))


# Read only the rows that will be processed instead of loading the whole file
# and slicing it afterwards.
csv_df = pd.read_csv(REVIEWS_PATH, nrows=FOOD_REVIEW_SIZE)

# Any 'Summary' column already present in the input is replaced by the
# generated summaries.
csv_df['Summary'] = [_summarize(review) for review in csv_df['Text']]

csv_df.to_csv(FR_RAW_DATA_DIR + '/Reviews_Summary.csv', index=False)