import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer

# Near-duplicate removal: keep the first occurrence of each sentence and drop
# every later row whose TF-IDF cosine similarity to any earlier row exceeds
# `threshold`.
df = pd.read_csv('PATH_TO_YOUR_RAW_CSV', encoding='ISO-8859-1')

# TfidfVectorizer only accepts strings. Blank cells are read as NaN and
# numeric-looking cells may be parsed as numbers, either of which makes the
# vectorizer raise, so normalise the column to text first (NaN -> '').
sentences = df['Sentence'].fillna('').astype(str)

# convert the text data to a TF-IDF matrix (one row per sentence)
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(sentences)

# calculate the pairwise similarity matrix (n_rows x n_rows)
# NOTE: this grows quadratically with the number of rows.
similarity_matrix = cosine_similarity(tfidf_matrix)

# define the threshold
threshold = 0.3

# Mark row j when any earlier row i < j is more similar than the threshold.
# Comparing a whole column slice at once gives the same result as testing
# every (i, j) pair individually, without the nested Python loops.
rows_to_delete = {
    j
    for j in range(1, similarity_matrix.shape[0])
    if (similarity_matrix[:j, j] > threshold).any()
}

# The marked values are row positions, so pick the surviving rows by position
# rather than relying on the index labels happening to match the positions.
kept_positions = [
    position for position in range(len(df)) if position not in rows_to_delete
]
df_filtered = df.iloc[kept_positions]

df_filtered.to_csv('PATH_TO_YOUR_CSV', index=False)
