
import json
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
import torch


# Load the tokenizer and the sequence-classification model from the same
# checkpoint location.
tokenizer = AutoTokenizer.from_pretrained("path")
model = AutoModelForSequenceClassification.from_pretrained("path")

# Device selection: keep the original preference for the second GPU
# ("cuda:1"), but only when it actually exists. On a single-GPU machine
# "cuda:1" is an invalid device ordinal, so fall back to "cuda:0"; without
# CUDA, run on the CPU.
if torch.cuda.is_available():
    _device = torch.device(
        "cuda:1" if torch.cuda.device_count() > 1 else "cuda:0"
    )
else:
    _device = torch.device("cpu")

# Text-classification pipeline used by the rest of this script; the
# truncation settings (truncation=True, max_length=512) are passed through
# to the pipeline unchanged.
classifier = pipeline(
    "text-classification",
    model=model,
    tokenizer=tokenizer,
    truncation=True,
    max_length=512,
    device=_device,
)

def classify_responses_from_jsonl(file_path):
    """Classify the ``"response"`` field of every record in a JSONL file.

    Each non-blank line is parsed as JSON and its ``"response"`` value is
    passed to the module-level ``classifier``. Processing is best-effort:
    a line without a ``"response"`` field produces a warning, and a line
    that fails to parse or classify produces an error message; in both
    cases the line is skipped and processing continues.

    Args:
        file_path: Path of the UTF-8 encoded JSONL file to read (a leading
            byte-order mark is tolerated).

    Returns:
        A ``(results, normal_count)`` tuple. ``results`` is a list of
        ``{"response": ..., "classification": ...}`` dicts, one per
        successfully classified line, in file order. ``normal_count`` is
        the number of those lines whose first classification entry has
        the label ``'NORMAL'``.

    Raises:
        OSError: If the file cannot be opened.
    """
    results = []
    normal_count = 0

    # "utf-8-sig" decodes plain UTF-8 identically but also drops a leading
    # BOM, which json.loads would otherwise reject on the first record.
    with open(file_path, "r", encoding="utf-8-sig") as f:
        for line in f:
            stripped = line.strip()
            # Blank lines carry no record; skip them instead of reporting
            # a spurious JSON decoding error.
            if not stripped:
                continue

            try:
                data = json.loads(stripped)

                response = data.get("response", None)
                if response is None:
                    print(f"Warning: 'response' field missing in line: {line}")
                    continue

                classification_result = classifier(response)

                # Only the first entry of the classifier output is used
                # for the NORMAL tally.
                if classification_result[0]['label'] == 'NORMAL':
                    normal_count += 1

                results.append({
                    "response": response,
                    "classification": classification_result
                })
            except Exception as e:
                # Deliberately broad: one bad record (malformed JSON,
                # non-object JSON, classifier failure) must not abort the
                # whole run.
                print(f"Error processing line: {line}. Error: {e}")

    return results, normal_count



# Run the classifier over the input file and collect the per-response
# results together with the NORMAL tally.
file_path = "file"
classification_results, normal_count = classify_responses_from_jsonl(file_path)

# Report each classified response as three lines (response, classification,
# separator); one print call with a newline separator emits the same text
# as three consecutive prints.
for result in classification_results:
    print(
        f"Response: {result['response']}",
        f"Classification: {result['classification']}",
        "-" * 50,
        sep="\n",
    )

# Final summary line.
print(f"Total responses classified as 'NORMAL': {normal_count}")