# -*- coding: utf-8 -*-
"""
Created on Fri Jan 24 09:45:12 2025

@author: baran
"""
from prompt_maker import input_maker
from gensim.models.doc2vec import Doc2Vec, TaggedDocument

# Collect everything input_maker() yields into a list; `documents` is kept
# as a second name for the same list object.
input_reports = list(input_maker())
documents = input_reports

# Wrap each report as a TaggedDocument: the words are the whitespace-split
# tokens of the report and the single tag is the report's position in
# `documents`, rendered as a string.
tagged_data = [
    TaggedDocument(words=report.split(), tags=[str(position)])
    for position, report in enumerate(documents)
]

# Doc2Vec hyperparameters, gathered in one place.
_doc2vec_params = {
    "vector_size": 100,  # dimensionality of the feature vectors
    "window": 5,         # context window size
    "min_count": 2,      # ignore words with total frequency lower than this
    "workers": 4,        # number of worker threads for training
    "epochs": 40,        # number of training epochs
}
model = Doc2Vec(**_doc2vec_params)

# Build the vocabulary from the tagged reports before training.
model.build_vocab(tagged_data)

# Train on the same tagged reports; the example count and the number of
# epochs are read back from the model itself.
model.train(
    tagged_data,
    total_examples=model.corpus_count,
    epochs=model.epochs,
)