import logging
from pathlib import Path

import click
import joblib
from gensim.models import word2vec

# Configure the root logger at INFO level with timestamped records so that
# log output emitted while the script runs is shown on the console.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s : %(levelname)s : %(message)s",
)


@click.command()
@click.option(
    "--input-file",
    # The corpus must be an existing regular file, not a directory.
    type=click.Path(exists=True, dir_okay=False),
    help="Input corpus file path",
    default="outputs/mol2vec_corpus.txt",
    required=True,
)
@click.option(
    "--output-file",
    # The model is written to a single file; reject an existing directory.
    type=click.Path(dir_okay=False),
    help="Output model file path",
    default="outputs/mol2vec.model",
    required=True,
)
@click.option(
    "--n-jobs",
    type=click.INT,
    help="Number of CPU cores used, by default uses all available",
    default=-1,
    required=True,
)
def train_mol2vec(
    input_file: str,
    output_file: str,
    n_jobs: int,
    **kwargs,
) -> None:
    """
    Train a Mol2Vec model.

    A gensim ``Word2Vec`` model is trained on the corpus file and saved to
    ``output_file``. The hyperparameters are fixed to the values used below
    (``vector_size=300``, ``window=10``, ``min_count=3``, ``sg=1``).

    Parameters
    ----------
    input_file
        Path to the training corpus, passed to ``Word2Vec`` as
        ``corpus_file``.
    output_file
        Path the trained model is saved to. Missing parent directories are
        created before training starts.
    n_jobs
        Requested number of workers; resolved to an actual worker count with
        ``joblib.effective_n_jobs`` (``-1`` by default).
    **kwargs
        Extra keyword arguments forwarded unchanged to ``Word2Vec``. The
        command line defines no options that populate them, so they are only
        relevant when the underlying callback is invoked programmatically.
    """
    # Make sure the destination directory exists *before* training: otherwise
    # a missing directory would only surface in ``model.save`` after the whole
    # (potentially long) training run, and the trained model would be lost.
    Path(output_file).parent.mkdir(parents=True, exist_ok=True)

    # following original Mol2Vec settings
    model = word2vec.Word2Vec(
        corpus_file=input_file,
        vector_size=300,
        window=10,
        min_count=3,
        sg=1,
        workers=joblib.effective_n_jobs(n_jobs),
        **kwargs,
    )
    model.save(output_file)


# Script entry point: invoke the click command, which reads its options
# (--input-file, --output-file, --n-jobs) from the command line.
if __name__ == "__main__":
    train_mol2vec()
