import gc
import torch
import contextlib

from vllm.utils import is_cpu
from vllm.distributed import destroy_distributed_environment, destroy_model_parallel

def vllm_cleanup() -> None:
    """Tear down vLLM / torch distributed state and release cached memory.

    Steps, in order:

    1. Call vLLM's ``destroy_model_parallel()`` and
       ``destroy_distributed_environment()``.
    2. Destroy the default ``torch.distributed`` process group, if one is
       still initialized (skipped otherwise).
    3. Run a full garbage-collection pass.
    4. Unless ``is_cpu()`` returns true, call ``torch.cuda.empty_cache()``.
    """
    destroy_model_parallel()
    destroy_distributed_environment()

    # Check explicitly whether a default process group still exists instead
    # of relying solely on the AssertionError that destroy_process_group()
    # raises when none does: that signal comes from an `assert` statement,
    # which is stripped under `python -O`, so a different exception could
    # escape. The suppress() is kept so any AssertionError raised during
    # teardown is still tolerated, as before.
    if torch.distributed.is_available() and torch.distributed.is_initialized():
        with contextlib.suppress(AssertionError):
            torch.distributed.destroy_process_group()

    # Collect before emptying the CUDA cache, presumably so that memory held
    # only by unreachable Python objects is released first.
    gc.collect()
    if not is_cpu():
        torch.cuda.empty_cache()