"""Common File Read utilities.
"""
__author__ = 'XYZ'


import errno
import glob
import json
import os
import pathlib
import sys
import random

from collections import OrderedDict


from ._log_ import logger

## Handle to this very module object; load_file() looks up the
## '<ext>_load' loader functions on it by name.
this = sys.modules[__name__]

## Module-level logger built by the package-local `logger` factory.
log = logger(__file__)


def get_basepath(p):
  """Return the directory for a path, without a trailing separator.

  For an existing directory the directory itself is returned; for anything
  else (a file, or a path that does not exist) the parent directory part of
  `p` is returned. Trailing separators on `p` are tolerated.

  Args:
    p: a file or directory path.

  Returns:
    The directory path as a string with trailing separators removed. A path
    made only of separators (the filesystem root) is returned as a single
    separator. The result is an empty string when `p` has no directory part
    (e.g. a bare relative file name).
  """
  ## os.path.join(x, '') guarantees a trailing separator, so both branches
  ## end up in the same shape before stripping.
  if os.path.isdir(p):
    base_path = os.path.join(p, '')
  else:
    base_path = os.path.join(os.path.dirname(p), '')
  _bp = base_path.rstrip(os.path.sep)
  if not _bp and base_path.startswith(os.path.sep):
    ## Stripping ate the whole path: it was the root. Returning '' here would
    ## later be interpreted as the current working directory.
    return os.path.sep
  ## A file given with a trailing separator ('dir/file.txt/') still resolves
  ## to a file after stripping; recurse to obtain its directory.
  if os.path.isfile(_bp):
    _bp = get_basepath(_bp)
  return _bp


def find_files(input_dir, allowed_exts=(), allowed_filenames=(), ext_case_sensitive=False, filename_case_sensitive=False):
  """Walk `input_dir` and return its sub-directory tree as nested dicts.

  Every sub-directory below `input_dir` becomes a dict stored under its name
  inside its parent's dict (directories without matching files stay empty
  dicts). A directory holding at least one file that passes the filters
  additionally gets these entries:

    - `_filepaths`: the matching files joined onto the walked directory path
    - `_filenames`: the matching file names, in the same order
    - `_basepath`: the directory part of the first entry of `_filepaths`

  Files located directly in `input_dir` are not reported; only its
  sub-directories are.

  Args:
    input_dir: directory to walk.
    allowed_exts: extensions to keep, compared against the extension returned
      by os.path.splitext (so including the leading dot). Empty keeps all.
    allowed_filenames: file names without extension to keep. Empty keeps all.
    ext_case_sensitive: compare extensions exactly instead of ignoring case.
    filename_case_sensitive: compare file names exactly instead of ignoring
      case.

  Returns:
    The nested dict described above; empty when nothing was walked.
  """
  ## Normalise both filters once, instead of once per visited file.
  if ext_case_sensitive:
    wanted_exts = set(allowed_exts or ())
  else:
    wanted_exts = {e.lower() for e in (allowed_exts or ())}
  if filename_case_sensitive:
    wanted_names = set(allowed_filenames or ())
  else:
    wanted_names = {n.lower() for n in (allowed_filenames or ())}

  directory_structure = {}

  for root, _dirs, files in os.walk(input_dir):
    ## Directory name relative to the input directory
    relative_dir = os.path.relpath(root, input_dir)

    ## Skip the input directory itself
    if relative_dir == '.':
      continue

    ## Descend (creating levels on demand) to the dict of this directory
    current_dict = directory_structure
    for part in relative_dir.split(os.path.sep):
      current_dict = current_dict.setdefault(part, {})

    ## Filter files based on allowed extensions and filenames
    filtered_files = []
    for file in files:
      filename, ext = os.path.splitext(file)
      if wanted_exts:
        ext_key = ext if ext_case_sensitive else ext.lower()
        if ext_key not in wanted_exts:
          continue
      if wanted_names:
        name_key = filename if filename_case_sensitive else filename.lower()
        if name_key not in wanted_names:
          continue
      filtered_files.append(file)

    ## Only directories with at least one match get the file entries
    if filtered_files:
      file_paths = [os.path.join(root, file) for file in filtered_files]
      current_dict['_filepaths'] = file_paths
      current_dict['_filenames'] = list(filtered_files)
      current_dict['_basepath'] = os.path.dirname(file_paths[0])

  return directory_structure


def get_input_files(
  from_path,
  allowed_extensions=('.jpg', '.jpeg', '.png'),
  sample_limit=None,
  shuffle=False,
  seed=42,
):
  """Collect input file paths from a comma-separated list of paths.

  Each comma-separated entry of `from_path` is stripped and handled as:

    - a directory: searched recursively; files whose lowercased path ends
      with one of `allowed_extensions` are added in sorted order,
    - a file with an allowed extension: added as-is,
    - a `.csv` file: expanded through `process_csv`,
    - any other existing file: ignored,
    - a path that does not exist: FileNotFoundError.

  Args:
    from_path: one path, or several separated by commas.
    allowed_extensions: extensions to accept; matched case-insensitively.
    sample_limit: keep at most this many leading entries (after shuffling);
      None keeps everything.
    shuffle: shuffle the collected list before applying `sample_limit`.
    seed: seed for the shuffle.

  Returns:
    List of file paths.

  Raises:
    FileNotFoundError: an entry is neither an existing file nor directory.
  """
  ## Normalize allowed extensions to lowercase for case-insensitive comparison
  allowed_extensions = tuple(ext.lower() for ext in allowed_extensions)

  input_files = []

  for path in from_path.split(','):
    path = path.strip()
    if os.path.isdir(path):
      ## glob returns entries in filesystem order, which varies between
      ## machines; sort so that a seeded shuffle is actually reproducible.
      input_files.extend(sorted(
        f for f in glob.glob(os.path.join(path, '**/*'), recursive=True)
        if os.path.isfile(f) and f.lower().endswith(allowed_extensions)
      ))
    elif os.path.isfile(path):
      if path.lower().endswith(allowed_extensions):
        input_files.append(path)
      elif path.lower().endswith('.csv'):
        ## NOTE(review): process_csv is not defined in the visible part of
        ## this module - confirm it is provided elsewhere.
        input_files.extend(process_csv(path))
    else:
      raise FileNotFoundError(f"Path '{path}' does not exist.")

  ## Shuffle before limiting. A private Random instance yields the same order
  ## as seeding the module-level generator would, without reseeding the
  ## process-wide RNG behind the caller's back.
  if shuffle:
    random.Random(seed).shuffle(input_files)

  ## Apply sample limit
  if sample_limit is not None:
    input_files = input_files[:sample_limit]

  return input_files


def py_load(filepath, ordered=False, encoding='utf-8'):
  """Run the Python file at `filepath` and return its resulting globals.

  `ordered` and `encoding` are accepted so the signature matches the other
  `*_load` loaders; this loader does not use them.

  NOTE: the file is executed, so only point this at trusted files.
  """
  from runpy import run_path

  return run_path(filepath)


def json_load(filepath, ordered=False, encoding='utf-8'):
  """Load a json file and return the decoded content.

  Args:
    filepath: path of the json file.
    ordered: when true, every json object (at any nesting level) is decoded
      as an OrderedDict instead of a plain dict.
    encoding: text encoding used to read the file.

  Returns:
    The decoded document: a dict/OrderedDict, list or scalar, depending on
    the root element of the file.
  """
  with open(filepath, encoding=encoding) as f:
    if ordered:
      ## Decode objects straight into OrderedDict. Wrapping the finished
      ## result in OrderedDict(...) breaks for list/scalar roots and would
      ## only convert the top level.
      return json.load(f, object_pairs_hook=OrderedDict)
    return json.load(f)


def __yaml_load(filepath, ordered=False, encoding='utf-8'):
  """Safe load yaml file as an easy dictionary object.

  Args:
    filepath: path of the yaml file.
    ordered: when true and the document root is a mapping, return it as an
      OrderedDict. Other roots (sequence, scalar, empty document) are
      returned unchanged.
    encoding: text encoding used to read the file.

  Returns:
    The parsed document.
  """
  ## Imported lazily so ruamel.yaml is only needed when yaml is actually read.
  import ruamel.yaml as yaml

  with open(filepath, 'r', encoding=encoding) as f:
    yaml_parser = yaml.YAML(typ='safe', pure=True)
    fc = yaml_parser.load(f)
  ## OrderedDict(...) raises for a list root or an empty document (None), so
  ## only mappings are converted.
  if ordered and isinstance(fc, dict):
    fc = OrderedDict(fc)
  return fc


def yaml_load(*args, **kwargs):
  """Loader for `.yaml` files; forwards every argument to `__yaml_load`."""
  content = __yaml_load(*args, **kwargs)
  return content


def yml_load(*args, **kwargs):
  """Loader for `.yml` files; forwards every argument to `__yaml_load`."""
  content = __yaml_load(*args, **kwargs)
  return content


def list_files(p, pattern=r'**/*', ext=None):
  """Lazily yield the POSIX-style path of each regular file found under `p`.

  When `ext` is given, files are searched recursively by that suffix and
  `pattern` is ignored; otherwise `pattern` is used as the glob expression.
  """
  glob_expr = pattern
  if ext:
    glob_expr = fr'**/*{ext}'
  matches = pathlib.Path(p).glob(glob_expr)
  ## Directories matched by the glob are dropped; only files are reported.
  yield from (entry.as_posix() for entry in matches if os.path.isfile(entry))


def get_files(p, ext='.json'):
  """Return the files designated by `p`.

  A file path gives a one-element list holding `p` itself (`ext` is not
  checked). A directory gives the generator from `list_files` over the files
  ending in `ext` below it. Anything else raises ValueError.
  """
  if os.path.isfile(p):
    return [p]
  if not os.path.isdir(p):
    raise ValueError('Invalid path.')
  ## Directory case: hand back the lazy generator, not a list.
  return list_files(get_basepath(p), ext=ext)


def get_filepaths(p, ext='.json'):
  """Yield each file path that `get_files` reports for `p`, logging each one.

  Being a generator, nothing runs (including the ValueError `get_files`
  raises for an invalid path) until iteration starts.
  """
  candidates = get_files(p, ext=ext)
  log.debug('files: {}'.format(candidates))
  for candidate in candidates:
    log.debug('filepath: {}'.format(candidate))
    yield candidate


def load_file(filepath, ordered=False, encoding='utf-8'):
  """Load a file with the loader matching its extension.

  The lowercased extension selects the module-level function named
  `<ext>_load` (this module defines `json_load`, `yaml_load`, `yml_load` and
  `py_load`), which is called with `(filepath, ordered, encoding)`.

  NOTE(review): the error message below also advertises `csv`, but no
  `csv_load` is visible in this module - confirm where it comes from.

  Args:
    filepath: path of the file to load; must be a `str`.
    ordered: forwarded to the loader.
    encoding: forwarded to the loader.

  Returns:
    Whatever the selected loader returns, or None (after logging an error)
    when `filepath` is not a string naming an existing file.

  Raises:
    OSError: no loader exists for the file's extension.
  """
  fc = None
  if isinstance(filepath, str) and os.path.isfile(filepath):
    ext = os.path.splitext(filepath)[-1].split('.')[-1]

    ## normalise the file extension irrespective of the casing of the file extension
    ext = ext.lower() if ext else ''
    ## Default of None: without it an unknown extension surfaces as a bare
    ## AttributeError and the OSError below can never be reached.
    fn = getattr(this, ext + '_load', None)

    if not callable(fn):
      raise OSError(
          'Invalid filetype: {}. Only yml, yaml, json and csv types are supported.'.format(
              ext,
          ),
      )
    fc = fn(filepath, ordered, encoding)
  else:
    ## No exception is being handled here, so log a plain error rather than
    ## an empty traceback.
    log.error(f'Invalid filepath: {filepath}')
  return fc


def load_filecontent(p, ext='.json'):
  """Yield a DataFrame for every file that `get_filepaths(p, ext=ext)` reports.

  Each file is read with `load_file(..., ordered=True)` and converted as:

    - list root: `pd.DataFrame(data)`
    - dict root: list values are first expanded into `{'item_<i>': value}`
      dicts, then the mapping is turned into a frame with one row per key
      (`orient='index'`)
    - anything else: an error is logged and the DataFrame slot is None

  Args:
    p: file or directory path.
    ext: extension used to select files when `p` is a directory.

  Yields:
    Tuples `(df, filepath, filename)` where `filename` is the base name of
    `filepath` and `df` is None for unsupported content.
  """
  import pandas as pd
  ## NOTE(review): this changes a process-wide pandas option, and presumably
  ## the option only exists in some pandas versions - confirm the supported
  ## range.
  pd.set_option('future.no_silent_downcasting', True)

  for filepath in get_filepaths(p, ext=ext):
    log.info('loading data for filepath: {}'.format(filepath))

    filename = os.path.basename(filepath)
    data = load_file(filepath, ordered=True)

    ## Reset per file: otherwise an unsupported file would be reported with
    ## the DataFrame of the file processed before it.
    df = None

    ## Process list or dict (OrderedDict is a dict subclass)
    if isinstance(data, list):
      df = pd.DataFrame(data)
    elif isinstance(data, dict):
      processed_data = {}
      for key, value in data.items():
        if isinstance(value, list):
          processed_data[key] = {f'item_{i}': v for i, v in enumerate(value)}
        else:
          processed_data[key] = value
      df = pd.DataFrame.from_dict(processed_data, orient='index')
    else:
      errormsg = f"Unsupported format in {filepath}. Root element must be a list or dict."
      log.error(f'errormsg: {errormsg}')
    yield df,filepath,filename
