# Copyright 2024 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import json
import os
from typing import TYPE_CHECKING, Any, Dict, List, Tuple

from ...extras.constants import DATA_CONFIG
from ...extras.packages import is_gradio_available


if is_gradio_available():
    import gradio as gr


if TYPE_CHECKING:
    from gradio.components import Component


# Number of samples shown on one preview page.
PAGE_SIZE = 2


def prev_page(page_index: int) -> int:
    """Return the previous page index, staying at 0 when already on the first page."""
    return page_index - 1 if page_index > 0 else page_index


def next_page(page_index: int, total_num: int) -> int:
    """Return the next page index, or the current one if no further page exists.

    A next page exists only when at least one of the ``total_num`` samples
    lies beyond the end of the current page.
    """
    return page_index + 1 if (page_index + 1) * PAGE_SIZE < total_num else page_index


def can_preview(dataset_dir: str, dataset: List[str]) -> "gr.Button":
    """Return a button update that is interactive only if the dataset can be previewed.

    Args:
        dataset_dir: Directory containing the ``DATA_CONFIG`` file and the data.
        dataset: Selected dataset names; only the first entry is inspected.

    Returns:
        ``gr.Button(interactive=True)`` when the first dataset's ``file_name``
        resolves to an existing file or a non-empty directory, otherwise
        ``gr.Button(interactive=False)``.
    """
    try:
        with open(os.path.join(dataset_dir, DATA_CONFIG), encoding="utf-8") as f:
            dataset_info = json.load(f)
    except Exception:
        # Deliberate best effort: any failure to read or parse the config
        # simply disables the preview button instead of raising in the UI.
        return gr.Button(interactive=False)

    # `.get` keeps a dataset name that is absent from the config from raising
    # KeyError; such a dataset is treated as not previewable.
    if not dataset or "file_name" not in dataset_info.get(dataset[0], {}):
        return gr.Button(interactive=False)

    data_path = os.path.join(dataset_dir, dataset_info[dataset[0]]["file_name"])
    has_data = os.path.isfile(data_path) or (
        os.path.isdir(data_path) and bool(os.listdir(data_path))
    )
    return gr.Button(interactive=has_data)


def _load_data_file(file_path: str) -> List[Any]:
    """Load the samples stored in ``file_path``.

    ``.json`` files are parsed as a single JSON document, ``.jsonl`` files as
    one JSON document per line, and any other file is returned as its list of
    raw text lines.
    """
    with open(file_path, encoding="utf-8") as f:
        if file_path.endswith(".json"):
            return json.load(f)
        elif file_path.endswith(".jsonl"):
            return [json.loads(line) for line in f]
        else:
            return list(f)


def get_preview(
    dataset_dir: str, dataset: List[str], page_index: int
) -> Tuple[int, List[Any], "gr.Column"]:
    """Load the first selected dataset and return one page of its samples.

    Args:
        dataset_dir: Directory containing the ``DATA_CONFIG`` file and the data.
        dataset: Selected dataset names; only the first entry is loaded.
        page_index: Zero-based index of the page to return.

    Returns:
        A tuple of (total number of samples, the ``PAGE_SIZE`` samples of the
        requested page, a ``gr.Column`` update that makes the preview visible).
    """
    with open(os.path.join(dataset_dir, DATA_CONFIG), encoding="utf-8") as f:
        dataset_info = json.load(f)

    data_path = os.path.join(dataset_dir, dataset_info[dataset[0]]["file_name"])
    if os.path.isfile(data_path):
        data = _load_data_file(data_path)
    else:
        # Not a regular file: treat the path as a directory and concatenate
        # the samples of every file it contains.
        data = []
        for file_name in os.listdir(data_path):
            data.extend(_load_data_file(os.path.join(data_path, file_name)))

    start = PAGE_SIZE * page_index
    return len(data), data[start : start + PAGE_SIZE], gr.Column(visible=True)


def create_preview_box(
    dataset_dir: "gr.Textbox", dataset: "gr.Dropdown"
) -> Dict[str, "Component"]:
    """Build the dataset preview widgets and wire up their event handlers.

    Args:
        dataset_dir: Component whose value is passed to the handlers as the
            dataset directory.
        dataset: Component whose value is passed to the handlers as the
            dataset selection.

    Returns:
        A mapping from element name to the created component. The enclosing
        preview column itself is not part of the mapping.
    """
    data_preview_btn = gr.Button(interactive=False, scale=1)
    with gr.Column(visible=False, elem_classes="modal-box") as preview_box:
        with gr.Row():
            preview_count = gr.Number(value=0, interactive=False, precision=0)
            page_index = gr.Number(value=0, interactive=False, precision=0)

        with gr.Row():
            prev_btn = gr.Button()
            next_btn = gr.Button()
            close_btn = gr.Button()

        with gr.Row():
            preview_samples = gr.JSON()

    # Shared by every handler that (re)renders the current page.
    preview_inputs = [dataset_dir, dataset, page_index]
    preview_outputs = [preview_count, preview_samples, preview_box]

    # Changing the selection re-evaluates the button and resets paging to 0.
    dataset.change(
        can_preview, [dataset_dir, dataset], [data_preview_btn], queue=False
    ).then(lambda: 0, outputs=[page_index], queue=False)

    data_preview_btn.click(get_preview, preview_inputs, preview_outputs, queue=False)

    # Paging buttons first update the page index, then reload that page.
    prev_btn.click(prev_page, [page_index], [page_index], queue=False).then(
        get_preview, preview_inputs, preview_outputs, queue=False
    )
    next_btn.click(
        next_page, [page_index, preview_count], [page_index], queue=False
    ).then(get_preview, preview_inputs, preview_outputs, queue=False)

    close_btn.click(
        lambda: gr.Column(visible=False), outputs=[preview_box], queue=False
    )

    return dict(
        data_preview_btn=data_preview_btn,
        preview_count=preview_count,
        page_index=page_index,
        prev_btn=prev_btn,
        next_btn=next_btn,
        close_btn=close_btn,
        preview_samples=preview_samples,
    )