# Copyright 2024 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import bisect
from typing import List, Sequence, Tuple


def search_for_fit(numbers: Sequence[int], capacity: int) -> int:
    r"""
    Finds the index of the largest number that fits into the knapsack with the given capacity.

    Args:
        numbers: candidate sizes, which must already be sorted in ascending order
            because the lookup is a binary search.
        capacity: the space still available in the knapsack.

    Returns:
        The index of the largest element that is less than or equal to ``capacity``
        (the right-most one among equal values), or -1 if no element fits.
    """
    # bisect (= bisect_right) returns the insertion point to the right of any element
    # equal to capacity, so the element just before it is the largest one that fits.
    index = bisect.bisect(numbers, capacity)
    return -1 if index == 0 else (index - 1)


def greedy_knapsack(numbers: List[int], capacity: int) -> List[List[int]]:
    r"""
    An efficient greedy algorithm with binary search for the knapsack problem.

    Each knapsack is filled by repeatedly taking the largest remaining number that
    still fits into the remaining capacity; a new knapsack is opened once nothing fits.

    Note that ``numbers`` is consumed in place: it is sorted and every element is
    popped from it, so the list is empty when the function returns.

    Args:
        numbers: the sizes to pack.
        capacity: the maximum total size of a single knapsack.

    Returns:
        A list of knapsacks, each being the list of numbers packed into it.

    Raises:
        ValueError: if any number is larger than ``capacity``. Such a number can never
            be packed; without this check the loop below would never terminate.
    """
    # validate before touching the input so that a failed call leaves it unchanged
    if numbers:
        largest = max(numbers)
        if largest > capacity:
            raise ValueError(f"Cannot pack a number larger than the capacity: {largest} > {capacity}.")

    numbers.sort()  # sort numbers in ascending order for binary search
    knapsacks = []

    while numbers:
        current_knapsack = []
        remaining_capacity = capacity

        while True:
            index = search_for_fit(numbers, remaining_capacity)
            if index == -1:
                break  # no more numbers fit in this knapsack

            remaining_capacity -= numbers[index]  # update the remaining capacity
            current_knapsack.append(numbers.pop(index))  # add the number to knapsack

        knapsacks.append(current_knapsack)

    return knapsacks


def infer_seqlen(source_len: int, target_len: int, cutoff_len: int) -> Tuple[int, int]:
    r"""
    Computes the real sequence length after truncation by the cutoff_len.

    The budget is split as follows:
      - if the target takes less than half of the budget, only the source is truncated;
      - otherwise, if the source takes less than half of the budget, only the target is truncated;
      - otherwise both are truncated proportionally to their original lengths.

    Args:
        source_len: the length of the source sequence.
        target_len: the length of the target sequence.
        cutoff_len: the maximum total length allowed for source plus target.

    Returns:
        A tuple ``(new_source_len, new_target_len)`` whose sum does not exceed
        ``cutoff_len`` (both are 0 when ``cutoff_len`` is not positive).
    """
    total_len = source_len + target_len
    if target_len * 2 < cutoff_len:  # truncate source
        max_target_len = cutoff_len
    elif source_len * 2 < cutoff_len:  # truncate target
        max_target_len = cutoff_len - source_len
    elif total_len > 0:  # truncate both
        # exact integer arithmetic: going through a float ratio can lose one token,
        # e.g. 100 * (58 / 200) evaluates to 28.999999999999996 and truncates to 28
        max_target_len = cutoff_len * target_len // total_len
    else:  # nothing to truncate, also avoids a division by zero
        max_target_len = 0

    # clamp to zero so that a non-positive cutoff_len cannot produce a negative length
    new_target_len = max(min(max_target_len, target_len), 0)
    max_source_len = max(cutoff_len - new_target_len, 0)
    new_source_len = min(max_source_len, source_len)
    return new_source_len, new_target_len