using JSON3

"""
    read_jsonl(path, file; mmap::Bool = false)

Load `joinpath(path, file)` and parse its contents with
`JSON3.read(...; jsonlines = true)`.

# Keywords
- `mmap`: when `true`, the contents are obtained through `mmap_open`;
  otherwise the whole file is read with `read`.
"""
function read_jsonl(path, file; mmap::Bool = false)
    fullpath = joinpath(path, file)
    # Pick the loader up front; both branches feed the same parser call.
    contents = mmap ? mmap_open(fullpath) : read(fullpath)
    return JSON3.read(contents; jsonlines = true)
end


# Dataset configuration that only carries the location of the data on disk.
# The trait parameter combines `HasTrainSet`, `NoDevSet` and `NoTestSet`;
# going by those names this describes a train-only dataset, but the trait
# definitions live outside this part of the file — confirm there.
struct TrainingData <: DataSetConfig{Union{HasTrainSet, NoDevSet, NoTestSet}}
    # Directory that is joined with `trainset_file(...)` when the train set is loaded.
    path::String
end

"""
    trainset_file(::TrainingData)

Return the file name of the train split, `"train.jsonl"`.
"""
function trainset_file(::TrainingData)
    return "train.jsonl"
end

# Thin wrapper around a loaded collection of records.
# `length`, `getindex` and `iterate` are defined for it in this file; they forward
# to `dataset` and wrap each element in a `TrainingSample`.
# NOTE(review): the supertype is `DataSetConfig` with the `Indexable`/`ReIterable`
# traits — those definitions are not visible here, confirm this is the intended parent.
struct TrainingDataSet{T} <: DataSetConfig{Union{Indexable, ReIterable}}
    # The underlying collection; must support `length` and integer indexing.
    dataset::T
end

# Wrapper around a single record of a `TrainingDataSet`.
# The accessors defined in this file read the properties `text`, `triples` and
# `domain_token` from the wrapped record.
struct TrainingSample{T} <: SampleType{Union{HasText, HasGraph}}
    # The raw record as stored in the underlying dataset.
    sample::T
end

# Number of samples: delegated to the wrapped collection.
function Base.length(data::TrainingDataSet)
    return length(data.dataset)
end
# Fetch the entry at index `i` from the wrapped collection and wrap it as a
# `TrainingSample`.
function Base.getindex(data::TrainingDataSet, i)
    raw = data.dataset[i]
    return TrainingSample(raw)
end
# Index with a vector of positions: every element of `is` is looked up through
# `data[i]` and the results are collected with `map`.
function Base.getindex(data::TrainingDataSet, is::AbstractVector)
    return map(is) do i
        data[i]
    end
end
# Iteration protocol: the state `i` is the next position to visit, starting at 1.
function Base.iterate(data::TrainingDataSet, i = 1)
    # A state that does not satisfy 0 < i <= length(data) ends the iteration.
    if !(0 < i <= length(data))
        return nothing
    end
    return (data[i], i + 1)
end

"""
    TrainSet(d::TrainingData; mmap = false)

Read the train file of `d` (`trainset_file(d)` inside `d.path`) with `read_jsonl`
and wrap the parsed records in a `TrainingDataSet`. The `mmap` keyword is passed
on to `read_jsonl` unchanged.
"""
function TrainSet(d::TrainingData; mmap = false)
    records = read_jsonl(d.path, trainset_file(d); mmap = mmap)
    return TrainingDataSet(records)
end

####

"""
    extract_text(ts::TrainingSample)

Return the `text` property of the wrapped record. The value is asserted to be a
`String`, so a record holding any other type raises a `TypeError`.
"""
function extract_text(ts::TrainingSample)
    return ts.sample.text::String
end
"""
    extract_graph(ts::TrainingSample)

Pass the `triples` property of the wrapped record to `triple2graph` and return
its result.
"""
function extract_graph(ts::TrainingSample)
    triples = ts.sample.triples
    return triple2graph(triples)
end

"""
    DomainToken(ts::TrainingSample)

Return the `domain_token` property of the wrapped record as-is.
"""
function DomainToken(ts::TrainingSample)
    return ts.sample.domain_token
end
