from data_utils.tense_inflection_helpers import read_ti_data, sent_to_pos

def split_test_data(split):
    """Partition one tense-inflection split into two files by POS tag.

    Loads the sentences of ``split`` through ``read_ti_data`` (called with
    ``include_only_present=True``), skips every sentence containing the
    substring ``"PAST"``, and groups the rest by the POS tag at index 2 of
    ``sent_to_pos(sent)``: tag ``"R"`` goes to the group reported as "RC",
    anything else to the group reported as "PREP".

    The size of each group is printed, and each group is written one
    sentence per line to ``data_utils/tense_inflection_data/tense.{split}_rc``
    and ``data_utils/tense_inflection_data/tense.{split}_prep``. Both paths
    are relative to the current working directory.

    Args:
        split: Name of the data split; forwarded to ``read_ti_data`` and
            embedded in the output file names.
    """
    val_data, _ = read_ti_data(splits=[split], include_only_present=True)
    sents_r = []
    sents_p = []
    for sent in val_data:
        if "PAST" in sent:
            continue
        # NOTE(review): assumes every sentence yields at least three POS
        # tags; a shorter one raises IndexError here -- confirm against data.
        if sent_to_pos(sent)[2] == "R":
            sents_r.append(sent)
        else:
            sents_p.append(sent)
    print("RC sent: ", len(sents_r))
    print("PREP sent: ", len(sents_p))

    # One shared write path for both groups. The encoding is pinned so the
    # output does not depend on the platform's locale default.
    for suffix, sents in (("rc", sents_r), ("prep", sents_p)):
        out_path = f"data_utils/tense_inflection_data/tense.{split}_{suffix}"
        with open(out_path, "w", encoding="utf-8") as f:
            f.writelines(sent + "\n" for sent in sents)


if __name__ == "__main__":
    # Script entry point: run the split on the 'val' data split.
    split_test_data('val')