# Compare how many whitespace-separated tokens of model generations appear in
# a reference word list, for a "normal" and a "perturbed" set of raw outputs.
#
# For each set the script prints the mean per-generation fraction of tokens
# found in the word list, followed by the perturbed/normal ratio of the means.

WORD_LIST_PATH = "scrabble.txt"
NORMAL_PATH = "../normal_olmo_raw.txt"
PERTURBED_PATH = "../perturbed_olmo_raw.txt"

PAD_TOKEN = "<|pad|>"
EOS_TOKEN = "<|endoftext|>"
# Number of leading whitespace-separated tokens dropped from every record.
# NOTE(review): presumably this is the prompt length -- confirm.
SKIPPED_TOKENS = 20


def load_valid_words(path):
    """Return the set of right-stripped lines of the word-list file *path*."""
    with open(path, "r") as f:
        return {line.rstrip() for line in f}


def extract_generation(raw_line, skipped_tokens=SKIPPED_TOKENS):
    """Return the generated text carried by one raw record line.

    The line is evaluated as a Python expression whose first element is a
    string. Everything up to and including the last ``<|pad|>`` marker is
    dropped, the first *skipped_tokens* whitespace-separated tokens are
    discarded, the rest is re-joined with single spaces and every
    ``<|endoftext|>`` marker is removed.
    """
    # SECURITY NOTE: eval() executes arbitrary code found in the input file.
    # It is kept so existing files parse exactly as before; only run this on
    # files you produced yourself. If the records are plain literals,
    # ast.literal_eval would be a safer drop-in.
    record = eval(raw_line)
    after_padding = record[0].split(PAD_TOKEN)[-1]
    kept_tokens = after_padding.split()[skipped_tokens:]
    return " ".join(kept_tokens).replace(EOS_TOKEN, "")


def load_generations(path):
    """Return the extracted generation text for every line of *path*."""
    with open(path, "r") as f:
        return [extract_generation(line) for line in f]


def valid_word_fraction(text, valid_words):
    """Return the fraction of tokens of *text* present in *valid_words*.

    Tokens are upper-cased before lookup. Text without any token (empty or
    whitespace-only) scores 0.0 instead of dividing by zero.
    """
    tokens = text.upper().split()
    if not tokens:
        return 0.0
    hits = 0
    for token in tokens:
        if token in valid_words:
            hits += 1
    return hits / len(tokens)


def mean_valid_fraction(texts, valid_words):
    """Return the mean of ``valid_word_fraction`` over *texts*.

    Token-less texts contribute 0 but still count in the denominator.
    An empty *texts* yields 0.0.
    """
    if not texts:
        return 0.0
    # Plain left-to-right accumulation: sum() uses compensated float
    # summation on newer Python versions and could change the printed digits.
    total = 0
    for text in texts:
        total += valid_word_fraction(text, valid_words)
    return total / len(texts)


def main(word_list_path=WORD_LIST_PATH,
         normal_path=NORMAL_PATH,
         perturbed_path=PERTURBED_PATH):
    """Print the normal mean, the perturbed mean and their ratio."""
    valid_words = load_valid_words(word_list_path)
    normal_mean = mean_valid_fraction(load_generations(normal_path), valid_words)
    perturbed_mean = mean_valid_fraction(
        load_generations(perturbed_path), valid_words
    )

    print("Normal:", normal_mean)
    print("Perturbed:", perturbed_mean)
    # The ratio is undefined when the normal mean is zero; report NaN rather
    # than crashing after the two means have already been printed.
    print(perturbed_mean / normal_mean if normal_mean else float("nan"))


if __name__ == "__main__":
    main()