# Extract dialogues that possibly contain a correction from the DailyDialog dataset.
import datasets
from datasets import load_dataset

# Load the dataset; the code below indexes it by split name
# ('train', 'validation', 'test') and reads the 'dialog' column.
data = load_dataset('daily_dialog')

# Lowercase substrings that hint at a speaker correcting something said
# earlier. Matching is plain substring containment, so stems such as
# 'clarif' or 'misunder' also hit longer words. This list could be expanded.
key_words = ['sorry', 'error', 'actual', 'correct', 'mistake', 'mistook', 'oops', 'apologize', 'misunder', 'underst', 'clarif', 'should be', 'forget about', 'forgot about', 'wrong']
# For each split: indices of the dialogues in which at least one utterance
# contains at least one keyword.
possible_index_list = {'train': [], 'validation': [], 'test': []}


# Iterate the dict itself so the split names are written in one place only;
# dicts keep insertion order, so the splits are still visited as
# train -> validation -> test.
for d in possible_index_list:
    for i, v in enumerate(data[d]['dialog']):
        # v is one dialogue: an iterable of utterance strings.
        # Lowercase each utterance once (lazily, so the scan still stops at
        # the first hit) instead of once per keyword.
        if any(kw in low for low in (dia.lower() for dia in v) for kw in key_words):
            possible_index_list[d].append(i)

####################

# Print, for every flagged dialogue of the test split, the utterances that
# contain a keyword, so the candidates can be inspected by hand.
# The column is looked up once here: data['test']['dialog'] does not depend
# on the loop variable, so repeating it per flagged dialogue is wasted work.
test_dialogs = data['test']['dialog']
for i in possible_index_list['test']:
    print(f"at test {i}:")
    for conv in test_dialogs[i]:
        # Lowercase once per utterance rather than once per keyword.
        lowered = conv.lower()
        if any(kw in lowered for kw in key_words):
            # Print the utterance in its original casing.
            print(conv)
    print('============')

"""
at test 5:
 I think it ’ s a distance of 180 kilometers from here to London , so it should be a two-hour drive on the motorway .
at test 16:
 I'm afraid there's been a mistake .
at test 23:
 Actually , fruits and veggies are really good for you .
at test 77:
 I'm sorry to bring this up , but would it be possible for you to write me a letter of recommendation before you go ?
at test 102:
 Sorry , I forgot . I don ’ t like seafood , neither .
at test 121:
 Oops , cancel that . Change the second call to 7 thirty will you , please ?
at test 124:
 Actually , the company will provide you with all of these supplies . So , you can leave this very thick notebook at home .
at test 163:
 It should be due in at 7:30 a . m.tomorrow .
at test 369:
 Darling . I think you have made a mistake .
 I have made a mistake ?
 July 20 ? But I think it should be June 20 .
 Yes . I remember now . We put it off . I did make a mistake . But how shall we deal with the tickets ?
at test 376:
 Well , actually two-thirds of Americans may avoid these places .
at test 580:
 It ’ s traditional Chinese Medicine . I mix it with hot water like tea . Sorry . I forgot about it .
 Oh no ! I forgot about those too ! Those are Chinese sausages for my Aunt Lily .
at test 666:
 Oh , sorry . I'm always absent-minded . I remember that I've put a note in my pocket .
at test 836:
 I completely forgot about your cat allergy . I took care of a cat for my friend here a few days ago . Oh no . we'd better go to the doctor and get some medicine . Let's go .
"""

# Print, for every flagged dialogue of the validation split, the utterances
# that contain a keyword, so the candidates can be inspected by hand.
# The column is looked up once here: data['validation']['dialog'] does not
# depend on the loop variable, so repeating it per flagged dialogue is
# wasted work.
validation_dialogs = data['validation']['dialog']
for i in possible_index_list['validation']:
    print(f"at validation {i}:")
    for conv in validation_dialogs[i]:
        # Lowercase once per utterance rather than once per keyword.
        lowered = conv.lower()
        if any(kw in lowered for kw in key_words):
            # Print the utterance in its original casing.
            print(conv)
    print('============')

"""
at validation 22:
 Believe it or not , it has the opposite effect . Employees are actually more productive on casual days .
at validation 35:
Excuse me . Something is wrong with my bank card . Can you help me ?
at validation 64:
 Oops , no , Daddy can't watch American Idol , either !
at validation 135:
 Thanks . I'm sorry to say that you need to get this stamped . There is no stamp on this slip and it is essential before I can process it for you .
at validation 198:
 That was the only thing that I saw that was wrong with the apartment .
at validation 405:
I'm sorry , but I didn't catch what you said .
at validation 407:
 Sorry , I don ’ t know the rule about punching .
at validation 432:
 Oh , I'm sorry . should have been 2135-3668 , not 3678 . I've given you a wrong number .
at validation 552:
 One moment , please . I have to check if there are rooms available . I'm sorry , ladies . We have only two double rooms available but they are on different floors . Would you mind that ?
at validation 599:
 I'm sorry , hold on a moment . I'll check it out . But we have already shipped it to you last month . Would you like us to contact the express company to know what's going on ?
at validation 699:
 I'm embarrassed ! I forgot completely about them . I'm terribly sorry .
at validation 926:
 I'm sorry . Something is wrong with my taxi .
"""

# Print, for every flagged dialogue of the train split, the utterances that
# contain a keyword, so the candidates can be inspected by hand.
# The column is looked up once here: data['train']['dialog'] does not depend
# on the loop variable, so repeating it per flagged dialogue is wasted work.
train_dialogs = data['train']['dialog']
for i in possible_index_list['train']:
    print(f"at train {i}:")
    for conv in train_dialogs[i]:
        # Lowercase once per utterance rather than once per keyword.
        lowered = conv.lower()
        if any(kw in lowered for kw in key_words):
            # Print the utterance in its original casing.
            print(conv)
    print('============')

"""
at train 134:
Sam , I am so sorry . It was your birthday yesterday and I completely forgot about it .
at train 181:
Maybe you can correct it by going to a driving range before you play again .
at train 315:
 There ’ s problem with my bank statement . There ’ s a mistake on it . I also need to withdraw some money some the ATM .
at train 592:
 I wouldn't say that.They seem to be on good terms but actually they always speak ill of each other .
at train 1321:
 Wrong . lt's not a place name , but a passionate act .
at train 1413:
 No , it sounds wrong . He was born in the 16th century .
at train 1421:
 I'm sorry , I didn't mean to forget our wedding anniversary .
at train 2004:
 I thought she was going to call when she was done shopping . It was a misunderstanding . She was literally screaming on the phone over this .
at train 2688:
Excuse me , Professor . I think there might be an error in my test score .
 I think that the percentage is incorrect .
at train 2755:
 I think you must have heard wrong.The truth is we are going to be taken over by Trusten.We are being bought out , and our company will be merging with our largest competitor.It ' s not good news at all ...
at train 2873:
 Oh , I ’ m sorry . It completely slipped my mind .
at train 3511:
I'd like to apologize for my carelessness . May I clean it up for you ?
at train 4108:
 Well , Yes . There are something wrong actually . Perhaps you can give me some advice .
at train 5738:
 there ’ s problem with my bank statement . There ’ s a mistake on it . I also need to withdraw some money form the ATM .
at train 5752:
 It looks like some kind of mistake .
at train 6618:
 Sandy , I think we made a mistake ...
at train 6768:
 I think there's been a misunderstanding !
at train 7872:
 Thank you for pointing that out . I mistakenly gave you your friend's breakfast .
at train 7921:
 Oh , I am sorry sir . I forgot to explain that to you . This one is an allowance slip . We made a mistake in your bill and overcharged you 120 dollars . So we had to write up an allowance slip and deduct 120 dollars from your bill .
at train 9000:
 Oh , my mistake . The reservation is for a suite and it is a non-smoking room with a king bed . I'm sorry for the error .
at train 9053:
 Did you find a lot of things wrong during the home inspection ?
at train 9782:
 I'm afraid there has been a mistake .
at train 10373:
 Oh . I made a mistake . I thought the guy on the right was Peckham .
at train 10392:
 I apologize . This should not have to be this way .
at train 10739:
 Even if the language is accurate , a misunderstanding can not be avoided due to cultural differences .
"""