import os
import shutil
import numpy as np

def custom_prep_IDCdata1(save_path):
    """Flatten a per-patient image tree into one directory per class label.

    The tree under ``save_path`` is expected to be laid out as
    ``<save_path>/<patient>/<label>/<image>``.  The existing tree is renamed
    to ``<save_path>_original`` and every image is then copied to
    ``<save_path>/<label>/``.

    If ``<save_path>_original`` already exists the function prints a notice
    and returns without touching anything, so it is safe to call repeatedly.

    Args:
        save_path: Path of the dataset root.  A trailing path separator is
            tolerated.

    Raises:
        OSError: If ``save_path`` does not exist or cannot be renamed.

    Note:
        Images are copied into a shared per-label directory, so two images
        with the same file name under different patients overwrite each other.
    """
    # Normalise first: with a trailing separator, save_path + '_original'
    # would point *inside* the directory being renamed and os.rename would fail.
    out_dir = os.path.normpath(save_path)
    IDC_dir = out_dir + '_original'
    # If the dataset has already been reorganised, do nothing.
    if os.path.isdir(IDC_dir):
        print('the director original has been created')
        return
    os.rename(out_dir, IDC_dir)
    # Recreate the output root even when there is nothing to copy, so later
    # steps that list it do not fail on an empty dataset.
    os.makedirs(out_dir, exist_ok=True)
    for p in os.listdir(IDC_dir):
        patient_dir = os.path.join(IDC_dir, p)
        # Skip stray files (README, .DS_Store, ...) next to patient folders.
        if not os.path.isdir(patient_dir):
            continue
        for h in os.listdir(patient_dir):
            label_dir = os.path.join(patient_dir, h)
            if not os.path.isdir(label_dir):
                continue
            dest_dir = os.path.join(out_dir, h)
            os.makedirs(dest_dir, exist_ok=True)
            for img in os.listdir(label_dir):
                shutil.copy(os.path.join(label_dir, img), dest_dir)
                
# Module-level step 1: runs whenever this file is executed or imported.
# Renames the dataset root to '<save_path>_original' and rebuilds
# '<save_path>/<label>/' from it (no-op if '_original' already exists).
save_path='./data/IDC_regular_ps50_idx5'
custom_prep_IDCdata1(save_path)
        
def custom_prep_IDCdata2(save_path, *, train_dir='./data/IDC_train',
                         test_dir='./data/IDC_test',
                         n_train_neg=29164, n_train_pos=10788,
                         n_test_neg=31825, n_test_pos=11595, seed=42):
    """Split a per-label image directory into fixed-size train and test sets.

    ``save_path`` must contain one sub-directory per class whose name parses
    as an integer.  Images under a directory whose name equals ``0`` are
    treated as negatives; images under every other directory are treated as
    positives.  Each group is shuffled with ``seed``; the first ``n_train_*``
    files are copied to ``<train_dir>/0`` or ``<train_dir>/1`` and the next
    ``n_test_*`` files to ``<test_dir>/0`` or ``<test_dir>/1``.

    Args:
        save_path: Directory holding the per-label sub-directories.
        train_dir: Output root for the training split.
        test_dir: Output root for the test split.
        n_train_neg: Number of negative images in the training split.
        n_train_pos: Number of positive images in the training split.
        n_test_neg: Number of negative images in the test split.
        n_test_pos: Number of positive images in the test split.
        seed: Seed used to shuffle each group.

    Raises:
        ValueError: If a split size is negative, or a class directory name is
            not an integer.
        IndexError: If a group holds fewer images than its train and test
            sizes require.  Raised before anything is created or copied.
    """
    sizes = (n_train_neg, n_train_pos, n_test_neg, n_test_pos)
    if any(size < 0 for size in sizes):
        raise ValueError('split sizes must be non-negative, got %r' % (sizes,))

    class_names = []
    neg_img_paths = []
    pos_img_paths = []
    for h in os.listdir(save_path):
        class_dir = os.path.join(save_path, h)
        # Skip stray files sitting next to the class folders.
        if not os.path.isdir(class_dir):
            continue
        bucket = neg_img_paths if int(h) == 0 else pos_img_paths
        bucket.extend(os.path.join(class_dir, img)
                      for img in os.listdir(class_dir))
        class_names.append(h)

    # os.listdir returns entries in arbitrary order; sort so that the seeded
    # shuffle yields the same split on every machine and filesystem.
    neg_img_paths.sort()
    pos_img_paths.sort()

    # Fail fast instead of dying halfway through a long copy.
    for group, paths, needed in (
            ('negative', neg_img_paths, n_train_neg + n_test_neg),
            ('positive', pos_img_paths, n_train_pos + n_test_pos)):
        if needed > len(paths):
            raise IndexError(
                'need %d %s images but only %d found under %s'
                % (needed, group, len(paths), save_path))

    # A fresh local generator per group reproduces the re-seed-then-shuffle
    # permutation without clobbering numpy's global random state.
    np.random.RandomState(seed).shuffle(neg_img_paths)
    np.random.RandomState(seed).shuffle(pos_img_paths)

    # One output folder per source class, plus the '0' and '1' destinations,
    # which must exist or shutil.copy would create a plain file of that name.
    for out_root in (train_dir, test_dir):
        for name in set(class_names) | {'0', '1'}:
            os.makedirs(os.path.join(out_root, name), exist_ok=True)

    splits = (
        (train_dir, '0', neg_img_paths[:n_train_neg]),
        (train_dir, '1', pos_img_paths[:n_train_pos]),
        (test_dir, '0', neg_img_paths[n_train_neg:n_train_neg + n_test_neg]),
        (test_dir, '1', pos_img_paths[n_train_pos:n_train_pos + n_test_pos]),
    )
    for out_root, label, paths in splits:
        dest_dir = os.path.join(out_root, label)
        for path in paths:
            shutil.copy(path, dest_dir)
        
            
# Module-level step 2: runs whenever this file is executed or imported.
# Splits the per-label images under save_path into ./data/IDC_train and
# ./data/IDC_test.
save_path='./data/IDC_regular_ps50_idx5'
custom_prep_IDCdata2(save_path)
