"""Run SMILES cleaning over the Tox21 train/validation CSVs and save the masks.

Importing or executing this module processes both splits immediately using
the paths defined at the bottom of the file.
"""

import numpy as np

from src.preprocess import clean_smiles_in_csv

# Label columns handed to ``clean_smiles_in_csv`` for every split.
TOX21_TASKS = [
    "NR-AhR",
    "NR-AR",
    "NR-AR-LBD",
    "NR-Aromatase",
    "NR-ER",
    "NR-ER-LBD",
    "NR-PPAR-gamma",
    "SR-ARE",
    "SR-ATAD5",
    "SR-HSE",
    "SR-MMP",
    "SR-p53",
]


def prepare_data(data_path, save_path_clean_data, save_path_valid_mask):
    """Clean one CSV split and persist the value returned by the cleaner.

    Args:
        data_path: Path of the input CSV passed to ``clean_smiles_in_csv``.
        save_path_clean_data: Second argument to ``clean_smiles_in_csv``;
            presumably where the cleaned CSV is written -- TODO confirm
            against ``src.preprocess``.
        save_path_valid_mask: Destination given to ``np.save`` for the
            value returned by ``clean_smiles_in_csv``.

    Returns:
        None.
    """
    # "smiles" is the column name forwarded to the cleaner; the returned
    # object is presumably a per-row validity mask -- TODO confirm.
    valid_mask = clean_smiles_in_csv(
        data_path,
        save_path_clean_data,
        "smiles",
        TOX21_TASKS,
    )
    np.save(save_path_valid_mask, valid_mask)


# Input CSVs and the locations of their cleaned counterparts.
train_path = "./tox21/tox21_train.csv"
val_path = "./tox21/tox21_validation.csv"
train_path_clean = "./tox21/tox21_train_clean.csv"
val_path_clean = "./tox21/tox21_validation_clean.csv"

# Process the training split first, then the validation split.
prepare_data(
    data_path=train_path,
    save_path_clean_data=train_path_clean,
    save_path_valid_mask="./tox21/valid_mask_train.npy",
)
prepare_data(
    data_path=val_path,
    save_path_clean_data=val_path_clean,
    save_path_valid_mask="./tox21/valid_mask_val.npy",
)