"""Thin command-line wrappers around the feature-generation, prediction and
fine-tuning entry points (``scripts/save_features.py`` and ``main.py``)."""

import os  # noqa: F401  -- kept: part of the module's original import surface
import shlex
import subprocess
from typing import Any, Mapping, Sequence, Union

_BANNER = "======================================"


def _display(cmd: Union[str, Sequence[str]]) -> str:
    """Return a printable, copy-pasteable rendering of *cmd*."""
    if isinstance(cmd, str):
        return cmd
    return " ".join(shlex.quote(part) for part in cmd)


def run(cmd: Union[str, Sequence[Any]]) -> int:
    """Echo *cmd* between banner lines, execute it, and return its exit code.

    Args:
        cmd: Either a single string, which is handed to the system shell
            (the original behaviour), or a sequence of arguments, which is
            executed directly without a shell so that arguments containing
            spaces or shell metacharacters are passed through verbatim.
            Non-string sequence items are converted with ``str()``.

    Returns:
        The child's return code as reported by ``subprocess.call``
        (0 on success; on POSIX a negative value means it was killed by
        that signal number).

    Raises:
        ValueError: If *cmd* is an empty argument sequence.
    """
    use_shell = isinstance(cmd, str)
    argv = cmd if use_shell else [str(part) for part in cmd]
    if not use_shell and not argv:
        raise ValueError("run() received an empty argument list")

    print("\n" + _BANNER)
    print("Running command:")
    print(_display(argv))
    # Flush so the banner is emitted before any output written by the child.
    print(_BANNER + "\n", flush=True)

    return subprocess.call(argv, shell=use_shell)


def generate_features(data_path, save_path) -> int:
    """Run ``scripts/save_features.py`` with the ``rdkit_2d_normalized`` generator.

    Args:
        data_path: Value passed to ``--data_path``.
        save_path: Value passed to ``--save_path``.

    Returns:
        The exit code of the child process.
    """
    return run(
        [
            "python", "scripts/save_features.py",
            "--data_path", data_path,
            "--save_path", save_path,
            "--features_generator", "rdkit_2d_normalized",
            "--restart",
        ]
    )


def predict_from_csv(data_path, features_path, checkpoint_dir, output_path) -> int:
    """Run ``main.py predict`` with feature scaling disabled.

    Args:
        data_path: Value passed to ``--data_path``.
        features_path: Value passed to ``--features_path``.
        checkpoint_dir: Value passed to ``--checkpoint_dir``.
        output_path: Value passed to ``--output``.

    Returns:
        The exit code of the child process.
    """
    predict_cmd = [
        "python", "main.py", "predict",
        "--data_path", data_path,
        "--features_path", features_path,
        "--checkpoint_dir", checkpoint_dir,
        "--no_features_scaling",
        "--output", output_path,
    ]
    return run(predict_cmd)


def finetune(
    train_path,
    val_path,
    train_features_path,
    val_features_path,
    save_dir,
    checkpoint_path,
    args: Mapping[str, Any],
) -> int:
    """Run ``main.py finetune`` as a single-fold, single-model classification job.

    The validation file and its features are passed as BOTH the separate
    validation set and the separate test set. The epoch count (100), the
    dataset type and the split settings are fixed here.

    Args:
        train_path: Value passed to ``--data_path``.
        val_path: Value passed to ``--separate_val_path`` and
            ``--separate_test_path``.
        train_features_path: Value passed to ``--features_path``.
        val_features_path: Value passed to ``--separate_val_features_path``
            and ``--separate_test_features_path``.
        save_dir: Value passed to ``--save_dir``.
        checkpoint_path: Value passed to ``--checkpoint_path``.
        args: Mapping that must contain the keys ``ffn_hidden_size``,
            ``ffn_num_layer``, ``batch_size``, ``real_init_lr``,
            ``real_final_lr``, ``max_lr``, ``dropout``, ``attn_hidden``,
            ``attn_out``, ``dist_coff`` and ``bond_drop_rate``.

    Returns:
        The exit code of the child process.

    Raises:
        KeyError: If *args* lacks one of the keys listed above.
    """
    finetune_cmd = [
        "python", "main.py", "finetune",
        "--data_path", train_path,
        "--split_type", "random",
        "--split_sizes", "1", "0", "0",
        "--separate_val_path", val_path,
        "--separate_test_path", val_path,
        "--features_path", train_features_path,
        "--separate_val_features_path", val_features_path,
        "--separate_test_features_path", val_features_path,
        "--save_dir", save_dir,
        "--checkpoint_path", checkpoint_path,
        "--dataset_type", "classification",
        "--num_folds", "1",
        "--ensemble_size", "1",
        "--no_features_scaling",
        "--ffn_hidden_size", args["ffn_hidden_size"],
        # NOTE(review): the key is 'ffn_num_layer' (singular) while the flag
        # is plural -- kept as in the original; confirm against the caller.
        "--ffn_num_layers", args["ffn_num_layer"],
        "--batch_size", args["batch_size"],
        "--epochs", "100",
        "--init_lr", args["real_init_lr"],
        "--final_lr", args["real_final_lr"],
        "--max_lr", args["max_lr"],
        "--dropout", args["dropout"],
        "--attn_hidden", args["attn_hidden"],
        "--attn_out", args["attn_out"],
        "--dist_coff", args["dist_coff"],
        "--bond_drop_rate", args["bond_drop_rate"],
    ]
    return run(finetune_cmd)