# K-fold training driver. For every fold: train a model, reload the checkpoint
# with the best validation AUC, predict the held-out files and the test files
# with test-time augmentation (TTA), and accumulate the results.
skf = KFold(n_splits=FOLDS,shuffle=True,random_state=SEED)

# Per-fold accumulators (one entry appended per fold).
# oof_f1 is created here but not filled anywhere in this section.
oof_pred = []; oof_tar = []; oof_val = []; oof_f1 = []; oof_ids = []; oof_folds = []

# NOTE(review): files_test is read here before the loop assigns it, so it must
# already be defined earlier in the file (not visible here) -- confirm.
preds = np.zeros((count_data_items(files_test),1))

for fold,(idxT,idxV) in enumerate(skf.split(np.arange(num_train_files))):

    # Re-initialize the TPU system at the start of each fold.
    if DEVICE=='TPU':
        if tpu: tf.tpu.experimental.initialize_tpu_system(tpu)

    # Resolve this fold's train / validation / test TFRecord files.
    # KFold splits file indices, which are matched against 'trainNN*.tfrec'.
    files_train = tf.io.gfile.glob([GCS_PATH[fold] + '/train%.2i*.tfrec'%x for x in idxT])
    np.random.shuffle(files_train)
    files_valid = tf.io.gfile.glob([GCS_PATH[fold] + '/train%.2i*.tfrec'%x for x in idxV])
    files_test = np.sort(np.array(tf.io.gfile.glob(GCS_PATH[fold] + '/test*.tfrec')))

    # Report the fold configuration.
    print('#'*25); print('#### FOLD',fold+1)
    print('#### Image Size: (%i, %i) | model: %s | batch_size %i'%
          (IMG_SIZES[fold][0],IMG_SIZES[fold][1],EFNS[EFF_NETS[fold]].__name__,BATCH_SIZES[fold]*REPLICAS))
    train_images = count_data_items(files_train)
    val_images   = count_data_items(files_valid)
    print('#### Training: %i | Validation: %i'%(train_images, val_images))

    # Build a fresh model for this fold inside the distribution strategy scope.
    K.clear_session()
    with strategy.scope():
        model = build_model(dim=IMG_SIZES[fold],ef=EFF_NETS[fold])
    print('#'*25)

    # Keep only the weights of the epoch with the highest validation AUC.
    sv = tf.keras.callbacks.ModelCheckpoint(
        'fold-%i.h5'%fold, monitor='val_auc', verbose=0, save_best_only=True,
        save_weights_only=True, mode='max', save_freq='epoch')

    # Train. The training dataset repeats, so steps_per_epoch bounds each epoch.
    print('Training...')
    history = model.fit(
        get_dataset(files_train, augment=AUGMENT, shuffle=True, repeat=True,
                    dim=IMG_SIZES[fold], batch_size = BATCH_SIZES[fold]),
        epochs=EPOCHS[fold],
        callbacks = [sv,get_lr_callback(BATCH_SIZES[fold])],
        steps_per_epoch=count_data_items(files_train)/BATCH_SIZES[fold]//REPLICAS,
        validation_data=get_dataset(files_valid,augment=False,shuffle=False,
                                    repeat=False,dim=IMG_SIZES[fold]),
        verbose=VERBOSE
    )

    # Restore the best checkpoint written by the callback above.
    print('Loading best model...')
    model.load_weights('fold-%i.h5'%fold)

    # Out-of-fold prediction with TTA: the unshuffled validation set is
    # repeated and predicted for enough steps to cover TTA * ct_valid samples.
    print('Predicting OOF with TTA...')
    ds_valid = get_dataset(files_valid,labeled=False,return_image_ids=False,augment=AUGMENT,
                           repeat=True,shuffle=False,dim=IMG_SIZES[fold],batch_size=BATCH_SIZES[fold]*2)
    ct_valid = count_data_items(files_valid); STEPS = TTA * ct_valid/BATCH_SIZES[fold]/2/REPLICAS
    pred = model.predict(ds_valid,steps=STEPS,verbose=VERBOSE)[:TTA*ct_valid,]
    # Column-major reshape puts each consecutive run of ct_valid predictions in
    # its own column; the row mean then averages the TTA passes per sample.
    # NOTE(review): assumes every repeat yields samples in the same order -- confirm.
    oof_pred.append( np.mean(pred.reshape((ct_valid,TTA),order='F'),axis=1) )

    # Collect OOF targets, fold index and image ids from un-augmented passes.
    # NOTE(review): the second element is treated as the target when
    # labeled=True and as the image id when labeled=False -- verify in get_dataset.
    ds_valid = get_dataset(files_valid, augment=False, repeat=False, dim=IMG_SIZES[fold],
                           labeled=True, return_image_ids=True)
    oof_tar.append( np.array([target.numpy() for img, target in ds_valid.unbatch()]) )
    oof_folds.append( np.ones_like(oof_tar[-1],dtype='int8')*fold )
    ds = get_dataset(files_valid, augment=False, repeat=False, dim=IMG_SIZES[fold],
                     labeled=False, return_image_ids=True)
    oof_ids.append( np.array([img_id.numpy().decode("utf-8") for img, img_id in ds.unbatch()]))

    # Test prediction with TTA, added into preds weighted by WGTS[fold].
    print('Predicting Test with TTA...')
    ds_test = get_dataset(files_test,labeled=False,return_image_ids=False,augment=AUGMENT,
                          repeat=True,shuffle=False,dim=IMG_SIZES[fold],batch_size=BATCH_SIZES[fold]*2)
    ct_test = count_data_items(files_test); STEPS = TTA * ct_test/BATCH_SIZES[fold]/2/REPLICAS
    pred = model.predict(ds_test,steps=STEPS,verbose=VERBOSE)[:TTA*ct_test,]
    preds[:,0] += np.mean(pred.reshape((ct_test,TTA),order='F'),axis=1) * WGTS[fold]

    # Report fold AUC: best validation AUC seen during training (no TTA)
    # versus AUC of the TTA-averaged OOF predictions.
    auc = roc_auc_score(oof_tar[-1],oof_pred[-1])
    oof_val.append(np.max( history.history['val_auc'] ))
    print('#### FOLD %i OOF AUC without TTA = %.3f, with TTA = %.3f'%(fold+1,oof_val[-1],auc))

    # Plot training curves: AUC on the left axis, loss on a twin right axis.
    if DISPLAY_PLOT:
        epoch_axis = np.arange(len(history.history['auc']))
        plt.figure(figsize=(15,5))
        plt.plot(epoch_axis,history.history['auc'],'-o',label='Train auc',color='#ff7f0e')
        plt.plot(epoch_axis,history.history['val_auc'],'-o',label='Val auc',color='#1f77b4')
        # Mark and label the epoch with the highest validation AUC.
        x = np.argmax( history.history['val_auc'] ); y = np.max( history.history['val_auc'] )
        xdist = plt.xlim()[1] - plt.xlim()[0]; ydist = plt.ylim()[1] - plt.ylim()[0]
        plt.scatter(x,y,s=200,color='#1f77b4'); plt.text(x-0.03*xdist,y-0.13*ydist,'max auc\n%.2f'%y,size=14)
        plt.ylabel('auc',size=14); plt.xlabel('Epoch',size=14)
        plt.legend(loc=2)
        plt2 = plt.gca().twinx()
        plt2.plot(epoch_axis,history.history['loss'],'-o',label='Train Loss',color='#2ca02c')
        plt2.plot(epoch_axis,history.history['val_loss'],'-o',label='Val Loss',color='#d62728')
        # Mark and label the epoch with the lowest validation loss; the y-range
        # is re-read so the label offset matches the axis being drawn on.
        x = np.argmin( history.history['val_loss'] ); y = np.min( history.history['val_loss'] )
        ydist = plt.ylim()[1] - plt.ylim()[0]
        plt.scatter(x,y,s=200,color='#d62728'); plt.text(x-0.03*xdist,y+0.05*ydist,'min loss',size=14)
        plt.ylabel('Loss',size=14)
        plt.title('FOLD %i - Image Size (%i, %i), %s'%
                  (fold+1,IMG_SIZES[fold][0],IMG_SIZES[fold][1],EFNS[EFF_NETS[fold]].__name__),size=18)
        plt.legend(loc=3)
        plt.savefig(f'fig{fold}.png')
        plt.show()