MCC（Matthews相関係数）の計算

この指標は普通は使わない気がする。

class MCC:
    """Matthews correlation coefficient (MCC) for signed PSP predictions.

    The same data are scored as two binary problems: EPSP (positive
    values) and IPSP (negative values).  Call ``conf`` first to build the
    confusion counts, then ``compute`` to get the MCC averaged over the
    two problems.
    """

    def conf(self, psp_true, psp_pred):
        """Count confusion-matrix entries for the EPSP and IPSP problems.

        Args:
            psp_true: Ground-truth values, passed as an array of shape
                ``(n_data, 1)``.  The input is flattened with ``np.ravel``.
            psp_pred: Predicted values, flattened the same way and compared
                element-wise against ``psp_true``.

        Returns:
            None.  The counts are stored on the instance as ``self.tps``,
            ``self.fps``, ``self.fns`` and ``self.tns``; each is a
            two-element list ordered ``[epsp_count, ipsp_count]``.
        """
        # Flatten once; both problems are evaluated on the same vectors.
        y_true = np.ravel(psp_true)
        y_pred = np.ravel(psp_pred)

        # ==== EPSP: prediction is "positive" when y_pred > 0 ====
        # NOTE(review): the truth is positive at >= 1 but negative at <= 0,
        # so true values strictly between 0 and 1 fall into none of the four
        # EPSP cells.  Harmless if labels are integers -- confirm.
        tp_epsp = (y_true >= 1) & (y_pred > 0)
        fp_epsp = (y_true <= 0) & (y_pred > 0)
        fn_epsp = (y_true >= 1) & (y_pred <= 0)
        tn_epsp = (y_true <= 0) & (y_pred <= 0)

        # ==== IPSP: prediction is "positive" when y_pred < 0 ====
        tp_ipsp = (y_true < 0) & (y_pred < 0)
        fp_ipsp = (y_true >= 0) & (y_pred < 0)
        fn_ipsp = (y_true < 0) & (y_pred >= 0)
        tn_ipsp = (y_true >= 0) & (y_pred >= 0)

        # Kept as attributes so they can also be used for plotting.
        self.tps = [tp_epsp.sum(), tp_ipsp.sum()]
        self.fps = [fp_epsp.sum(), fp_ipsp.sum()]
        self.fns = [fn_epsp.sum(), fn_ipsp.sum()]
        self.tns = [tn_epsp.sum(), tn_ipsp.sum()]

    def compute(self):
        """Return the MCC averaged over the EPSP and IPSP problems.

        ``conf`` must have been called first, because this method reads the
        counts it stores on the instance.

        Returns:
            The mean of the two per-problem MCC values.  If the MCC
            denominator of either problem is zero, ``0`` is returned for
            the whole score.
        """
        mcc = 0.0
        for tp, fp, fn, tn in zip(self.tps, self.fps, self.fns, self.tns):
            # The counts are fixed-width NumPy integers.  Their four-factor
            # product silently wraps around for large sample counts, so do
            # the arithmetic in floating point instead.
            tp, fp, fn, tn = float(tp), float(fp), float(fn), float(tn)
            denom = (tp + fp) * (tp + fn) * (tn + fp) * (tn + fn)
            if denom == 0:
                return 0
            mcc += (tp * tn - fp * fn) / np.sqrt(denom)
        return mcc / 2

参考文献: Matthews, Brian W. "Comparison of the predicted and observed secondary structure of T4 phage lysozyme." Biochimica et Biophysica Acta (BBA)-Protein Structure 405.2 (1975): 442-451.