How to extend a pandas function from one column function to multiple column function

Here’s my function

def get_IV(df, feature, target):
    lst = []

    unique_values = df[feature].unique()
    for val in unique_values:
        lst.append([feature,                                                        # Feature name
                    val,                                                            # Value of a feature (unique)
                    df[(df[feature] == val) & (df[target] == 0)].count()[feature],  # Good (Fraud == 0)
                    df[(df[feature] == val) & (df[target] == 1)].count()[feature]   # Bad  (Fraud == 1)
                   ])
 
    data = pd.DataFrame(lst, columns=['Variable', 'Value', 'Good', 'Bad'])
        
    total_bad = df[df[target] == 1].count()[feature]
    total_good = df.shape[0] - total_bad
    
    data['Distribution Good'] = data['Good']/ total_good
    data['Distribution Bad'] = data['Bad'] / total_bad
    data['WoE'] = np.log(data['Distribution Good'] / data['Distribution Bad'])

    data = data.replace({'WoE': {np.inf: 0, -np.inf: 0}})

    data['IV'] = data['WoE'] * (data['Distribution Good'] - data['Distribution Bad'])

    data = data.sort_values(by=['Variable', 'Value'], ascending=[True, True])
    data.index = range(len(data.index))

    iv = data['IV'].sum()

    return iv

But for using this for 100 columns I need use that in 100 times such as:

get_IV(data, 'Column1', 'Label'),
get_IV(data, 'Column2', 'Label'),

get_IV(data, 'Column100', 'Label')

What I need is change the input

cols = ['Column1', 'Column2', ..., 'Column100' ]

And new function like this

get_IV(data, cols, 'Label')

and my expected output is a dataframe like this

Column               IV
Column1         0.00035
Column2         0.01085
...
Column100       0.01281

Source: Python Questions

LEAVE A COMMENT