I have below code in python. I am getting error when creating object (NSLKDDFeatureAnalysis nsl_data(nsl_kdd_dataset)) . How can I create a object of class in python. I am new to python. Kindly help
# Python imports e.g., os, math...
from collections import defaultdict
import argparse
import joblib
import os
# Third party libraries imports
# Numpy imports e.g., numpy,..
import numpy as np
# Pandas imports e.g., pandas...
import pandas as pd
# sklearn imports e.g., linear_model,..
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
# Azure imports e.g., Workspace,...
from azureml.core.run import Run
from azureml.data.dataset_factory import TabularDatasetFactory
# VRK: Create TabularDataset using TabularDatasetFactory
# create nsl-kdd network train data
nsl_kdd_webpath = [
'https://github.com/venkataravikumaralladi/AzureMLCapstoneProject/blob/main/KDDTrain%2B.txt'
]
#create network analysis data set in tabular format using TabularDatasetFactory
nsl_kdd_dataset = TabularDatasetFactory.from_delimited_files(path=nsl_kdd_webpath)
class NSLKDDFeatureAnalysis:
# class variables
network_data_column_names = [
'duration', 'protocol_type', 'service',
'flag', 'src_bytes', 'dst_bytes',
'land', 'wrong_fragment', 'urgent',
'hot', 'num_failed_logins', 'logged_in',
'num_compromised', 'root_shell', 'su_attempted',
'num_root', 'num_file_creations', 'num_shells',
'num_access_files', 'num_outbound_cmds', 'is_hot_login',
'is_guest_login',
'count', 'srv_count', 'serror_rate',
'srv_serror_rate', 'rerror_rate', 'srv_rerror_rate',
'same_srv_rate', 'diff_srv_rate', 'srv_diff_host_rate',
'dst_host_count', 'dst_host_srv_count', 'dst_host_same_srv_rate',
'dst_host_diff_srv_rate', 'dst_host_same_src_port_rate', 'dst_host_srv_diff_host_rate',
'dst_host_serror_rate', 'dst_host_srv_serror_rate', 'dst_host_rerror_rate',
'dst_host_srv_rerror_rate',
'attack_type',
'success_pred' ]
trained_column_names_dummy = []
strd_scalar_continious = 0
def __init__(self, data):
self.train_data = data
def clean_data(self):
train_df = self.train_data.to_pandas_dataframe().dropna()
train_df.columns = NSLKDDFeatureAnalysis.network_data_column_names
# For this analysis we drop "success_pred" column
train_df.drop('success_pred', axis=1, inplace=True)
# Drop attack type in training data which is to be predicted.
train_X = train_df.drop("attack_type", axis=1)
train_Y = train_df['attack_type']
# convert categorical types to dummy variables.
feature_type_to_names_mapping = defaultdict(list)
with open('KDDDataFeatureNamesToTypes.txt', 'r') as f:
#read from line 1. skip line 0 as in given file we have attack names here which we don't need.
for line in f.readlines()[1:]:
name, nature = line.strip()[:-1].split(': ')
feature_type_to_names_mapping[nature].append(name)
# Generate dummy variables for categorical types
train_data_X = pd.get_dummies(train_X, columns=feature_type_to_names_mapping['symbolic'], drop_first=False)
# standarize continious feature
continuous_features = feature_type_to_names_mapping['continuous']
NSLKDDFeatureAnalysis.strd_scalar_continious = StandardScaler().fit(train_data_X[continuous_features])
# Standardize training data
train_data_X[continuous_features] = NSLKDDFeatureAnalysis.strd_scalar_continious.transform(train_data_X[continuous_features])
# we build binary classifier for this.
train_Y = train_Y.apply(lambda x: 0 if x == 'normal' else 1)
NSLKDDFeatureAnalysis.trained_column_names_dummy = train_data_X.columns
return train_data_X, train_Y
def main():
run = Run.get_context()
# Add arguments to script
parser = argparse.ArgumentParser()
parser.add_argument('--criterion', default='gini', help="The function to measure the quality of a split.")
parser.add_argument('--max_depth', type=int, default=None, help="The maximum depth of the tree")
args = parser.parse_args()
run.log("Criteria for split:", args.criterion)
run.log("Max depth:", args.max_depth)
# VRK: Data cleaning step
NSLKDDFeatureAnalysis nsl_data(nsl_kdd_dataset) **-----> Error here Invalid syntax**
x, y = nsl_data_analysis.clean_data()
# VRK: Split data into train and test sets.
x_train, x_test, y_train, y_test = train_test_split(x,y)
decisiontree_attack_classifier = DecisionTreeClassifier(criterion=args.criterion, max_depth=args.max_depth)
decisiontree_attack_classifier.fit(x_train, y_train)
accuracy = decisiontree_attack_classifier.score(x_test, y_test)
#VRK:Save the model.
os.makedirs('outputs', exist_ok=True)
joblib.dump(decisiontree_attack_classifier, 'outputs/vrk_ids_model.joblib')
run.log("Accuracy", np.float(accuracy))
return
if __name__ == '__main__':
main()
Source: Python-3x Questions
One Reply to “invalid syntax in creating class object in python”
Even I don’t understand what you try to do in this line – so Python also has problem to understand you.
If you want to create instance then you need something like
nsl_data_analysis = NSLKDDFeatureAnalysis( nsl_kdd_dataset )