import pandas as pd

# Load the dataset. The sample deliberately contains dirty values: a None and
# an empty-string age, an empty city, an empty gender, and a salary without
# the leading dollar sign.
data = {
    'Name': ['John', 'Alice', 'Bob', 'Anna', 'Mike', 'Emily'],
    'Age': [25, 30, None, 35, 40, ''],
    'City': ['New York', 'Los Angeles', 'Chicago', 'San Francisco', '', 'Seattle'],
    'Gender': ['Male', 'Female', 'Male', '', 'Male', 'Female'],
    'Salary': ['$50000', '$60000', '$70000', '$80000', '90000', '$100000'],
}
df = pd.DataFrame(data)

# Display the original DataFrame
print("Original DataFrame :")
print(df)
print()

# Clean and format the data

# 1. Convert Age to numeric and fill missing values with the median age.
#    errors='coerce' turns unparseable entries into NaN so they can be filled.
df['Age'] = pd.to_numeric(df['Age'], errors='coerce')
median_age = df['Age'].median()  # median of the valid ages, before any rows are dropped
# Assign the result back instead of fillna(inplace=True) on a column selection:
# the in-place form acts on a temporary object and is not guaranteed to update df.
df['Age'] = df['Age'].fillna(median_age)

# 2. Remove rows with missing or empty values in City and Gender columns.
#    Both columns are checked for NaN/None *and* for the empty string.
has_city = df['City'].notna() & (df['City'] != '')
has_gender = df['Gender'].notna() & (df['Gender'] != '')
# .copy() makes the filtered frame independent, so the column assignment
# below does not raise SettingWithCopyWarning.
df = df[has_city & has_gender].copy()

# 3. Convert Salary to numeric, stripping dollar signs and thousands separators.
#    Raw string avoids the invalid '\$' escape warning on newer Python versions.
df['Salary'] = df['Salary'].replace(r'[\$,]', '', regex=True).astype(float)

# Display the cleaned and formatted DataFrame
print("Cleaned and Formatted DataFrame :")
print(df)