"""Explore the Iris dataset with PCA.

The script standardizes the four Iris features, prints the covariance
matrix together with its eigenvalues and eigenvectors, shows a 3D scatter
of the first three standardized features, and finally projects the data
onto two principal components and plots the result.
"""

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401  (kept: file-level import)
from sklearn import datasets
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# --- Load and standardize -------------------------------------------------
iris = datasets.load_iris()
X = iris.data
y = iris.target

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# --- Covariance structure -------------------------------------------------
# np.cov treats rows as variables, hence the transpose.
cov_matrix = np.cov(X_scaled.T)
print("Covariance Matrix:\n", cov_matrix)

# NOTE: np.linalg.eig does not return the eigenvalues in any particular order.
eigenvalues, eigenvectors = np.linalg.eig(cov_matrix)
print("Eigenvalues:", eigenvalues)
print("Eigenvectors:\n", eigenvectors)

# --- 3D view of the first three standardized features ---------------------
fig = plt.figure(figsize=(8, 6))
ax = fig.add_subplot(111, projection="3d")

colors = ["red", "green", "blue"]
# Plain list so the legend call below receives an ordinary sequence.
labels = list(iris.target_names)

for class_index, (color, label) in enumerate(zip(colors, labels)):
    members = y == class_index
    # Columns 0, 1, 2 are passed as x, y, z so the data matches the axis
    # labels set below. Previously column 1 was omitted, which put column 2
    # on the y-axis and left the z-axis flat.
    ax.scatter(
        X_scaled[members, 0],
        X_scaled[members, 1],
        X_scaled[members, 2],
        c=color,
        label=label,
    )

ax.set_xlabel("Sepal Length")
ax.set_ylabel("Sepal Width")
ax.set_zlabel("Petal Length")
ax.set_title("3D visualization of Iris data before PCA")
ax.legend()
plt.show()

# --- PCA down to two components -------------------------------------------
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_scaled)

pca_df = pd.DataFrame(data=X_pca, columns=["PC1", "PC2"])
pca_df["Target"] = y

explained_variance = pca.explained_variance_ratio_
print(f"Explained Variance by PC1: {explained_variance[0]:.2f}")
print(f"Explained Variance by PC2: {explained_variance[1]:.2f}")

# --- 2D scatter of the projected data -------------------------------------
plt.figure(figsize=(8, 6))
scatter = plt.scatter(
    pca_df["PC1"],
    pca_df["PC2"],
    c=pca_df["Target"],
    cmap="viridis",
    edgecolor="k",
)
plt.title("PCA of Iris Dataset (4 Features Reduced to 2)")
plt.xlabel("Principal Component 1")
plt.ylabel("Principal Component 2")
# legend_elements() returns (handles, default_labels); only the handles are
# used, paired with the class names instead of the numeric targets.
plt.legend(handles=scatter.legend_elements()[0], labels=labels)
plt.show()