Saturday, May 31, 2025

ML LAB

LAB 1

# Lab 1: feature distributions, box plots, and IQR outlier counts (California Housing)
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_california_housing
data = fetch_california_housing(as_frame=True)
df = data.frame
numeric_cols = df.select_dtypes(include=[np.number]).columns
plt.figure(figsize=(15, 10))
for i, col in enumerate(numeric_cols):
    plt.subplot(3, 3, i + 1)
    sns.histplot(df[col], bins=30, kde=True, color='skyblue')
    plt.title(f'{col} Distribution')
plt.tight_layout()
plt.show()
plt.figure(figsize=(15, 10))
for i, col in enumerate(numeric_cols):
    plt.subplot(3, 3, i + 1)
    sns.boxplot(x=df[col], color='salmon')
    plt.title(f'{col} Box Plot')
plt.tight_layout()
plt.show()

print("Outlier Counts:")
for col in numeric_cols:
    Q1 = df[col].quantile(0.25)
    Q3 = df[col].quantile(0.75)
    IQR = Q3 - Q1
    lower = Q1 - 1.5 * IQR
    upper = Q3 + 1.5 * IQR
    outliers = df[(df[col] < lower) | (df[col] > upper)]
    print(f"{col}: {len(outliers)} outliers")

print("\nDataset Summary:")
print(df.describe())
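
If the lab is extended to act on the outliers it counts, one common option is to cap values at the same IQR fences rather than drop rows. A minimal sketch (not part of the original lab):

# Winsorise each numeric column at its IQR fences
df_capped = df.copy()
for col in numeric_cols:
    Q1, Q3 = df[col].quantile([0.25, 0.75])
    IQR = Q3 - Q1
    df_capped[col] = df[col].clip(Q1 - 1.5 * IQR, Q3 + 1.5 * IQR)
print(df_capped.describe())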

LAB 2

# Lab 2: correlation heatmap and pair plot (California Housing)
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_california_housing
california_data = fetch_california_housing(as_frame=True)
data = california_data.frame
correlation_matrix = data.corr()
plt.figure(figsize=(10, 8))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', fmt='.2f', linewidths=0.5)
plt.title('Correlation Matrix of California Housing Features')
plt.show()
sns.pairplot(data, diag_kind='kde', plot_kws={'alpha': 0.5})
plt.suptitle('Pair Plot of California Housing Features', y=1.02)
plt.show()
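
Two follow-ups worth knowing: the pair plot over all ~20,000 rows is slow, and plotting data.sample(500) is a common shortcut; and the target's row of the correlation matrix can be read off directly. A sketch using the variables above:

# Features ranked by correlation with the target
print(correlation_matrix['MedHouseVal'].drop('MedHouseVal').sort_values(ascending=False))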

LAB 3

import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA
data = load_iris()
X = PCA(n_components=2).fit_transform(data.data)
for i, c in zip(range(3), 'rgb'):
    plt.scatter(*X[data.target == i].T, c=c, label=data.target_names[i])
plt.title('Iris PCA')
plt.xlabel('PC1')
plt.ylabel('PC2')
plt.legend()
plt.grid(True)
plt.show()
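
It is worth checking how much variance the two components keep. A small sketch that refits the PCA to expose the fitted object:

pca = PCA(n_components=2).fit(data.data)
print("Explained variance ratio:", pca.explained_variance_ratio_)
print("Total retained:", pca.explained_variance_ratio_.sum())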

LAB 4

# Lab 4: Find-S algorithm on a small categorical training set
import pandas as pd
def find_s_algorithm(file_path):
    data = pd.read_csv(file_path)
    print("Training data:")
    print(data)
    attributes = data.columns[:-1]
    class_label = data.columns[-1]
    # Start from the most specific hypothesis: the first positive example.
    # (Initialising to all-'?' is a bug: an attribute already generalised
    # to '?' would be re-specialised by the next positive example.)
    hypothesis = None
    for _, row in data.iterrows():
        if row[class_label] == 'Yes':
            if hypothesis is None:
                hypothesis = list(row[attributes])
            else:
                # Generalise attributes that disagree with this positive
                # example; once an attribute is '?', it stays '?'.
                hypothesis = [h if h == v else '?'
                              for h, v in zip(hypothesis, row[attributes])]
    return hypothesis if hypothesis is not None else ['?' for _ in attributes]
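
The script expects a training_data.csv next to it, which the post does not include. A minimal sketch that writes one, using Mitchell's classic EnjoySport examples (an assumed stand-in; substitute your own data):

sample = pd.DataFrame([
    ['Sunny', 'Warm', 'Normal', 'Strong', 'Warm', 'Same',   'Yes'],
    ['Sunny', 'Warm', 'High',   'Strong', 'Warm', 'Same',   'Yes'],
    ['Rainy', 'Cold', 'High',   'Strong', 'Warm', 'Change', 'No'],
    ['Sunny', 'Warm', 'High',   'Strong', 'Cool', 'Change', 'Yes'],
], columns=['Sky', 'AirTemp', 'Humidity', 'Wind', 'Water', 'Forecast', 'EnjoySport'])
sample.to_csv('training_data.csv', index=False)

With this data the final hypothesis should come out as ['Sunny', 'Warm', '?', 'Strong', '?', '?'].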

file_path = 'training_data.csv'
hypothesis = find_s_algorithm(file_path)
print("\nThe final hypothesis is:", hypothesis)

LAB 5

import numpy as np, matplotlib.pyplot as plt
from collections import Counter
data = np.random.rand(100)
train, labels = data[:50], ["Class1" if x <= 0.5 else "Class2" for x in data[:50]]
test = data[50:]
def knn(p, train, labels, k):
    # Distance-sort the training points and majority-vote the k nearest labels.
    nearest = sorted(zip(np.abs(train - p), labels))[:k]
    return Counter(lbl for _, lbl in nearest).most_common(1)[0][0]
for k in [1, 3, 5, 10]:
    preds = [knn(p, train, labels, k) for p in test]
    print(f"\nk = {k}")
    for i, (val, pred) in enumerate(zip(test, preds), 51):
        print(f"x{i}: {val:.2f} → {pred}")
    plt.scatter(train, [0]*50, c=["b" if l=="Class1" else "r" for l in labels], marker="o")
    plt.scatter(test, [1]*50, c=["b" if p=="Class1" else "r" for p in preds], marker="x")
    plt.yticks([0, 1], ["Train", "Test"])
    plt.title(f"k-NN (k={k})")
    plt.grid(); plt.show()
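
As a cross-check, scikit-learn's KNeighborsClassifier should largely agree with the hand-rolled version (tie-breaking can differ). A sketch reusing the arrays above:

from sklearn.neighbors import KNeighborsClassifier
for k in [1, 3, 5, 10]:
    clf = KNeighborsClassifier(n_neighbors=k).fit(train.reshape(-1, 1), labels)
    sk_preds = clf.predict(test.reshape(-1, 1))
    mine = [knn(p, train, labels, k) for p in test]
    print(f"k={k}: {sum(a == b for a, b in zip(sk_preds, mine))}/50 match")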


LAB 6

import numpy as np, matplotlib.pyplot as plt

x = np.linspace(0, 2*np.pi, 100)           # training inputs
y = np.sin(x) + 0.1*np.random.randn(100)   # noisy sine targets
t = np.linspace(0, 2*np.pi, 200)           # query points to predict at

def f(tx):
    # Gaussian kernel weights centred on the query point tx
    w = np.exp(-(x - tx)**2 / 0.25)
    X = np.c_[np.ones_like(x), x]          # design matrix with bias column
    # Weighted least squares solved via the pseudoinverse
    theta = np.linalg.pinv(X.T @ np.diag(w) @ X) @ X.T @ np.diag(w) @ y
    return np.array([1, tx]) @ theta

plt.scatter(x, y, c='r')
plt.plot(t, [f(i) for i in t], c='b')
plt.show()
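
For reference, f implements locally weighted linear regression: each query point x_0 gets its own weighted least-squares fit,

    \hat{\theta}(x_0) = (X^\top W X)^{+} X^\top W y, \qquad W_{ii} = \exp\!\left(-\tfrac{(x_i - x_0)^2}{0.25}\right),

where ^{+} is the pseudoinverse (np.linalg.pinv) and the 0.25 in the exponent sets the kernel bandwidth.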


LAB 7

import pandas as pd, numpy as np, matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import mean_squared_error as mse, r2_score as r2


# The Boston housing file stores each record across two physical lines;
# re-interleave even/odd rows, then keep column 5 (RM, average rooms).
d = pd.read_csv("http://lib.stat.cmu.edu/datasets/boston", sep=r"\s+", skiprows=22, header=None)
x = np.hstack([d.values[::2], d.values[1::2, :2]])[:, 5:6]  # RM
y = d.values[1::2, 2]                                       # MEDV (target)
x1, x2, y1, y2 = train_test_split(x, y)
m = LinearRegression().fit(x1, y1)
p = m.predict(x2)
plt.scatter(x2, y2); plt.plot(x2, p, c="r"); plt.title("Linear"); plt.show()
print("Linear Regression\nMSE:", mse(y2, p), "\nR²:", r2(y2, p))


u = "https://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data"
cols = ["mpg", "cylinders", "displacement", "horsepower", "weight",
        "acceleration", "model_year", "origin", "car_name"]
df = pd.read_csv(u, sep=r"\s+", names=cols, na_values="?").dropna()
x = df[["displacement"]]; y = df["mpg"]
x1, x2, y1, y2 = train_test_split(x, y)
m = make_pipeline(PolynomialFeatures(2), LinearRegression()).fit(x1, y1)
p = m.predict(x2)
plt.scatter(x2, y2); plt.scatter(x2, p, c="r", s=10); plt.title("Poly"); plt.show()
print("Polynomial Regression\nMSE:", mse(y2, p), "\nR²:", r2(y2, p))


LAB 8

from sklearn.datasets import load_breast_cancer
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y)
model = DecisionTreeClassifier().fit(X_train, y_train)
print("Accuracy:", accuracy_score(y_test, model.predict(X_test)) * 100, "%")
result = model.predict([X_test[0]])
print("Predicted Class:", "Benign" if result[0] == 1 else "Malignant")
plot_tree(model, filled=True)
plt.show()
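
The unpruned tree is usually too deep to read in the plot. A hedged variant (max_depth=3 is an arbitrary choice) that trades a little accuracy for a legible diagram:

small = DecisionTreeClassifier(max_depth=3, random_state=0).fit(X_train, y_train)
print("Depth-3 accuracy:", accuracy_score(y_test, small.predict(X_test)) * 100, "%")
plot_tree(small, filled=True, feature_names=load_breast_cancer().feature_names)
plt.show()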

LAB 9

from sklearn.datasets import fetch_olivetti_faces
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report
import matplotlib.pyplot as plt
X, y = fetch_olivetti_faces(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
model = GaussianNB()
model.fit(X_train, y_train)
pred = model.predict(X_test)
print(f"Accuracy: {accuracy_score(y_test, pred)*100:.2f}%")
print(classification_report(y_test, pred, zero_division=1))
for i in range(15):
    plt.subplot(3, 5, i+1)
    plt.imshow(X_test[i].reshape(64, 64), cmap='gray')
    plt.title(f"T:{y_test[i]} P:{pred[i]}")
plt.tight_layout()
plt.show()
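
GaussianNB treats each of the 4,096 pixels as an independent feature, which is a poor fit for face images. One common improvement is to decorrelate and compress with PCA first; a sketch, with 100 components as an assumed (untuned) choice:

from sklearn.decomposition import PCA
from sklearn.pipeline import make_pipeline
pipe = make_pipeline(PCA(n_components=100, whiten=True, random_state=42), GaussianNB())
pipe.fit(X_train, y_train)
print(f"PCA + GaussianNB accuracy: {accuracy_score(y_test, pipe.predict(X_test))*100:.2f}%")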

LAB 10

from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.metrics import confusion_matrix, classification_report
import pandas as pd, seaborn as sns, matplotlib.pyplot as plt
X, y = load_breast_cancer(return_X_y=True)
X = StandardScaler().fit_transform(X)
# KMeans and PCA
k = KMeans(n_clusters=2, random_state=0).fit(X)
p = PCA(2).fit(X)
X2 = p.transform(X)
cent = p.transform(k.cluster_centers_)
# Results
print(confusion_matrix(y, k.labels_))
print(classification_report(y, k.labels_))
# Plot
d = pd.DataFrame(X2, columns=['x', 'y'])
d['k'] = k.labels_
d['t'] = y
f, a = plt.subplots(1, 3, figsize=(18, 5))
sns.scatterplot(data=d, x='x', y='y', hue='k', palette='Set1', s=100, edgecolor='k', ax=a[0]).set(title='KMeans')
sns.scatterplot(data=d, x='x', y='y', hue='t', palette='coolwarm', s=100, edgecolor='k', ax=a[1]).set(title='True Labels')
sns.scatterplot(data=d, x='x', y='y', hue='k', palette='Set1', s=100, edgecolor='k', ax=a[2])
a[2].scatter(*cent.T, c='red', s=200, marker='X'); a[2].set_title('With Centroids')
plt.tight_layout(); plt.show()
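
One caveat: KMeans numbers its clusters arbitrarily, so labels_ can come out as the inverse of y, making the confusion matrix and report above look far worse than the clustering really is. A sketch that flips the labels when raw agreement falls below chance:

labels = k.labels_ if (k.labels_ == y).mean() >= 0.5 else 1 - k.labels_
print(classification_report(y, labels))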
