import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn.datasets import make_regression

# Read the star dataset from its fixed location into a DataFrame and
# preview the first rows.
csv_path = "/content/6 class csv.csv"
stars = pd.read_csv(csv_path)
stars.head()  # result is discarded when run as a plain script

import matplotlib.pyplot as plt
import seaborn as sns

# Grid of pairwise plots over the columns of `stars`, with points coloured
# by the value of the "Spectral Class" column.
pair_grid = sns.pairplot(data=stars, hue="Spectral Class")
plt.show()

# Draw one scatter figure per non-target column, each plotted against the
# spectral class.
target_variable = 'Spectral Class'
is_feature = stars.columns != target_variable
features = stars.columns[is_feature]

for feature in features:
    _, axis = plt.subplots(figsize=(5, 4))
    axis.scatter(stars[feature], stars[target_variable], color="red", alpha=0.8)
    axis.set_xlabel(feature)
    axis.set_ylabel(target_variable)
    axis.grid(True)
    plt.show()

from sklearn.tree import DecisionTreeRegressor

# Regression tree limited to depth 5. This instance is never fitted in the
# visible code: `model` is reassigned to a new DecisionTreeRegressor further
# down, before any call to fit().
model = DecisionTreeRegressor(max_depth=5)

from sklearn.preprocessing import LabelEncoder

# Integer-code the spectral class labels and keep the codes in a separate
# column next to the original strings.
encoded_column = 'Spectral Class Encoded'
encoder = LabelEncoder()
spectral_labels = stars['Spectral Class']
stars[encoded_column] = encoder.fit(spectral_labels).transform(spectral_labels)

# From here on the encoded column is the prediction target.
target_variable = encoded_column

# --- Build the feature list and encode categorical features ---
# Neither form of the target (raw label or integer code) may be a model
# input: feeding the target in as a feature leaks the answer and produces a
# meaninglessly high score.
target_columns = {'Spectral Class', 'Spectral Class Encoded', target_variable}
features = [column for column in stars.columns if column not in target_columns]

# List of categorical features to encode (add any other suspected categorical features)
categorical_features = ['Star color']

for feature in categorical_features:
    if feature not in features:
        continue
    encoded_name = feature + 'Encoded'
    # Use a fresh encoder per column so the shared `encoder`, fitted on the
    # spectral classes, keeps its label mapping.
    stars[encoded_name] = LabelEncoder().fit_transform(stars[feature])
    # Swap the raw string column for its encoded counterpart.
    features.remove(feature)
    # The encoded column already exists in `stars` when this code is re-run;
    # avoid listing it twice.
    if encoded_name not in features:
        features.append(encoded_name)

from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

# Regression tree with cost-complexity pruning (ccp_alpha); seeded so that
# repeated runs give the same tree.
# NOTE(review): the target is a label-encoded class, so a classifier scored
# with accuracy may be a better fit than a regressor scored with R^2 - confirm.
model = DecisionTreeRegressor(ccp_alpha=0.01, random_state=42)

# The target column must never be part of the inputs; drop it defensively in
# case it is present in the feature list.
x = stars[[column for column in features if column != target_variable]]
y = stars[target_variable]

# Hold out 20% of the rows for evaluation. A 90% hold-out would leave only a
# tenth of the data to learn from.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

# Fit on the training rows only and score on rows the model has not seen;
# scoring on the training data overstates performance.
model.fit(x_train, y_train)
y_pred = model.predict(x_test)

# r2_score expects (y_true, y_pred); the metric is not symmetric.
tree_r2 = r2_score(y_test, y_pred)
print(f"Decision tree R^2 on the held-out split: {tree_r2:.4f}")

# Recorded notebook output: 0.999094526144226

from sklearn.ensemble import RandomForestRegressor

# Depth-limited random forest; seeded so that repeated runs give the same
# score.
new_model = RandomForestRegressor(max_depth=5, random_state=42)

# Hold out 20% of the rows for evaluation (a 90% hold-out would leave only a
# tenth of the data to learn from). The target column is dropped from the
# inputs if it is present, so the model cannot read the answer.
x_train, x_test, y_train, y_test = train_test_split(
    x.drop(columns=[target_variable], errors="ignore"),
    y,
    test_size=0.2,
    random_state=42,
)

new_model.fit(x_train, y_train)

# r2_score expects (y_true, y_pred); the metric is not symmetric.
forest_r2 = r2_score(y_test, new_model.predict(x_test))
print(f"Random forest R^2 on the held-out split: {forest_r2:.4f}")

# Recorded notebook output: 0.9646125007917806

# Recorded notebook output of stars.head():
# 	Temperature (K)	Luminosity(L/Lo)	Radius(R/Ro)	Absolute magnitude(Mv)	Star color	Spectral Class
# 0	3068	0.002400	0.1700	16.12	Red	M
# 1	3042	0.000500	0.1542	16.60	Red	M
# 2	2600	0.000300	0.1020	18.70	Red	M
# 3	2800	0.000200	0.1600	16.65	Red	M
# 4	1939	0.000138	0.1030	20.06	Red	M