废话不多说,直接上代码:下面用 scikit-learn 的线性回归模型,根据汽车的发动机排量、气缸数和油耗数据预测二氧化碳排放量,并用 joblib 保存和重新加载训练好的模型。

# import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import joblib  # 用于保存和加载模型

# End-to-end linear-regression walkthrough: load a CSV, split it, fit the
# model, report the test error, then persist the model and reload it.

# File locations and the name of the column to predict, kept in one place.
DATA_PATH = 'FuelConsumptionCo21.csv'
MODEL_PATH = 'linear_regression_model.pkl'
TARGET_COLUMN = 'target'

# Step 1 - load the dataset. The CSV must contain the feature columns plus
# one column named 'target' holding the value to predict.
data = pd.read_csv(DATA_PATH)

# Step 2 - separate the target from the features; every column other than
# the target is used as a feature.
y = data[TARGET_COLUMN]
X = data.drop(columns=TARGET_COLUMN)

# Hold out 20% of the rows for testing; the fixed seed makes the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Step 3 - build and fit the model (fit() returns the estimator itself).
model = LinearRegression().fit(X_train, y_train)

# Show the learned coefficients followed by the intercept, one per line.
print(model.coef_, model.intercept_, sep='\n')

# Step 4 - predict on the held-out rows and report the mean squared error.
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse}')

# Step 5 - persist the fitted model to disk.
joblib.dump(model, MODEL_PATH)

# Step 6 (optional) - reload the model and re-score it on the same test rows
# to confirm that it survived the save/load round trip.
loaded_model = joblib.load(MODEL_PATH)
loaded_y_pred = loaded_model.predict(X_test)
loaded_mse = mean_squared_error(y_test, loaded_y_pred)
print(f'Mean Squared Error after loading: {loaded_mse}')

原始数据是汽车二氧化碳排放指标数据集,部分数据如下(CSV 格式,最后一列 target 为二氧化碳排放量):

ENGINESIZE,CYLINDERS,FUELCONSUMPTION_CITY,FUELCONSUMPTION_HWY,FUELCONSUMPTION_COMB,FUELCONSUMPTION_COMB_MPG,target
2,4,9.9,6.7,8.5,33,196
2.4,4,11.2,7.7,9.6,29,221
1.5,4,6,5.8,5.9,48,136
3.5,6,12.7,9.1,11.1,25,255
3.5,6,12.1,8.7,10.6,27,244
3.5,6,11.9,7.7,10,28,230
3.5,6,11.8,8.1,10.1,28,232
3.7,6,12.8,9,11.1,25,255
3.7,6,13.4,9.5,11.6,24,267
2.4,4,10.6,7.5,9.2,31,212
2.4,4,11.2,8.1,9.8,29,225
3.5,6,12.1,8.3,10.4,27,239
5.9,12,18,12.6,15.6,18,359
5.9,12,18,12.6,15.6,18,359
4.7,8,17.4,11.3,14.7,19,338
4.7,8,18.1,12.2,15.4,18,354
4.7,8,17.4,11.3,14.7,19,338
4.7,8,18.1,12.2,15.4,18,354
5.9,12,18,12.6,15.6,18,359
2,4,9.9,7.4,8.8,32,202
2,4,11.5,8.1,10,28,230
2,4,10.8,7.5,9.3,30,214

点赞(0) 打赏

评论列表 共有 0 条评论

暂无评论

微信公众账号

微信扫一扫加关注

发表
评论
返回
顶部