返回顶部
c

cost-prediction 成本预测

Predict construction project costs using Machine Learning. Use Linear Regression, K-Nearest Neighbors, and Random Forest models on historical project data. Train, evaluate, and deploy cost prediction models.

作者: admin | 来源: ClawHub
源自
ClawHub
版本
V 2.0.0
安全检测
已通过
1,323
下载量
免费
免费
0
收藏
概述
安装方式
版本历史

cost-prediction

技能名称:成本预测

详细描述:

基于机器学习的建筑成本预测

概述

基于DDC方法论(第4.5章),该技能能够利用历史数据和机器学习算法预测建筑项目成本。该方法将传统的基于专家的估算转变为数据驱动的预测。

书籍参考: Будущее: прогнозы и машинное обучение / 未来:预测与机器学习

基于历史数据的预测和预报使企业能够就项目成本和工期做出更准确的决策。
— DDC书籍,第4.5章

核心概念

历史数据 → 特征工程 → 机器学习模型 → 成本预测
   │          │            │            │
   ▼          ▼            ▼            ▼
过去项目   准备数据     训练模型      新项目
 含成本  用于机器学习   基于历史     成本预测

快速入门

python
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, r2_score

# Load historical project data
df = pd.read_csv("historical_projects.csv")

# Features and target variable
X = df[["area_m2", "floors", "complexity_score"]]
y = df["total_cost"]

# Split the data, holding out 20% of the rows for evaluation
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Train the model
model = LinearRegression()
model.fit(X_train, y_train)

# Predict on the held-out rows and report the fit quality
predictions = model.predict(X_test)
print(f"R² 分数: {r2_score(y_test, predictions):.2f}")
print(f"平均绝对误差: ${mean_absolute_error(y_test, predictions):,.0f}")

# Predict a new project: area, floors, complexity.
# The row is wrapped in a DataFrame with the training column names so the
# model receives the same feature layout it was fitted on.
new_project = pd.DataFrame([[5000, 10, 3]], columns=X.columns)
cost = model.predict(new_project)
print(f"预测成本: ${cost[0]:,.0f}")

数据准备

准备历史数据集

python
import pandas as pd
import numpy as np

def prepare_cost_dataset(df, *, current_year=2024, inflation_rate=0.03):
    """Prepare historical project data for machine learning.

    Args:
        df: DataFrame containing at least the columns ``area_m2``,
            ``floors``, ``building_type``, ``location``, ``year_completed``,
            ``complexity_score``, ``material_quality`` and ``total_cost``.
            The input frame is not modified.
        current_year: Year to which costs are inflated (default 2024).
        inflation_rate: Annual inflation rate used for the adjustment
            (default 0.03, i.e. 3% per year).

    Returns:
        A new DataFrame restricted to the columns above, with rows lacking
        ``total_cost`` removed, missing ``complexity_score`` values filled
        with the median, ``building_type``/``location`` one-hot encoded, and
        the derived columns ``cost_per_m2``, ``cost_per_floor``,
        ``years_ago`` and ``adjusted_cost`` added.
    """
    # Select the relevant features
    features = [
        "area_m2",
        "floors",
        "building_type",
        "location",
        "year_completed",
        "complexity_score",
        "material_quality",
        "total_cost",
    ]

    # Handle missing values: a row without a target cost cannot be used for
    # training. Copy so the caller's frame is left untouched.
    df = df[features].dropna(subset=["total_cost"]).copy()
    df["complexity_score"] = df["complexity_score"].fillna(
        df["complexity_score"].median()
    )

    # Encode categorical variables
    df = pd.get_dummies(df, columns=["building_type", "location"])

    # Derived features
    df["cost_per_m2"] = df["total_cost"] / df["area_m2"]
    df["cost_per_floor"] = df["total_cost"] / df["floors"]

    # Inflation adjustment: compound the historical cost forward to
    # ``current_year`` prices.
    df["years_ago"] = current_year - df["year_completed"]
    df["adjusted_cost"] = df["total_cost"] * (1 + inflation_rate) ** df["years_ago"]

    return df

# Usage example
df = pd.read_csv("projects_history.csv")
df_prepared = prepare_cost_dataset(df)

特征工程

python
def engineer_features(df):
    """Create additional features for better predictions.

    Adds the columns ``area_x_floors``, ``area_x_complexity``,
    ``area_squared``, ``log_area`` and ``size_category`` to ``df``.

    Args:
        df: DataFrame with ``area_m2``, ``floors`` and ``complexity_score``
            columns. It is modified in place.

    Returns:
        The same DataFrame object, with the new columns added.
    """
    # Interaction features
    df["area_x_floors"] = df["area_m2"] * df["floors"]
    df["area_x_complexity"] = df["area_m2"] * df["complexity_score"]

    # Polynomial feature
    df["area_squared"] = df["area_m2"] ** 2

    # Log transform (for skewed features)
    df["log_area"] = np.log1p(df["area_m2"])

    # Binned feature. pd.cut uses right-closed intervals by default, so the
    # bins are (0, 1000], (1000, 5000], (5000, 10000], (10000, inf).
    df["size_category"] = pd.cut(
        df["area_m2"],
        bins=[0, 1000, 5000, 10000, float("inf")],
        labels=["small", "medium", "large", "xlarge"],
    )

    return df

机器学习模型

线性回归

python
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

def train_linear_model(X_train, y_train):
    """Train a linear regression model with feature standardization.

    Args:
        X_train: Training features; must expose ``.columns`` (e.g. a
            pandas DataFrame) so coefficients can be labelled.
        y_train: Training target values.

    Returns:
        A ``(pipeline, coefficients)`` tuple: the fitted scaler + regressor
        pipeline, and a DataFrame of per-feature coefficients sorted by
        absolute value, largest first.
    """
    pipeline = Pipeline([
        ("scaler", StandardScaler()),
        ("regressor", LinearRegression()),
    ])

    pipeline.fit(X_train, y_train)

    # Feature importance (coefficients). Because the inputs are standardized
    # first, the coefficient magnitudes are comparable across features.
    coefficients = pd.DataFrame({
        "feature": X_train.columns,
        "coefficient": pipeline.named_steps["regressor"].coef_,
    }).sort_values("coefficient", key=abs, ascending=False)

    return pipeline, coefficients

# Usage example
model, importance = train_linear_model(X_train, y_train)
print("特征重要性:")
print(importance)

K近邻(KNN)

python
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV

def train_knn_model(X_train, y_train):
    """Train a KNN regressor, choosing k by cross-validated grid search.

    Args:
        X_train: Training features.
        y_train: Training target values.

    Returns:
        A ``(best_estimator, scaler)`` tuple: the KNN model refit with the
        best ``n_neighbors``, and the fitted StandardScaler. The model was
        trained on scaled features, so new data must be passed through
        ``scaler.transform`` before calling ``predict``.
    """
    # Standardize features (KNN is distance-based).
    # NOTE: the scaler is fitted on the full training set before the
    # cross-validation split, so each CV fold sees scaling statistics that
    # include its validation rows.
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X_train)

    # Find the best k (3..19) with 5-fold cross-validation
    param_grid = {"n_neighbors": range(3, 20)}
    knn = KNeighborsRegressor()
    grid_search = GridSearchCV(
        knn, param_grid, cv=5, scoring="neg_mean_absolute_error"
    )
    grid_search.fit(X_scaled, y_train)

    print(f"最优k值: {grid_search.best_params_['n_neighbors']}")
    # The score is a negated MAE, so flip the sign for display.
    print(f"最优平均绝对误差: ${-grid_search.best_score_:,.0f}")

    return grid_search.best_estimator_, scaler

# Usage example
knn_model, scaler = train_knn_model(X_train, y_train)

随机森林

python
from sklearn.ensemble import RandomForestRegressor

def train_random_forest(X_train, y_train):
    """Train a random forest regression model.

    Args:
        X_train: Training features; must expose ``.columns`` (e.g. a
            pandas DataFrame) so importances can be labelled.
        y_train: Training target values.

    Returns:
        A ``(rf, importance)`` tuple: the fitted RandomForestRegressor and a
        DataFrame of per-feature importances sorted from highest to lowest.
    """
    rf = RandomForestRegressor(
        n_estimators=100,
        max_depth=10,
        min_samples_split=5,
        random_state=42,  # fixed seed for reproducible results
    )

    rf.fit(X_train, y_train)

    # Feature importance
    importance = pd.DataFrame({
        "feature": X_train.columns,
        "importance": rf.feature_importances_,
    }).sort_values("importance", ascending=False)

    return rf, importance

# Usage example
rf_model, importance = train_random_forest(X_train, y_train)
print("特征重要性:")
print(importance.head(10))

梯度提升

python
from sklearn.ensemble import GradientBoostingRegressor

def train_gradient_boosting(X_train, y_train):
    """Train a gradient boosting regression model.

    Args:
        X_train: Training features.
        y_train: Training target values.

    Returns:
        The fitted GradientBoostingRegressor.
    """
    gb = GradientBoostingRegressor(
        n_estimators=200,
        learning_rate=0.1,
        max_depth=5,
        random_state=42,  # fixed seed for reproducible results
    )

    gb.fit(X_train, y_train)
    return gb

# Usage example
gb_model = train_gradient_boosting(X_train, y_train)

模型评估

综合评估

python
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np


def evaluate_model(model, X_test, y_test, model_name="模型"):
    """Evaluate a fitted model on held-out data and print a summary.

    Args:
        model: Fitted estimator exposing ``predict``.
        X_test: Test features.
        y_test: True target values for ``X_test``.
        model_name: Label used in the printed report header.

    Returns:
        A ``(metrics, predictions)`` tuple, where ``metrics`` is a dict with
        the keys ``MAE``, ``RMSE``, ``R²`` and ``MAPE`` (MAPE in percent).
    """
    predictions = model.predict(X_test)

    metrics = {
        "MAE": mean_absolute_error(y_test, predictions),
        "RMSE": np.sqrt(mean_squared_error(y_test, predictions)),
        "R²": r2_score(y_test, predictions),
        # NOTE(review): divides by y_test, so this assumes no true value is
        # zero -- confirm for the dataset in use.
        "MAPE": np.mean(np.abs((y_test - predictions) / y_test)) * 100,
    }

    print(f"\n{model_name} 评估:")
    print(f"  平均绝对误差: ${metrics['MAE']:,.0f}")
    print(f"  均方根误差: ${metrics['RMSE']:,.0f}")
    print(f"  R²: {metrics['R²']:.3f}")
    print(f"  平均绝对百分比误差: {metrics['MAPE']:.1f}%")

    return metrics, predictions

# Usage example
metrics, predictions = evaluate_model(model, X_test, y_test, "线性回归")

比较多个模型

python
def comparemodels(models, Xtest, y_test):
比较多个模型
results = []

for name, model in models.items():
metrics, = evaluatemodel(model, Xtest, ytest, name)
metrics[模型] = name
results.append(metrics)

comparison = pd.DataFrame

标签

skill ai

通过对话安装

该技能支持在以下平台通过对话安装:

OpenClaw WorkBuddy QClaw Kimi Claude

方式一:安装 SkillHub 和技能

帮我安装 SkillHub 和 cost-prediction-1776344306 技能

方式二:设置 SkillHub 为优先技能安装源

设置 SkillHub 为我的优先技能安装源,然后帮我安装 cost-prediction-1776344306 技能

通过命令行安装

skillhub install cost-prediction-1776344306

下载

⬇ 下载 cost-prediction v2.0.0(免费)

文件大小: 6.52 KB | 发布时间: 2026-4-17 15:54

v2.0.0 最新 2026-4-17 15:54
Major update: Adds comprehensive guidance and code for construction cost prediction using multiple ML models.

- Introduces an in-depth SKILL.md with project overview, methodology, and book reference.
- Provides detailed code snippets for data preparation, feature engineering, and handling inflation adjustments.
- Offers step-by-step instructions for training and evaluating Linear Regression, K-Nearest Neighbors, Random Forest, and Gradient Boosting models.
- Includes model evaluation metrics (MAE, RMSE, R², MAPE) and feature importance analysis.
- Enables easy model comparison for optimal selection.

Archiver·手机版·闲社网·闲社论坛·羊毛社区· 多链控股集团有限公司 · 苏ICP备2025199260号-1

Powered by Discuz! X5.0   © 2024-2025 闲社网·线报更新论坛·羊毛分享社区·http://xianshe.com

p2p_official_large
返回顶部