Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
科
科研小班
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
靓靓
科研小班
Commits
34435790
Commit
34435790
authored
Jul 30, 2025
by
re
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
直播
parent
a0aacdcd
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
770 additions
and
0 deletions
+770
-0
经典机器学习.ipynb
直播/1-第一阶段/1.5-经典应用-经典机器学习模型+分类问题/经典机器学习.ipynb
+770
-0
No files found.
直播/1-第一阶段/1.5-经典应用-经典机器学习模型+分类问题/经典机器学习.ipynb
0 → 100644
View file @
34435790
{
{
"cells": [
{
"cell_type": "markdown",
"source": [
"# 分类"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
}
},
{
"cell_type": "markdown",
"source": [
"## 导入数据集"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
}
},
{
"cell_type": "code",
"execution_count": 1,
"outputs": [],
"source": [
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import pandas as pd\n",
"from sklearn import datasets\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.preprocessing import StandardScaler\n",
"from sklearn.metrics import classification_report, confusion_matrix, accuracy_score\n",
"import time"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 11,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"特征: ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']\n",
"类别: ['setosa' 'versicolor' 'virginica']\n",
"数据形状: (150, 4)\n"
]
}
],
"source": [
"# 加载鸢尾花数据集\n",
"iris = datasets.load_iris()\n",
"X = iris.data # 特征矩阵 (150个样本 × 4个特征)\n",
"y = iris.target # 目标向量 (类别标签)\n",
"\n",
"# 特征名称和目标类别名称\n",
"feature_names = iris.feature_names\n",
"class_names = iris.target_names\n",
"\n",
"print(\"特征:\", feature_names)\n",
"print(\"类别:\", class_names)\n",
"print(\"数据形状:\", X.shape)"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 12,
"outputs": [],
"source": [
"# 划分训练集和测试集 (70%训练, 30%测试)\n",
"X_train, X_test, y_train, y_test = train_test_split(\n",
" X, y, test_size=0.3, random_state=42\n",
")\n",
"\n",
"# 特征标准化\n",
"scaler = StandardScaler()\n",
"X_train = scaler.fit_transform(X_train)\n",
"X_test = scaler.transform(X_test)"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "markdown",
"source": [
"## KNN"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
}
},
{
"cell_type": "code",
"execution_count": 21,
"outputs": [],
"source": [
"from sklearn.neighbors import KNeighborsClassifier\n",
"# 创建KNN分类器 (k=3)\n",
"knn = KNeighborsClassifier(n_neighbors=3)\n",
"\n",
"# 记录训练时间\n",
"start_time = time.time()\n",
"# 训练模型\n",
"knn.fit(X_train, y_train)\n",
"training_time = time.time() - start_time\n"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 22,
"outputs": [
{
"data": {
"text/plain": "0.0018961429595947266"
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"training_time"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 24,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"测试集准确率: 100.00%\n",
"\n",
"分类报告:\n",
" precision recall f1-score support\n",
"\n",
" setosa 1.00 1.00 1.00 19\n",
" versicolor 1.00 1.00 1.00 13\n",
" virginica 1.00 1.00 1.00 13\n",
"\n",
" accuracy 1.00 45\n",
" macro avg 1.00 1.00 1.00 45\n",
"weighted avg 1.00 1.00 1.00 45\n",
"\n",
"\n",
"混淆矩阵:\n",
"[[19 0 0]\n",
" [ 0 13 0]\n",
" [ 0 0 13]]\n"
]
}
],
"source": [
"# 预测测试集\n",
"y_pred = knn.predict(X_test)\n",
"\n",
"def evaluate(y_test, y_pred, dataset_name=\"测试集\"):\n",
"    \"\"\"Print accuracy, the classification report and the confusion matrix.\n",
"\n",
"    Args:\n",
"        y_test: Ground-truth labels of the split being scored.\n",
"        y_pred: Predicted labels for the same samples.\n",
"        dataset_name: Label prefixed to the accuracy line. Defaults to\n",
"            \"测试集\" (test set); pass \"训练集\" when scoring the training split\n",
"            so the printed heading matches the data being evaluated.\n",
"    \"\"\"\n",
"    print(\"{}准确率: {:.2f}%\".format(dataset_name, accuracy_score(y_test, y_pred) * 100))\n",
"    print(\"\\n分类报告:\")\n",
"    # class_names is the notebook-level array of iris target names.\n",
"    print(classification_report(y_test, y_pred, target_names=class_names))\n",
"\n",
"    print(\"\\n混淆矩阵:\")\n",
"    print(confusion_matrix(y_test, y_pred))\n",
"\n",
"evaluate(y_test, y_pred)"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 25,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"测试集准确率: 94.29%\n",
"\n",
"分类报告:\n",
" precision recall f1-score support\n",
"\n",
" setosa 1.00 1.00 1.00 31\n",
" versicolor 0.90 0.95 0.92 37\n",
" virginica 0.94 0.89 0.92 37\n",
"\n",
" accuracy 0.94 105\n",
" macro avg 0.95 0.95 0.95 105\n",
"weighted avg 0.94 0.94 0.94 105\n",
"\n",
"\n",
"混淆矩阵:\n",
"[[31 0 0]\n",
" [ 0 35 2]\n",
" [ 0 4 33]]\n"
]
}
],
"source": [
"# Predict on the training set\n",
"y_pred = knn.predict(X_train)\n",
"\n",
"# 评估指标\n",
"evaluate(y_train, y_pred)"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "markdown",
"source": [
"## 随机森林"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
}
},
{
"cell_type": "code",
"execution_count": 26,
"outputs": [],
"source": [
"from sklearn.ensemble import RandomForestClassifier\n",
"# 创建和训练随机森林模型\n",
"rf = RandomForestClassifier(\n",
" n_estimators=100, # 树的数量\n",
" random_state=42, # 保证可复现性\n",
" max_features='sqrt', # number of features considered at each split: sqrt(n_features)\n",
" oob_score=True # 使用袋外样本评估\n",
")\n",
"\n",
"# 记录训练时间\n",
"start_time = time.time()\n",
"rf.fit(X_train, y_train)\n",
"training_time = time.time() - start_time\n",
"\n",
"# 模型评估\n",
"# 预测\n",
"y_pred = rf.predict(X_test)\n",
"y_proba = rf.predict_proba(X_test)\n",
"\n",
"# 基础指标\n",
"accuracy = accuracy_score(y_test, y_pred)\n",
"oob_accuracy = rf.oob_score_"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 27,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"测试集准确率: 100.00%\n",
"\n",
"分类报告:\n",
" precision recall f1-score support\n",
"\n",
" setosa 1.00 1.00 1.00 19\n",
" versicolor 1.00 1.00 1.00 13\n",
" virginica 1.00 1.00 1.00 13\n",
"\n",
" accuracy 1.00 45\n",
" macro avg 1.00 1.00 1.00 45\n",
"weighted avg 1.00 1.00 1.00 45\n",
"\n",
"\n",
"混淆矩阵:\n",
"[[19 0 0]\n",
" [ 0 13 0]\n",
" [ 0 0 13]]\n"
]
}
],
"source": [
"# 评估指标\n",
"evaluate(y_test, y_pred)"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 28,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"测试集准确率: 100.00%\n",
"\n",
"分类报告:\n",
" precision recall f1-score support\n",
"\n",
" setosa 1.00 1.00 1.00 31\n",
" versicolor 1.00 1.00 1.00 37\n",
" virginica 1.00 1.00 1.00 37\n",
"\n",
" accuracy 1.00 105\n",
" macro avg 1.00 1.00 1.00 105\n",
"weighted avg 1.00 1.00 1.00 105\n",
"\n",
"\n",
"混淆矩阵:\n",
"[[31 0 0]\n",
" [ 0 37 0]\n",
" [ 0 0 37]]\n"
]
}
],
"source": [
"# 训练集\n",
"y_pred = rf.predict(X_train)\n",
"\n",
"# 评估指标\n",
"evaluate(y_train, y_pred)"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "markdown",
"source": [
"## SVM"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
}
},
{
"cell_type": "code",
"execution_count": 29,
"outputs": [
{
"data": {
"text/plain": "SVC(C=10, gamma=0.1, probability=True)",
"text/html": "<style>#sk-container-id-2 {color: black;}#sk-container-id-2 pre{padding: 0;}#sk-container-id-2 div.sk-toggleable {background-color: white;}#sk-container-id-2 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-2 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-2 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-2 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-2 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-2 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-2 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-2 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-2 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-2 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-2 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-2 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-2 div.sk-label:hover 
label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-2 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-2 div.sk-item {position: relative;z-index: 1;}#sk-container-id-2 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-2 div.sk-item::before, #sk-container-id-2 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-2 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-2 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-2 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-2 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-2 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-2 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-2 div.sk-label-container {text-align: center;}#sk-container-id-2 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-2 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-2\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>SVC(C=10, gamma=0.1, probability=True)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-2\" type=\"checkbox\" checked><label for=\"sk-estimator-id-2\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">SVC</label><div class=\"sk-toggleable__content\"><pre>SVC(C=10, gamma=0.1, probability=True)</pre></div></div></div></div></div>"
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#SVM\n",
"from sklearn.svm import SVC\n",
"\n",
"# 非线性问题使用RBF核\n",
"svc_rbf = SVC(kernel='rbf', C=10, gamma=0.1, probability=True)\n",
"svc_rbf.fit(X_train, y_train)"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 30,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"测试集准确率: 97.14%\n",
"\n",
"分类报告:\n",
" precision recall f1-score support\n",
"\n",
" setosa 1.00 1.00 1.00 31\n",
" versicolor 1.00 0.92 0.96 37\n",
" virginica 0.93 1.00 0.96 37\n",
"\n",
" accuracy 0.97 105\n",
" macro avg 0.97 0.97 0.97 105\n",
"weighted avg 0.97 0.97 0.97 105\n",
"\n",
"\n",
"混淆矩阵:\n",
"[[31 0 0]\n",
" [ 0 34 3]\n",
" [ 0 0 37]]\n"
]
}
],
"source": [
"# 训练集\n",
"y_pred = svc_rbf.predict(X_train)\n",
"\n",
"# 评估指标\n",
"evaluate(y_train, y_pred)"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 31,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"测试集准确率: 97.78%\n",
"\n",
"分类报告:\n",
" precision recall f1-score support\n",
"\n",
" setosa 1.00 1.00 1.00 19\n",
" versicolor 1.00 0.92 0.96 13\n",
" virginica 0.93 1.00 0.96 13\n",
"\n",
" accuracy 0.98 45\n",
" macro avg 0.98 0.97 0.97 45\n",
"weighted avg 0.98 0.98 0.98 45\n",
"\n",
"\n",
"混淆矩阵:\n",
"[[19 0 0]\n",
" [ 0 12 1]\n",
" [ 0 0 13]]\n"
]
}
],
"source": [
"# Linear kernel: fits a linear decision boundary\n",
"svc_linear = SVC(kernel='linear', C=1.0, probability=True)\n",
"svc_linear.fit(X_train, y_train)\n",
"# Predict on the test set\n",
"y_pred = svc_linear.predict(X_test)\n",
"# 评估指标\n",
"evaluate(y_test, y_pred)"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "markdown",
"source": [
"# 回归"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
}
},
{
"cell_type": "code",
"execution_count": 2,
"outputs": [],
"source": [
"from sklearn.datasets import fetch_california_housing\n",
"\n",
"# Load the California housing dataset.\n",
"data = fetch_california_housing()\n",
"X = data.data\n",
"y = data.target\n",
"\n",
"# Split first (70% train / 30% test) so the test rows stay unseen during preprocessing.\n",
"X_train_raw, X_test_raw, y_train, y_test = train_test_split(\n",
"    X, y, test_size=0.3, random_state=42\n",
")\n",
"\n",
"# Fit the scaler on the training split only and reuse it for the test split;\n",
"# fitting on the full dataset would leak test-set statistics into training.\n",
"scaler = StandardScaler()\n",
"X_train = scaler.fit_transform(X_train_raw)\n",
"X_test = scaler.transform(X_test_raw)\n",
"\n",
"# Full feature matrix in the same scaled space, kept under its original name.\n",
"X_scaled = scaler.transform(X)"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 8,
"outputs": [
{
"data": {
"text/plain": " MedInc HouseAge AveRooms AveBedrms Population AveOccup Latitude \\\n0 8.3252 41.0 6.984127 1.023810 322.0 2.555556 37.88 \n1 8.3014 21.0 6.238137 0.971880 2401.0 2.109842 37.86 \n2 7.2574 52.0 8.288136 1.073446 496.0 2.802260 37.85 \n3 5.6431 52.0 5.817352 1.073059 558.0 2.547945 37.85 \n4 3.8462 52.0 6.281853 1.081081 565.0 2.181467 37.85 \n... ... ... ... ... ... ... ... \n20635 1.5603 25.0 5.045455 1.133333 845.0 2.560606 39.48 \n20636 2.5568 18.0 6.114035 1.315789 356.0 3.122807 39.49 \n20637 1.7000 17.0 5.205543 1.120092 1007.0 2.325635 39.43 \n20638 1.8672 18.0 5.329513 1.171920 741.0 2.123209 39.43 \n20639 2.3886 16.0 5.254717 1.162264 1387.0 2.616981 39.37 \n\n Longitude \n0 -122.23 \n1 -122.22 \n2 -122.24 \n3 -122.25 \n4 -122.25 \n... ... \n20635 -121.09 \n20636 -121.21 \n20637 -121.22 \n20638 -121.32 \n20639 -121.24 \n\n[20640 rows x 8 columns]",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>MedInc</th>\n <th>HouseAge</th>\n <th>AveRooms</th>\n <th>AveBedrms</th>\n <th>Population</th>\n <th>AveOccup</th>\n <th>Latitude</th>\n <th>Longitude</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>8.3252</td>\n <td>41.0</td>\n <td>6.984127</td>\n <td>1.023810</td>\n <td>322.0</td>\n <td>2.555556</td>\n <td>37.88</td>\n <td>-122.23</td>\n </tr>\n <tr>\n <th>1</th>\n <td>8.3014</td>\n <td>21.0</td>\n <td>6.238137</td>\n <td>0.971880</td>\n <td>2401.0</td>\n <td>2.109842</td>\n <td>37.86</td>\n <td>-122.22</td>\n </tr>\n <tr>\n <th>2</th>\n <td>7.2574</td>\n <td>52.0</td>\n <td>8.288136</td>\n <td>1.073446</td>\n <td>496.0</td>\n <td>2.802260</td>\n <td>37.85</td>\n <td>-122.24</td>\n </tr>\n <tr>\n <th>3</th>\n <td>5.6431</td>\n <td>52.0</td>\n <td>5.817352</td>\n <td>1.073059</td>\n <td>558.0</td>\n <td>2.547945</td>\n <td>37.85</td>\n <td>-122.25</td>\n </tr>\n <tr>\n <th>4</th>\n <td>3.8462</td>\n <td>52.0</td>\n <td>6.281853</td>\n <td>1.081081</td>\n <td>565.0</td>\n <td>2.181467</td>\n <td>37.85</td>\n <td>-122.25</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th>20635</th>\n <td>1.5603</td>\n <td>25.0</td>\n <td>5.045455</td>\n <td>1.133333</td>\n <td>845.0</td>\n <td>2.560606</td>\n <td>39.48</td>\n <td>-121.09</td>\n </tr>\n <tr>\n <th>20636</th>\n <td>2.5568</td>\n <td>18.0</td>\n <td>6.114035</td>\n <td>1.315789</td>\n <td>356.0</td>\n <td>3.122807</td>\n <td>39.49</td>\n <td>-121.21</td>\n </tr>\n <tr>\n <th>20637</th>\n <td>1.7000</td>\n <td>17.0</td>\n <td>5.205543</td>\n <td>1.120092</td>\n 
<td>1007.0</td>\n <td>2.325635</td>\n <td>39.43</td>\n <td>-121.22</td>\n </tr>\n <tr>\n <th>20638</th>\n <td>1.8672</td>\n <td>18.0</td>\n <td>5.329513</td>\n <td>1.171920</td>\n <td>741.0</td>\n <td>2.123209</td>\n <td>39.43</td>\n <td>-121.32</td>\n </tr>\n <tr>\n <th>20639</th>\n <td>2.3886</td>\n <td>16.0</td>\n <td>5.254717</td>\n <td>1.162264</td>\n <td>1387.0</td>\n <td>2.616981</td>\n <td>39.37</td>\n <td>-121.24</td>\n </tr>\n </tbody>\n</table>\n<p>20640 rows × 8 columns</p>\n</div>"
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = pd.DataFrame(X,columns=data.feature_names)\n",
"df"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "markdown",
"source": [
"## SVM"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
}
},
{
"cell_type": "code",
"execution_count": 6,
"outputs": [],
"source": [
"from sklearn.svm import SVR\n",
"from sklearn.metrics import mean_squared_error, r2_score\n",
"\n",
"\n",
"# 初始化模型并设置参数\n",
"svr = SVR(kernel='rbf', C=1.0, epsilon=0.1)\n",
"\n",
"# 训练模型、预测并评估\n",
"svr.fit(X_train, y_train)\n",
"# 预测\n",
"y_pred = svr.predict(X_test)\n",
"# 评估\n"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 11,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"r2: 0.73\n",
"mse: 0.35\n"
]
}
],
"source": [
"def r2_mse(y_test, y_pred):\n",
"    \"\"\"Print the R^2 score and the mean squared error, each to two decimals.\n",
"\n",
"    Args:\n",
"        y_test: Ground-truth target values.\n",
"        y_pred: Predicted target values for the same samples.\n",
"    \"\"\"\n",
"    mse_value = mean_squared_error(y_test, y_pred)\n",
"    r2_value = r2_score(y_test, y_pred)\n",
"    # R^2 is reported first, then MSE.\n",
"    for template, value in ((\"r2: {:.2f}\", r2_value), (\"mse: {:.2f}\", mse_value)):\n",
"        print(template.format(value))\n",
"\n",
"r2_mse(y_test, y_pred)"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "markdown",
"source": [
"## 决策树"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
}
},
{
"cell_type": "code",
"execution_count": 12,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"r2: 0.60\n",
"mse: 0.52\n"
]
}
],
"source": [
"from sklearn.tree import DecisionTreeRegressor\n",
"\n",
"DT = DecisionTreeRegressor(max_depth=5,\n",
" min_samples_split=5,\n",
" random_state=42)\n",
"# 训练模型、预测并评估\n",
"DT.fit(X_train, y_train)\n",
"# 预测\n",
"y_pred = DT.predict(X_test)\n",
"# 评估\n",
"r2_mse(y_test, y_pred)"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "markdown",
"source": [
"## 随机森林"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
}
},
{
"cell_type": "code",
"execution_count": 13,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"r2: 0.78\n",
"mse: 0.29\n"
]
}
],
"source": [
"from sklearn.ensemble import RandomForestRegressor\n",
"\n",
"RF = RandomForestRegressor(n_estimators=100,\n",
" max_depth=10,\n",
" random_state=42\n",
" )\n",
"# 训练模型、预测并评估\n",
"RF.fit(X_train, y_train)\n",
"# 预测\n",
"y_pred = RF.predict(X_test)\n",
"# 评估\n",
"r2_mse(y_test, y_pred)"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment