Commit a0aacdcd by re

生化环材数据集_01

parent ba08acb4
{
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"outputs": [],
"source": [
"import pandas as pd"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 2,
"outputs": [
{
"data": {
"text/plain": " DOI 分离物质名称 水 甲醇 乙腈 pH \\\n0 3种新型α_1-受体阻断剂...性流动相HPLC分离与制备_牛长群 S-Alfuzosin 92.5 0 7.5 5.6 \n1 3种新型α_2-受体阻断剂...性流动相HPLC分离与制备_牛长群 R-Alfuzosin 92.5 0 7.5 5.6 \n2 3种新型α_3-受体阻断剂...性流动相HPLC分离与制备_牛长群 S-Terazosin 97.0 0 3.0 6.0 \n3 3种新型α_4-受体阻断剂...性流动相HPLC分离与制备_牛长群 R-Terazosin 97.0 0 3.0 6.0 \n4 3种新型α_5-受体阻断剂...性流动相HPLC分离与制备_牛长群 S-Doxazosin 80.0 0 20.0 5.8 \n.. ... ... ... .. ... ... \n253 NaN S-Venlafaxine 85.0 15 0.0 5.0 \n254 NaN R-Metoprolol 85.0 15 0.0 5.0 \n255 NaN S-Metoprolol 85.0 15 0.0 5.0 \n256 NaN R-Venlafaxine 85.0 15 0.0 5.0 \n257 NaN S-Venlafaxine 85.0 15 0.0 5.0 \n\n 流速 柱温 手性添加剂 添加剂用量 色谱柱 柱长 保留时间 \n0 1.0 NaN CM-B-CD 19.5 mmol/L C4 NaN 42.80 \n1 1.0 NaN CM-B-CD 19.5 mmol/L C4 NaN 47.40 \n2 1.0 NaN CM-B-CD 32.4 mmol/L C4 NaN 90.00 \n3 1.0 NaN CM-B-CD 32.4 mmol/L C4 NaN 97.60 \n4 1.0 NaN CM-B-CD 13 mmol/L C4 NaN 47.10 \n.. ... ... ... ... ... ... ... \n253 0.5 30 CM-B-CD 20 mmol/L C18 150×4.6 32.39 \n254 0.5 30 CM-B-CD 40 mmol/L C18 150×4.6 13.65 \n255 0.5 30 CM-B-CD 40 mmol/L C18 150×4.6 14.61 \n256 0.5 30 CM-B-CD 40 mmol/L C18 150×4.6 26.33 \n257 0.5 30 CM-B-CD 40 mmol/L C18 150×4.6 29.22 \n\n[258 rows x 13 columns]",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>DOI</th>\n <th>分离物质名称</th>\n <th>水</th>\n <th>甲醇</th>\n <th>乙腈</th>\n <th>pH</th>\n <th>流速</th>\n <th>柱温</th>\n <th>手性添加剂</th>\n <th>添加剂用量</th>\n <th>色谱柱</th>\n <th>柱长</th>\n <th>保留时间</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>3种新型α_1-受体阻断剂...性流动相HPLC分离与制备_牛长群</td>\n <td>S-Alfuzosin</td>\n <td>92.5</td>\n <td>0</td>\n <td>7.5</td>\n <td>5.6</td>\n <td>1.0</td>\n <td>NaN</td>\n <td>CM-B-CD</td>\n <td>19.5 mmol/L</td>\n <td>C4</td>\n <td>NaN</td>\n <td>42.80</td>\n </tr>\n <tr>\n <th>1</th>\n <td>3种新型α_2-受体阻断剂...性流动相HPLC分离与制备_牛长群</td>\n <td>R-Alfuzosin</td>\n <td>92.5</td>\n <td>0</td>\n <td>7.5</td>\n <td>5.6</td>\n <td>1.0</td>\n <td>NaN</td>\n <td>CM-B-CD</td>\n <td>19.5 mmol/L</td>\n <td>C4</td>\n <td>NaN</td>\n <td>47.40</td>\n </tr>\n <tr>\n <th>2</th>\n <td>3种新型α_3-受体阻断剂...性流动相HPLC分离与制备_牛长群</td>\n <td>S-Terazosin</td>\n <td>97.0</td>\n <td>0</td>\n <td>3.0</td>\n <td>6.0</td>\n <td>1.0</td>\n <td>NaN</td>\n <td>CM-B-CD</td>\n <td>32.4 mmol/L</td>\n <td>C4</td>\n <td>NaN</td>\n <td>90.00</td>\n </tr>\n <tr>\n <th>3</th>\n <td>3种新型α_4-受体阻断剂...性流动相HPLC分离与制备_牛长群</td>\n <td>R-Terazosin</td>\n <td>97.0</td>\n <td>0</td>\n <td>3.0</td>\n <td>6.0</td>\n <td>1.0</td>\n <td>NaN</td>\n <td>CM-B-CD</td>\n <td>32.4 mmol/L</td>\n <td>C4</td>\n <td>NaN</td>\n <td>97.60</td>\n </tr>\n <tr>\n <th>4</th>\n <td>3种新型α_5-受体阻断剂...性流动相HPLC分离与制备_牛长群</td>\n <td>S-Doxazosin</td>\n <td>80.0</td>\n <td>0</td>\n <td>20.0</td>\n <td>5.8</td>\n <td>1.0</td>\n <td>NaN</td>\n <td>CM-B-CD</td>\n <td>13 mmol/L</td>\n <td>C4</td>\n <td>NaN</td>\n <td>47.10</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n 
<td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th>253</th>\n <td>NaN</td>\n <td>S-Venlafaxine</td>\n <td>85.0</td>\n <td>15</td>\n <td>0.0</td>\n <td>5.0</td>\n <td>0.5</td>\n <td>30</td>\n <td>CM-B-CD</td>\n <td>20 mmol/L</td>\n <td>C18</td>\n <td>150×4.6</td>\n <td>32.39</td>\n </tr>\n <tr>\n <th>254</th>\n <td>NaN</td>\n <td>R-Metoprolol</td>\n <td>85.0</td>\n <td>15</td>\n <td>0.0</td>\n <td>5.0</td>\n <td>0.5</td>\n <td>30</td>\n <td>CM-B-CD</td>\n <td>40 mmol/L</td>\n <td>C18</td>\n <td>150×4.6</td>\n <td>13.65</td>\n </tr>\n <tr>\n <th>255</th>\n <td>NaN</td>\n <td>S-Metoprolol</td>\n <td>85.0</td>\n <td>15</td>\n <td>0.0</td>\n <td>5.0</td>\n <td>0.5</td>\n <td>30</td>\n <td>CM-B-CD</td>\n <td>40 mmol/L</td>\n <td>C18</td>\n <td>150×4.6</td>\n <td>14.61</td>\n </tr>\n <tr>\n <th>256</th>\n <td>NaN</td>\n <td>R-Venlafaxine</td>\n <td>85.0</td>\n <td>15</td>\n <td>0.0</td>\n <td>5.0</td>\n <td>0.5</td>\n <td>30</td>\n <td>CM-B-CD</td>\n <td>40 mmol/L</td>\n <td>C18</td>\n <td>150×4.6</td>\n <td>26.33</td>\n </tr>\n <tr>\n <th>257</th>\n <td>NaN</td>\n <td>S-Venlafaxine</td>\n <td>85.0</td>\n <td>15</td>\n <td>0.0</td>\n <td>5.0</td>\n <td>0.5</td>\n <td>30</td>\n <td>CM-B-CD</td>\n <td>40 mmol/L</td>\n <td>C18</td>\n <td>150×4.6</td>\n <td>29.22</td>\n </tr>\n </tbody>\n</table>\n<p>258 rows × 13 columns</p>\n</div>"
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Load the raw dataset; the path is resolved relative to the working directory.\n",
"data = pd.read_excel(\"dataset.xlsx\")\n",
"data"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 10,
"outputs": [
{
"data": {
"text/plain": " 序号 分离物质名称 SMILES 水 甲醇 乙腈 pH 流速 柱温 手性添加剂 添加剂用量 \\\n0 1.0 S-Alfuzosin NaN 92.5 0.0 7.5 5.6 1 25 CM-β-CD NaN \n1 2.0 R-Alfuzosin NaN 92.5 0.0 7.5 5.6 1 25 CM-β-CD NaN \n2 3.0 S-Terazosin NaN 97.0 0.0 3.0 6.0 1 25 CM-β-CD NaN \n3 4.0 R-Terazosin NaN 97.0 0.0 3.0 6.0 1 25 CM-β-CD NaN \n4 5.0 S-Doxazosin NaN 80.0 0.0 20.0 5.8 1 25 CM-β-CD NaN \n.. ... ... ... ... ... ... ... ... .. ... ... \n712 NaN S-文拉法辛 NaN 85.0 15.0 0.0 5.0 0.5 30 CM-β-CD 20 \n713 NaN R-美托洛尔 NaN 85.0 15.0 0.0 5.0 0.5 30 CM-β-CD 40 \n714 NaN S-美托洛尔 NaN 85.0 15.0 0.0 5.0 0.5 30 CM-β-CD 40 \n715 NaN R-文拉法辛 NaN 85.0 15.0 0.0 5.0 0.5 30 CM-β-CD 40 \n716 NaN S-文拉法辛 NaN 85.0 15.0 0.0 5.0 0.5 30 CM-β-CD 40 \n\n 色谱柱 柱长 保留时间 分离因子 备注 column_y column_x \n0 C4 250×4.6 42.8 1.1 NaN 250.0 4.6 \n1 C4 250×4.6 47.4 NaN NaN 250.0 4.6 \n2 C4 250×4.6 90 1.1 NaN 250.0 4.6 \n3 C4 250×4.6 97.6 NaN NaN 250.0 4.6 \n4 C4 250×4.6 47.1 1.2 NaN 250.0 4.6 \n.. ... ... ... ... ... ... ... \n712 C18 150×4.6 32.39 NaN NaN 150.0 4.6 \n713 C18 150×4.6 13.65 NaN NaN 150.0 4.6 \n714 C18 150×4.6 14.61 NaN NaN 150.0 4.6 \n715 C18 150×4.6 26.33 NaN NaN 150.0 4.6 \n716 C18 150×4.6 29.22 NaN NaN 150.0 4.6 \n\n[717 rows x 18 columns]",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>序号</th>\n <th>分离物质名称</th>\n <th>SMILES</th>\n <th>水</th>\n <th>甲醇</th>\n <th>乙腈</th>\n <th>pH</th>\n <th>流速</th>\n <th>柱温</th>\n <th>手性添加剂</th>\n <th>添加剂用量</th>\n <th>色谱柱</th>\n <th>柱长</th>\n <th>保留时间</th>\n <th>分离因子</th>\n <th>备注</th>\n <th>column_y</th>\n <th>column_x</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>1.0</td>\n <td>S-Alfuzosin</td>\n <td>NaN</td>\n <td>92.5</td>\n <td>0.0</td>\n <td>7.5</td>\n <td>5.6</td>\n <td>1</td>\n <td>25</td>\n <td>CM-β-CD</td>\n <td>NaN</td>\n <td>C4</td>\n <td>250×4.6</td>\n <td>42.8</td>\n <td>1.1</td>\n <td>NaN</td>\n <td>250.0</td>\n <td>4.6</td>\n </tr>\n <tr>\n <th>1</th>\n <td>2.0</td>\n <td>R-Alfuzosin</td>\n <td>NaN</td>\n <td>92.5</td>\n <td>0.0</td>\n <td>7.5</td>\n <td>5.6</td>\n <td>1</td>\n <td>25</td>\n <td>CM-β-CD</td>\n <td>NaN</td>\n <td>C4</td>\n <td>250×4.6</td>\n <td>47.4</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>250.0</td>\n <td>4.6</td>\n </tr>\n <tr>\n <th>2</th>\n <td>3.0</td>\n <td>S-Terazosin</td>\n <td>NaN</td>\n <td>97.0</td>\n <td>0.0</td>\n <td>3.0</td>\n <td>6.0</td>\n <td>1</td>\n <td>25</td>\n <td>CM-β-CD</td>\n <td>NaN</td>\n <td>C4</td>\n <td>250×4.6</td>\n <td>90</td>\n <td>1.1</td>\n <td>NaN</td>\n <td>250.0</td>\n <td>4.6</td>\n </tr>\n <tr>\n <th>3</th>\n <td>4.0</td>\n <td>R-Terazosin</td>\n <td>NaN</td>\n <td>97.0</td>\n <td>0.0</td>\n <td>3.0</td>\n <td>6.0</td>\n <td>1</td>\n <td>25</td>\n <td>CM-β-CD</td>\n <td>NaN</td>\n <td>C4</td>\n <td>250×4.6</td>\n <td>97.6</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>250.0</td>\n <td>4.6</td>\n </tr>\n <tr>\n <th>4</th>\n <td>5.0</td>\n <td>S-Doxazosin</td>\n <td>NaN</td>\n <td>80.0</td>\n <td>0.0</td>\n 
<td>20.0</td>\n <td>5.8</td>\n <td>1</td>\n <td>25</td>\n <td>CM-β-CD</td>\n <td>NaN</td>\n <td>C4</td>\n <td>250×4.6</td>\n <td>47.1</td>\n <td>1.2</td>\n <td>NaN</td>\n <td>250.0</td>\n <td>4.6</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th>712</th>\n <td>NaN</td>\n <td>S-文拉法辛</td>\n <td>NaN</td>\n <td>85.0</td>\n <td>15.0</td>\n <td>0.0</td>\n <td>5.0</td>\n <td>0.5</td>\n <td>30</td>\n <td>CM-β-CD</td>\n <td>20</td>\n <td>C18</td>\n <td>150×4.6</td>\n <td>32.39</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>150.0</td>\n <td>4.6</td>\n </tr>\n <tr>\n <th>713</th>\n <td>NaN</td>\n <td>R-美托洛尔</td>\n <td>NaN</td>\n <td>85.0</td>\n <td>15.0</td>\n <td>0.0</td>\n <td>5.0</td>\n <td>0.5</td>\n <td>30</td>\n <td>CM-β-CD</td>\n <td>40</td>\n <td>C18</td>\n <td>150×4.6</td>\n <td>13.65</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>150.0</td>\n <td>4.6</td>\n </tr>\n <tr>\n <th>714</th>\n <td>NaN</td>\n <td>S-美托洛尔</td>\n <td>NaN</td>\n <td>85.0</td>\n <td>15.0</td>\n <td>0.0</td>\n <td>5.0</td>\n <td>0.5</td>\n <td>30</td>\n <td>CM-β-CD</td>\n <td>40</td>\n <td>C18</td>\n <td>150×4.6</td>\n <td>14.61</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>150.0</td>\n <td>4.6</td>\n </tr>\n <tr>\n <th>715</th>\n <td>NaN</td>\n <td>R-文拉法辛</td>\n <td>NaN</td>\n <td>85.0</td>\n <td>15.0</td>\n <td>0.0</td>\n <td>5.0</td>\n <td>0.5</td>\n <td>30</td>\n <td>CM-β-CD</td>\n <td>40</td>\n <td>C18</td>\n <td>150×4.6</td>\n <td>26.33</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>150.0</td>\n <td>4.6</td>\n </tr>\n <tr>\n <th>716</th>\n <td>NaN</td>\n <td>S-文拉法辛</td>\n <td>NaN</td>\n <td>85.0</td>\n <td>15.0</td>\n <td>0.0</td>\n <td>5.0</td>\n <td>0.5</td>\n <td>30</td>\n <td>CM-β-CD</td>\n <td>40</td>\n <td>C18</td>\n <td>150×4.6</td>\n 
<td>29.22</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>150.0</td>\n <td>4.6</td>\n </tr>\n </tbody>\n</table>\n<p>717 rows × 18 columns</p>\n</div>"
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Fill missing experimental conditions with default values.\n",
"# Retention time (保留时间) is deliberately NOT filled here.\n",
"data = data.fillna({\"pH\": 7, \"柱温\": 25, \"柱长\": \"250×4.6\", \"流速\": 1, \"色谱柱\": \"C18\"})\n",
"\n",
"# Drop the DOI column; errors=\"ignore\" keeps this cell re-runnable once the column is gone.\n",
"data = data.drop(columns=[\"DOI\"], errors=\"ignore\")\n",
"\n",
"# Split the column-size string (e.g. \"250×4.6\") into two numeric features:\n",
"# the part before \"×\" goes to column_y, the part after it to column_x.\n",
"data[[\"column_y\", \"column_x\"]] = data[\"柱长\"].str.split(\"×\", expand=True).astype(float)\n",
"\n",
"# Column temperature given as the text \"室温\" (room temperature) is mapped to 25.\n",
"data[\"柱温\"] = data[\"柱温\"].replace({\"室温\": 25})\n",
"\n",
"# Remove the \"mmol/L\" unit and convert the additive amount to a number.\n",
"# str.rstrip(\"mmol/L\") is avoided on purpose: it strips a *set of characters* rather than a\n",
"# suffix, leaves the blank before the unit in place, and the .str accessor turns cells that\n",
"# are already numeric into NaN.\n",
"# Entries that are not a plain number (optionally followed by mmol/L) become NaN.\n",
"additive_text = data[\"添加剂用量\"].astype(str).str.replace(r\"\\s*mmol/L\\s*$\", \"\", regex=True)\n",
"data[\"添加剂用量\"] = pd.to_numeric(additive_text, errors=\"coerce\")\n",
"data"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 11,
"outputs": [
{
"data": {
"text/plain": " 序号 分离物质名称 SMILES 水 甲醇 乙腈 pH 流速 柱温 手性添加剂 添加剂用量 \\\n0 1.0 S-Alfuzosin NaN 92.5 0.0 7.5 5.6 1 25 CM-β-CD NaN \n1 2.0 R-Alfuzosin NaN 92.5 0.0 7.5 5.6 1 25 CM-β-CD NaN \n2 3.0 S-Terazosin NaN 97.0 0.0 3.0 6.0 1 25 CM-β-CD NaN \n3 4.0 R-Terazosin NaN 97.0 0.0 3.0 6.0 1 25 CM-β-CD NaN \n4 5.0 S-Doxazosin NaN 80.0 0.0 20.0 5.8 1 25 CM-β-CD NaN \n.. ... ... ... ... ... ... ... ... .. ... ... \n712 NaN S-文拉法辛 NaN 85.0 15.0 0.0 5.0 0.5 30 CM-β-CD 20 \n713 NaN R-美托洛尔 NaN 85.0 15.0 0.0 5.0 0.5 30 CM-β-CD 40 \n714 NaN S-美托洛尔 NaN 85.0 15.0 0.0 5.0 0.5 30 CM-β-CD 40 \n715 NaN R-文拉法辛 NaN 85.0 15.0 0.0 5.0 0.5 30 CM-β-CD 40 \n716 NaN S-文拉法辛 NaN 85.0 15.0 0.0 5.0 0.5 30 CM-β-CD 40 \n\n 色谱柱 柱长 保留时间 分离因子 备注 column_y column_x \n0 C4 250×4.6 42.8 1.1 NaN 250.0 4.6 \n1 C4 250×4.6 47.4 NaN NaN 250.0 4.6 \n2 C4 250×4.6 90 1.1 NaN 250.0 4.6 \n3 C4 250×4.6 97.6 NaN NaN 250.0 4.6 \n4 C4 250×4.6 47.1 1.2 NaN 250.0 4.6 \n.. ... ... ... ... ... ... ... \n712 C18 150×4.6 32.39 NaN NaN 150.0 4.6 \n713 C18 150×4.6 13.65 NaN NaN 150.0 4.6 \n714 C18 150×4.6 14.61 NaN NaN 150.0 4.6 \n715 C18 150×4.6 26.33 NaN NaN 150.0 4.6 \n716 C18 150×4.6 29.22 NaN NaN 150.0 4.6 \n\n[666 rows x 18 columns]",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>序号</th>\n <th>分离物质名称</th>\n <th>SMILES</th>\n <th>水</th>\n <th>甲醇</th>\n <th>乙腈</th>\n <th>pH</th>\n <th>流速</th>\n <th>柱温</th>\n <th>手性添加剂</th>\n <th>添加剂用量</th>\n <th>色谱柱</th>\n <th>柱长</th>\n <th>保留时间</th>\n <th>分离因子</th>\n <th>备注</th>\n <th>column_y</th>\n <th>column_x</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>1.0</td>\n <td>S-Alfuzosin</td>\n <td>NaN</td>\n <td>92.5</td>\n <td>0.0</td>\n <td>7.5</td>\n <td>5.6</td>\n <td>1</td>\n <td>25</td>\n <td>CM-β-CD</td>\n <td>NaN</td>\n <td>C4</td>\n <td>250×4.6</td>\n <td>42.8</td>\n <td>1.1</td>\n <td>NaN</td>\n <td>250.0</td>\n <td>4.6</td>\n </tr>\n <tr>\n <th>1</th>\n <td>2.0</td>\n <td>R-Alfuzosin</td>\n <td>NaN</td>\n <td>92.5</td>\n <td>0.0</td>\n <td>7.5</td>\n <td>5.6</td>\n <td>1</td>\n <td>25</td>\n <td>CM-β-CD</td>\n <td>NaN</td>\n <td>C4</td>\n <td>250×4.6</td>\n <td>47.4</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>250.0</td>\n <td>4.6</td>\n </tr>\n <tr>\n <th>2</th>\n <td>3.0</td>\n <td>S-Terazosin</td>\n <td>NaN</td>\n <td>97.0</td>\n <td>0.0</td>\n <td>3.0</td>\n <td>6.0</td>\n <td>1</td>\n <td>25</td>\n <td>CM-β-CD</td>\n <td>NaN</td>\n <td>C4</td>\n <td>250×4.6</td>\n <td>90</td>\n <td>1.1</td>\n <td>NaN</td>\n <td>250.0</td>\n <td>4.6</td>\n </tr>\n <tr>\n <th>3</th>\n <td>4.0</td>\n <td>R-Terazosin</td>\n <td>NaN</td>\n <td>97.0</td>\n <td>0.0</td>\n <td>3.0</td>\n <td>6.0</td>\n <td>1</td>\n <td>25</td>\n <td>CM-β-CD</td>\n <td>NaN</td>\n <td>C4</td>\n <td>250×4.6</td>\n <td>97.6</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>250.0</td>\n <td>4.6</td>\n </tr>\n <tr>\n <th>4</th>\n <td>5.0</td>\n <td>S-Doxazosin</td>\n <td>NaN</td>\n <td>80.0</td>\n <td>0.0</td>\n 
<td>20.0</td>\n <td>5.8</td>\n <td>1</td>\n <td>25</td>\n <td>CM-β-CD</td>\n <td>NaN</td>\n <td>C4</td>\n <td>250×4.6</td>\n <td>47.1</td>\n <td>1.2</td>\n <td>NaN</td>\n <td>250.0</td>\n <td>4.6</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th>712</th>\n <td>NaN</td>\n <td>S-文拉法辛</td>\n <td>NaN</td>\n <td>85.0</td>\n <td>15.0</td>\n <td>0.0</td>\n <td>5.0</td>\n <td>0.5</td>\n <td>30</td>\n <td>CM-β-CD</td>\n <td>20</td>\n <td>C18</td>\n <td>150×4.6</td>\n <td>32.39</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>150.0</td>\n <td>4.6</td>\n </tr>\n <tr>\n <th>713</th>\n <td>NaN</td>\n <td>R-美托洛尔</td>\n <td>NaN</td>\n <td>85.0</td>\n <td>15.0</td>\n <td>0.0</td>\n <td>5.0</td>\n <td>0.5</td>\n <td>30</td>\n <td>CM-β-CD</td>\n <td>40</td>\n <td>C18</td>\n <td>150×4.6</td>\n <td>13.65</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>150.0</td>\n <td>4.6</td>\n </tr>\n <tr>\n <th>714</th>\n <td>NaN</td>\n <td>S-美托洛尔</td>\n <td>NaN</td>\n <td>85.0</td>\n <td>15.0</td>\n <td>0.0</td>\n <td>5.0</td>\n <td>0.5</td>\n <td>30</td>\n <td>CM-β-CD</td>\n <td>40</td>\n <td>C18</td>\n <td>150×4.6</td>\n <td>14.61</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>150.0</td>\n <td>4.6</td>\n </tr>\n <tr>\n <th>715</th>\n <td>NaN</td>\n <td>R-文拉法辛</td>\n <td>NaN</td>\n <td>85.0</td>\n <td>15.0</td>\n <td>0.0</td>\n <td>5.0</td>\n <td>0.5</td>\n <td>30</td>\n <td>CM-β-CD</td>\n <td>40</td>\n <td>C18</td>\n <td>150×4.6</td>\n <td>26.33</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>150.0</td>\n <td>4.6</td>\n </tr>\n <tr>\n <th>716</th>\n <td>NaN</td>\n <td>S-文拉法辛</td>\n <td>NaN</td>\n <td>85.0</td>\n <td>15.0</td>\n <td>0.0</td>\n <td>5.0</td>\n <td>0.5</td>\n <td>30</td>\n <td>CM-β-CD</td>\n <td>40</td>\n <td>C18</td>\n <td>150×4.6</td>\n 
<td>29.22</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>150.0</td>\n <td>4.6</td>\n </tr>\n </tbody>\n</table>\n<p>666 rows × 18 columns</p>\n</div>"
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Keep only the rows that have a retention time (保留时间) value.\n",
"# NOTE(review): the cells below keep operating on `data`, not `data_clean` - confirm which is intended.\n",
"data_clean = data.loc[data[\"保留时间\"].notna()]\n",
"data_clean"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 40,
"outputs": [],
"source": [
"# Dielectric constants of the mobile-phase solvents.\n",
"# The polarity estimate further down multiplies them with the 水 / 甲醇 / 乙腈 columns, in that order.\n",
"a_Dielectricz_Constants = 80  # paired with 水 (water)\n",
"b_Dielectricz_Constants = 33.6  # paired with 甲醇 (methanol)\n",
"c_Dielectricz_Constants = 37.5  # paired with 乙腈 (acetonitrile)"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 41,
"outputs": [],
"source": [
"# Scale the three solvent columns by 1/100 (percentage -> fraction).\n",
"# Not idempotent: running this cell twice divides by 100 twice.\n",
"for solvent in (\"水\", \"甲醇\", \"乙腈\"):\n",
"    data[solvent] = data[solvent] / 100"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 42,
"outputs": [],
"source": [
"# Estimate the polarity of the mixed mobile phase as the sum of each solvent column\n",
"# multiplied by that solvent's dielectric constant.\n",
"data[\"solvent_polar\"] = (\n",
"    data[\"水\"] * a_Dielectricz_Constants\n",
"    + data[\"甲醇\"] * b_Dielectricz_Constants\n",
"    + data[\"乙腈\"] * c_Dielectricz_Constants\n",
")"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 43,
"outputs": [],
"source": [
"# Fail fast if the polarity feature has not been computed yet. The previous bare\n",
"# data[\"solvent_polar\"] lookup was not the last line of the cell, so it displayed nothing.\n",
"assert \"solvent_polar\" in data.columns, \"solvent_polar is missing - run the polarity cell first\"\n",
"\n",
"# Drop the raw solvent columns and the combined column-size string.\n",
"data = data.drop(columns=[\"水\", \"甲醇\", \"乙腈\", \"柱长\"])"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 45,
"outputs": [
{
"data": {
"text/plain": " 序号 分离物质名称 pH 流速 柱温 手性添加剂 添加剂用量 色谱柱 保留时间 column_y \\\n0 1.0 S-Alfuzosin 5.6 1.0 25 CM-B-CD 19.5 C4 42.8 250.0 \n1 2.0 R-Alfuzosin 5.6 1.0 25 CM-B-CD 19.5 C4 47.4 250.0 \n2 3.0 S-Terazosin 6.0 1.0 25 CM-B-CD 32.4 C4 90.0 250.0 \n3 4.0 R-Terazosin 6.0 1.0 25 CM-B-CD 32.4 C4 97.6 250.0 \n4 5.0 S-Doxazosin 5.8 1.0 25 CM-B-CD 13 C4 47.1 250.0 \n.. ... ... ... ... .. ... ... ... ... ... \n275 NaN R-Ibuprofen 4.6 1.0 40 HP-B-CD 25 C18 45.8 150.0 \n276 NaN S-Ibuprofen 4.6 1.0 50 HP-B-CD 25 C18 37.9 150.0 \n277 NaN R-Ibuprofen 4.6 1.0 50 HP-B-CD 25 C18 40.3 150.0 \n278 NaN R-citalopram 4.0 0.8 25 B-CD 20 UF-CN 27.2 250.0 \n279 NaN S-citalopram 4.0 0.8 25 B-CD 20 UF-CN 24.6 250.0 \n\n column_x solvent_polar \n0 4.6 76.8125 \n1 4.6 76.8125 \n2 4.6 78.7250 \n3 4.6 78.7250 \n4 4.6 71.5000 \n.. ... ... \n275 4.6 66.0800 \n276 4.6 66.0800 \n277 4.6 66.0800 \n278 4.6 75.7500 \n279 4.6 75.7500 \n\n[280 rows x 12 columns]",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>序号</th>\n <th>分离物质名称</th>\n <th>pH</th>\n <th>流速</th>\n <th>柱温</th>\n <th>手性添加剂</th>\n <th>添加剂用量</th>\n <th>色谱柱</th>\n <th>保留时间</th>\n <th>column_y</th>\n <th>column_x</th>\n <th>solvent_polar</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>1.0</td>\n <td>S-Alfuzosin</td>\n <td>5.6</td>\n <td>1.0</td>\n <td>25</td>\n <td>CM-B-CD</td>\n <td>19.5</td>\n <td>C4</td>\n <td>42.8</td>\n <td>250.0</td>\n <td>4.6</td>\n <td>76.8125</td>\n </tr>\n <tr>\n <th>1</th>\n <td>2.0</td>\n <td>R-Alfuzosin</td>\n <td>5.6</td>\n <td>1.0</td>\n <td>25</td>\n <td>CM-B-CD</td>\n <td>19.5</td>\n <td>C4</td>\n <td>47.4</td>\n <td>250.0</td>\n <td>4.6</td>\n <td>76.8125</td>\n </tr>\n <tr>\n <th>2</th>\n <td>3.0</td>\n <td>S-Terazosin</td>\n <td>6.0</td>\n <td>1.0</td>\n <td>25</td>\n <td>CM-B-CD</td>\n <td>32.4</td>\n <td>C4</td>\n <td>90.0</td>\n <td>250.0</td>\n <td>4.6</td>\n <td>78.7250</td>\n </tr>\n <tr>\n <th>3</th>\n <td>4.0</td>\n <td>R-Terazosin</td>\n <td>6.0</td>\n <td>1.0</td>\n <td>25</td>\n <td>CM-B-CD</td>\n <td>32.4</td>\n <td>C4</td>\n <td>97.6</td>\n <td>250.0</td>\n <td>4.6</td>\n <td>78.7250</td>\n </tr>\n <tr>\n <th>4</th>\n <td>5.0</td>\n <td>S-Doxazosin</td>\n <td>5.8</td>\n <td>1.0</td>\n <td>25</td>\n <td>CM-B-CD</td>\n <td>13</td>\n <td>C4</td>\n <td>47.1</td>\n <td>250.0</td>\n <td>4.6</td>\n <td>71.5000</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th>275</th>\n <td>NaN</td>\n <td>R-Ibuprofen</td>\n <td>4.6</td>\n <td>1.0</td>\n <td>40</td>\n 
<td>HP-B-CD</td>\n <td>25</td>\n <td>C18</td>\n <td>45.8</td>\n <td>150.0</td>\n <td>4.6</td>\n <td>66.0800</td>\n </tr>\n <tr>\n <th>276</th>\n <td>NaN</td>\n <td>S-Ibuprofen</td>\n <td>4.6</td>\n <td>1.0</td>\n <td>50</td>\n <td>HP-B-CD</td>\n <td>25</td>\n <td>C18</td>\n <td>37.9</td>\n <td>150.0</td>\n <td>4.6</td>\n <td>66.0800</td>\n </tr>\n <tr>\n <th>277</th>\n <td>NaN</td>\n <td>R-Ibuprofen</td>\n <td>4.6</td>\n <td>1.0</td>\n <td>50</td>\n <td>HP-B-CD</td>\n <td>25</td>\n <td>C18</td>\n <td>40.3</td>\n <td>150.0</td>\n <td>4.6</td>\n <td>66.0800</td>\n </tr>\n <tr>\n <th>278</th>\n <td>NaN</td>\n <td>R-citalopram</td>\n <td>4.0</td>\n <td>0.8</td>\n <td>25</td>\n <td>B-CD</td>\n <td>20</td>\n <td>UF-CN</td>\n <td>27.2</td>\n <td>250.0</td>\n <td>4.6</td>\n <td>75.7500</td>\n </tr>\n <tr>\n <th>279</th>\n <td>NaN</td>\n <td>S-citalopram</td>\n <td>4.0</td>\n <td>0.8</td>\n <td>25</td>\n <td>B-CD</td>\n <td>20</td>\n <td>UF-CN</td>\n <td>24.6</td>\n <td>250.0</td>\n <td>4.6</td>\n <td>75.7500</td>\n </tr>\n </tbody>\n</table>\n<p>280 rows × 12 columns</p>\n</div>"
},
"execution_count": 45,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 45,
"outputs": [],
"source": [],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
}
],
"metadata": {
"kernelspec": {
"name": "py310",
"language": "python",
"display_name": "py310"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
\ No newline at end of file
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment