{ “cells”: [ { “cell_type”: “code”, “id”: “initial_id”, “metadata”: { “collapsed”: true, “ExecuteTime”: { “end_time”: “2025-12-03T13:42:53.349874Z”, “start_time”: “2025-12-03T13:42:53.344608Z” } }, “source”: [ “import pandas as pd\n”, “import numpy as np\n”, “s = pd.Series([10,2,np.nan,None,3,4,5], index=[‘A’,‘B’,‘C’,‘D’,‘E’,‘F’,‘G’], name=‘data’)\n”, “print(s)” ], “outputs”: [ { “name”: “stdout”, “output_type”: “stream”, “text”: [ “A 10.0\n”, “B 2.0\n”, “C NaN\n”, “D NaN\n”, “E 3.0\n”, “F 4.0\n”, “G 5.0\n”, “Name: data, dtype: float64\n” ] } ], “execution_count”: 9 }, { “metadata”: { “ExecuteTime”: { “end_time”: “2025-12-03T13:44:06.717070Z”, “start_time”: “2025-12-03T13:44:06.710900Z” } }, “cell_type”: “code”, “source”: [ “s.head(3)\n”, “s.tail(2)” ], “id”: “58c79cbdd60255d”, “outputs”: [ { “data”: { “text/plain”: [ “F 4.0\n”, “G 5.0\n”, “Name: data, dtype: float64” ] }, “execution_count”: 15, “metadata”: {}, “output_type”: “execute_result” } ], “execution_count”: 15 }, { “metadata”: { “ExecuteTime”: { “end_time”: “2025-12-03T13:43:39.527474Z”, “start_time”: “2025-12-03T13:43:39.519401Z” } }, “cell_type”: “code”, “source”: “s.describe()”, “id”: “bcd626b7e35a966d”, “outputs”: [ { “data”: { “text/plain”: [ “count 5.000000\n”, “mean 4.800000\n”, “std 3.114482\n”, “min 2.000000\n”, “25% 3.000000\n”, “50% 4.000000\n”, “75% 5.000000\n”, “max 10.000000\n”, “Name: data, dtype: float64” ] }, “execution_count”: 13, “metadata”: {}, “output_type”: “execute_result” } ], “execution_count”: 13 }, { “metadata”: { “ExecuteTime”: { “end_time”: “2025-12-03T13:47:48.178Z”, “start_time”: “2025-12-03T13:47:48.173188Z” } }, “cell_type”: “code”, “source”: “print(s.count()) # 忽略缺失值 NAN”, “id”: “16a76aca15994a45”, “outputs”: [ { “name”: “stdout”, “output_type”: “stream”, “text”: [ “5\n” ] } ], “execution_count”: 16 }, { “metadata”: { “ExecuteTime”: { “end_time”: “2025-12-03T13:48:44.234013Z”, “start_time”: “2025-12-03T13:48:44.229191Z” } }, “cell_type”: “code”, “source”: [ 
“print(s.keys())\n”, “print(s.index)” ], “id”: “bb1b0edea50242ac”, “outputs”: [ { “name”: “stdout”, “output_type”: “stream”, “text”: [ “Index([‘A’, ‘B’, ‘C’, ‘D’, ‘E’, ‘F’, ‘G’], dtype=‘object’)\n”, “Index([‘A’, ‘B’, ‘C’, ‘D’, ‘E’, ‘F’, ‘G’], dtype=‘object’)\n” ] } ], “execution_count”: 17 }, { “metadata”: { “ExecuteTime”: { “end_time”: “2025-12-03T13:50:38.502795Z”, “start_time”: “2025-12-03T13:50:38.494787Z” } }, “cell_type”: “code”, “source”: [ “print(s.isna()) # 找出缺失值\n”, “s.isna()” ], “id”: “ecf9f6fbe83dae4f”, “outputs”: [ { “name”: “stdout”, “output_type”: “stream”, “text”: [ “A False\n”, “B False\n”, “C True\n”, “D True\n”, “E False\n”, “F False\n”, “G False\n”, “Name: data, dtype: bool\n” ] }, { “data”: { “text/plain”: [ “A False\n”, “B False\n”, “C True\n”, “D True\n”, “E False\n”, “F False\n”, “G False\n”, “Name: data, dtype: bool” ] }, “execution_count”: 19, “metadata”: {}, “output_type”: “execute_result” } ], “execution_count”: 19 }, { “metadata”: { “ExecuteTime”: { “end_time”: “2025-12-03T13:52:26.239181Z”, “start_time”: “2025-12-03T13:52:26.232747Z” } }, “cell_type”: “code”, “source”: “s.isin([4,5,6]) # 检查每个元素是否在参数集合中”, “id”: “42c74562d935652b”, “outputs”: [ { “data”: { “text/plain”: [ “A False\n”, “B False\n”, “C False\n”, “D False\n”, “E False\n”, “F True\n”, “G True\n”, “Name: data, dtype: bool” ] }, “execution_count”: 22, “metadata”: {}, “output_type”: “execute_result” } ], “execution_count”: 22 }, { “metadata”: { “ExecuteTime”: { “end_time”: “2025-12-03T13:53:47.333121Z”, “start_time”: “2025-12-03T13:53:47.324980Z” } }, “cell_type”: “code”, “source”: “s.describe()”, “id”: “61eb81cc0e4fdbf5”, “outputs”: [ { “data”: { “text/plain”: [ “count 5.000000\n”, “mean 4.800000\n”, “std 3.114482\n”, “min 2.000000\n”, “25% 3.000000\n”, “50% 4.000000\n”, “75% 5.000000\n”, “max 10.000000\n”, “Name: data, dtype: float64” ] }, “execution_count”: 23, “metadata”: {}, “output_type”: “execute_result” } ], “execution_count”: 23 }, { “metadata”: { “ExecuteTime”: { 
“end_time”: “2025-12-03T13:58:12.344818Z”, “start_time”: “2025-12-03T13:58:12.338468Z” } }, “cell_type”: “code”, “source”: [ “s.sort_values()\n”, “s.quantile(0.75) # 分位数” ], “id”: “43287c36b304f989”, “outputs”: [ { “data”: { “text/plain”: [ “5.0” ] }, “execution_count”: 26, “metadata”: {}, “output_type”: “execute_result” } ], “execution_count”: 26 }, { “metadata”: { “ExecuteTime”: { “end_time”: “2025-12-03T14:02:33.188556Z”, “start_time”: “2025-12-03T14:02:33.182329Z” } }, “cell_type”: “code”, “source”: [ “s[‘H’] = 4\n”, “print(s.mode())” ], “id”: “904f72d68e9b6653”, “outputs”: [ { “name”: “stdout”, “output_type”: “stream”, “text”: [ “0 4.0\n”, “Name: data, dtype: float64\n” ] } ], “execution_count”: 28 }, { “metadata”: { “ExecuteTime”: { “end_time”: “2025-12-03T14:03:07.053335Z”, “start_time”: “2025-12-03T14:03:07.048464Z” } }, “cell_type”: “code”, “source”: “print(s.value_counts()) # 出现频率”, “id”: “a6ee30cd29865887”, “outputs”: [ { “name”: “stdout”, “output_type”: “stream”, “text”: [ “data\n”, “4.0 2\n”, “10.0 1\n”, “2.0 1\n”, “3.0 1\n”, “5.0 1\n”, “Name: count, dtype: int64\n” ] } ], “execution_count”: 29 }, { “metadata”: { “ExecuteTime”: { “end_time”: “2025-12-03T14:05:36.875233Z”, “start_time”: “2025-12-03T14:05:36.868104Z” } }, “cell_type”: “code”, “source”: [ “# 去重\n”, “s.drop_duplicates()\n”, “s.unique()\n”, “print(s.nunique()) # 去重后的元素个数” ], “id”: “726ad516dd485ce1”, “outputs”: [ { “name”: “stdout”, “output_type”: “stream”, “text”: [ “5\n” ] } ], “execution_count”: 32 }, { “metadata”: {}, “cell_type”: “markdown”, “source”: “”, “id”: “a9d7e5ca04fc7ac7” }, { “metadata”: { “ExecuteTime”: { “end_time”: “2025-12-03T14:06:17.989112Z”, “start_time”: “2025-12-03T14:06:17.981230Z” } }, “cell_type”: “code”, “source”: [ “# 排序(索引、值)\n”, “s.sort_index()\n”, “s.sort_values()” ], “id”: “628292f4696406cc”, “outputs”: [ { “data”: { “text/plain”: [ “B 2.0\n”, “E 3.0\n”, “F 4.0\n”, “H 4.0\n”, “G 5.0\n”, “A 10.0\n”, “C NaN\n”, “D NaN\n”, “Name: data, dtype: float64” ] }, 
“execution_count”: 34, “metadata”: {}, “output_type”: “execute_result” } ], “execution_count”: 34 }, { “metadata”: { “ExecuteTime”: { “end_time”: “2025-12-03T14:14:32.027768Z”, “start_time”: “2025-12-03T14:14:32.020655Z” } }, “cell_type”: “code”, “source”: [ “np.random.seed(42)\n”, “scores = pd.Series(np.random.randint(50,101,10), index=[‘学生’+str(i) for i in range(1,11)], name=‘score’)\n”, “print(scores)\n”, “print(‘平均分: ‘, scores.mean())\n”, “print(‘最高分: ‘, scores.max())\n”, “print(‘最低分: ‘, scores.min())\n”, “print(‘高于平均分的人数: ‘, len(scores[scores > scores.mean()]))” ], “id”: “1df782c1755992fc”, “outputs”: [ { “name”: “stdout”, “output_type”: “stream”, “text”: [ “学生1 88\n”, “学生2 78\n”, “学生3 64\n”, “学生4 92\n”, “学生5 57\n”, “学生6 70\n”, “学生7 88\n”, “学生8 68\n”, “学生9 72\n”, “学生10 60\n”, “Name: score, dtype: int32\n”, “平均分: 73.7\n”, “最高分: 92\n”, “最低分: 57\n”, “高于平均分的人数: 4\n” ] } ], “execution_count”: 44 }, { “metadata”: {}, “cell_type”: “markdown”, “source”: “”, “id”: “7c010a4e6de3f7f6” }, { “metadata”: { “ExecuteTime”: { “end_time”: “2025-12-09T09:03:59.263698Z”, “start_time”: “2025-12-09T09:03:58.245342Z” } }, “cell_type”: “code”, “source”: [ “import pandas as pd\n”, “import numpy as np\n”, “\n”, “temperatures = pd.Series([28, 31, 29, 32, 30, 27, 33],\n”, " index=[‘周一’, ‘周二’, ‘周三’, ‘周四’, ‘周五’, ‘周六’, ‘周日’])" ], “id”: “e9f547a9076f6e97”, “outputs”: [], “execution_count”: 2 }, { “metadata”: {}, “cell_type”: “markdown”, “source”: “”, “id”: “2bb43e03c986d951” }, { “metadata”: { “ExecuteTime”: { “end_time”: “2025-12-09T02:51:20.779629Z”, “start_time”: “2025-12-09T02:51:20.772193Z” } }, “cell_type”: “code”, “source”: “print(len(temperatures[temperatures>30]))”, “id”: “9abc745250c968c4”, “outputs”: [ { “name”: “stdout”, “output_type”: “stream”, “text”: [ “3\n” ] } ], “execution_count”: 5 }, { “metadata”: { “ExecuteTime”: { “end_time”: “2025-12-09T02:52:20.026650Z”, “start_time”: “2025-12-09T02:52:20.021970Z” } }, “cell_type”: “code”, “source”: “print(temperatures.mean())”, “id”: 
“865f9a13555ed436”, “outputs”: [ { “name”: “stdout”, “output_type”: “stream”, “text”: [ “30.0\n” ] } ], “execution_count”: 6 }, { “metadata”: { “ExecuteTime”: { “end_time”: “2025-12-09T02:54:25.952327Z”, “start_time”: “2025-12-09T02:54:25.947734Z” } }, “cell_type”: “code”, “source”: [ “t2 = temperatures.sort_values(ascending=False)\n”, “print(t2)” ], “id”: “5e99ccb5db66637e”, “outputs”: [ { “name”: “stdout”, “output_type”: “stream”, “text”: [ “周日 33\n”, “周四 32\n”, “周二 31\n”, “周五 30\n”, “周三 29\n”, “周一 28\n”, “周六 27\n”, “dtype: int64\n” ] } ], “execution_count”: 9 }, { “metadata”: { “ExecuteTime”: { “end_time”: “2025-12-09T03:00:20.847337Z”, “start_time”: “2025-12-09T03:00:20.840930Z” } }, “cell_type”: “code”, “source”: [ “temperatures.diff() # 计算元素差值\n”, “t3 = temperatures.diff().abs()\n”, “t4 = t3.sort_values(ascending=False).index\n”, “print(*(t4[:2].tolist()))\n” ], “id”: “691e3d8458be8f48”, “outputs”: [ { “name”: “stdout”, “output_type”: “stream”, “text”: [ “周日 周二\n” ] } ], “execution_count”: 17 }, { “metadata”: { “ExecuteTime”: { “end_time”: “2025-12-09T09:04:18.311246Z”, “start_time”: “2025-12-09T09:04:18.299028Z” } }, “cell_type”: “code”, “source”: [ “# 日期序列\n”, “date = pd.date_range(‘2000-06-1’, periods=6)\n”, “print(list(date))” ], “id”: “82a9cd7c618ce69a”, “outputs”: [ { “name”: “stdout”, “output_type”: “stream”, “text”: [ “[Timestamp(‘2000-06-01 00:00:00’), Timestamp(‘2000-06-02 00:00:00’), Timestamp(‘2000-06-03 00:00:00’), Timestamp(‘2000-06-04 00:00:00’), Timestamp(‘2000-06-05 00:00:00’), Timestamp(‘2000-06-06 00:00:00’)]\n” ] } ], “execution_count”: 4 }, { “metadata”: { “ExecuteTime”: { “end_time”: “2025-12-09T09:04:20.012980Z”, “start_time”: “2025-12-09T09:04:20.006247Z” } }, “cell_type”: “code”, “source”: [ “prices = pd.Series([102.3, 103.5, 105.1, 104.8, 106.2, 107.0, 106.5, 108.1, 109.3, 110.2], index=pd.date_range(‘2023-01-01’, periods=10))\n”, “print(prices)” ], “id”: “7f4fc443d0971031”, “outputs”: [ { “name”: “stdout”, “output_type”: “stream”, 
“text”: [ “2023-01-01 102.3\n”, “2023-01-02 103.5\n”, “2023-01-03 105.1\n”, “2023-01-04 104.8\n”, “2023-01-05 106.2\n”, “2023-01-06 107.0\n”, “2023-01-07 106.5\n”, “2023-01-08 108.1\n”, “2023-01-09 109.3\n”, “2023-01-10 110.2\n”, “Freq: D, dtype: float64\n” ] } ], “execution_count”: 5 }, { “metadata”: { “ExecuteTime”: { “end_time”: “2025-12-09T09:04:22.494910Z”, “start_time”: “2025-12-09T09:04:22.484858Z” } }, “cell_type”: “code”, “source”: [ “a = prices.pct_change() # percent 103.5/102.3 - 1\n”, “print(a.idxmax())\n”, “print(a.idxmin())\n”, “print(a.std())” ], “id”: “6c15acbb184ffb96”, “outputs”: [ { “name”: “stdout”, “output_type”: “stream”, “text”: [ “2023-01-03 00:00:00\n”, “2023-01-07 00:00:00\n”, “0.007373623845361105\n” ] } ], “execution_count”: 6 }, { “metadata”: { “ExecuteTime”: { “end_time”: “2025-12-09T09:04:24.251350Z”, “start_time”: “2025-12-09T09:04:24.241768Z” } }, “cell_type”: “code”, “source”: [ “sales = pd.Series([120,135,145,160,155,170,180,175,190,200,210,220], index=pd.date_range(‘2022-01-01’, periods=12, freq=‘MS’))\n”, “sales” ], “id”: “90760e529f8dc525”, “outputs”: [ { “data”: { “text/plain”: [ “2022-01-01 120\n”, “2022-02-01 135\n”, “2022-03-01 145\n”, “2022-04-01 160\n”, “2022-05-01 155\n”, “2022-06-01 170\n”, “2022-07-01 180\n”, “2022-08-01 175\n”, “2022-09-01 190\n”, “2022-10-01 200\n”, “2022-11-01 210\n”, “2022-12-01 220\n”, “Freq: MS, dtype: int64” ] }, “execution_count”: 7, “metadata”: {}, “output_type”: “execute_result” } ], “execution_count”: 7 }, { “metadata”: { “ExecuteTime”: { “end_time”: “2025-12-09T09:04:28.226654Z”, “start_time”: “2025-12-09T09:04:28.215072Z” } }, “cell_type”: “code”, “source”: [ “# 季度的平均销量\n”, “sales.resample(‘QS’).mean() # 按季度重新采样” ], “id”: “3ec4bb9bb6216e5f”, “outputs”: [ { “data”: { “text/plain”: [ “2022-01-01 133.333333\n”, “2022-04-01 161.666667\n”, “2022-07-01 181.666667\n”, “2022-10-01 210.000000\n”, “Freq: QS-JAN, dtype: float64” ] }, “execution_count”: 8, “metadata”: {}, “output_type”: 
“execute_result” } ], “execution_count”: 8 }, { “metadata”: { “ExecuteTime”: { “end_time”: “2025-12-09T09:10:07.789249Z”, “start_time”: “2025-12-09T09:10:07.782112Z” } }, “cell_type”: “code”, “source”: [ “# 找出连续增长超过2个月的月份\n”, “a = sales.pct_change()\n”, “b = a > 0\n”, “b.rolling(3) # 滑动窗口\n”, “b[b.rolling(3).sum() == 3].keys().tolist()” ], “id”: “977d2db149a9280b”, “outputs”: [ { “data”: { “text/plain”: [ “[Timestamp(‘2022-04-01 00:00:00’),\n”, " Timestamp(‘2022-11-01 00:00:00’),\n", " Timestamp(‘2022-12-01 00:00:00’)]" ] }, “execution_count”: 18, “metadata”: {}, “output_type”: “execute_result” } ], “execution_count”: 18 }, { “metadata”: { “ExecuteTime”: { “end_time”: “2025-12-09T11:07:36.920978Z”, “start_time”: “2025-12-09T11:07:36.916193Z” } }, “cell_type”: “code”, “source”: [ “import pandas as pd\n”, “import numpy as np\n”, “\n”, “np.random.seed(42)\n”, “hours_sales = pd.Series(np.random.randint(0, 100, 24), index = pd.date_range(‘2025-01-01’, periods=24, freq=‘h’))” ], “id”: “d30615c33884a83d”, “outputs”: [], “execution_count”: 5 }, { “metadata”: { “ExecuteTime”: { “end_time”: “2025-12-09T11:26:21.085926Z”, “start_time”: “2025-12-09T11:26:21.076294Z” } }, “cell_type”: “code”, “source”: [ “day_sales = hours_sales.resample(‘D’).sum()\n”, “# hours_sales.sum()\n”, “\n”, “# business_hours_sales = hours_sales.between_time(‘8:00’, ‘22:00’).sum()\n”, “## bhs = hours_sales[(hours_sales.index.hour >= 8) & (hours_sales.index.hour <= 22)]\n”, “# free_sales = day_sales - business_hours_sales\n”, “# print(business_hours_sales / free_sales)\n”, “\n”, “# nbhs = hours_sales.drop(bhs.index)\n”, “## nbhs = hours_sales[(hours_sales.index.hour < 8) | (hours_sales.index.hour > 22)]\n”, “\n”, “mask = (hours_sales.index.hour >= 8) & (hours_sales.index.hour <= 22)\n”, “bhs = hours_sales.loc[mask]\n”, “nbhs = hours_sales.loc[~mask]\n”, “\n”, “print(bhs.sum() / nbhs.sum())\n”, “\n”, “print(hours_sales.nlargest(3).keys().tolist())\n” ], “id”: “3f18216bd52852b5”, “outputs”: [ { “name”: 
“stdout”, “output_type”: “stream”, “text”: [ “1.4294354838709677\n”, “[Timestamp(‘2025-01-01 11:00:00’), Timestamp(‘2025-01-01 01:00:00’), Timestamp(‘2025-01-01 10:00:00’)]\n” ] } ], “execution_count”: 24 }, { “metadata”: { “ExecuteTime”: { “end_time”: “2025-12-09T11:56:03.507547Z”, “start_time”: “2025-12-09T11:56:03.496915Z” } }, “cell_type”: “code”, “source”: [ “# dataframe\n”, “import pandas as pd\n”, “import numpy as np\n”, “\n”, “s1 = pd.Series([1,2,3,4,5])\n”, “s2 = pd.Series([6,7,8,9,10])\n”, “df = pd.DataFrame({‘a’:s1, ‘b’:s2}) # k-v\n”, “\n”, “df = pd.DataFrame(\n”, " {\n", " "name":[’tom’,‘Jack’,‘Alice’,‘Bob’,‘John’],\n", " "age":[15,17,21,24,30],\n", " "score":[60.5, 80, 40.5, 70, 85.5]\n", " },index=[1,2,3,4,5],columns=[’name’,‘score’,‘age’]\n", “)\n”, “df” ], “id”: “ddd8300b43319779”, “outputs”: [ { “data”: { “text/plain”: [ " name score age\n", “1 tom 60.5 15\n”, “2 Jack 80.0 17\n”, “3 Alice 40.5 21\n”, “4 Bob 70.0 24\n”, “5 John 85.5 30” ], “text/html”: [ “
\n”,
“\n”,
“<table border="1" class="dataframe">\n”,
" \n",
" <tr style="text-align: right;">\n",
" \n",
" name \n",
" score \n",
" age \n",
" \n",
" \n",
" \n",
" \n",
" 1 \n",
" tom \n",
" 60.5 \n",
" 15 \n",
" \n",
" \n",
" 2 \n",
" Jack \n",
" 80.0 \n",
" 17 \n",
" \n",
" \n",
" 3 \n",
" Alice \n",
" 40.5 \n",
" 21 \n",
" \n",
" \n",
" 4 \n",
" Bob \n",
" 70.0 \n",
" 24 \n",
" \n",
" \n",
" 5 \n",
" John \n",
" 85.5 \n",
" 30 \n",
" \n",
" \n",
“\n”,
“
”
]
},
“execution_count”: 36,
“metadata”: {},
“output_type”: “execute_result”
}
],
“execution_count”: 36
},
{
“metadata”: {
“ExecuteTime”: {
“end_time”: “2025-12-09T12:00:22.117878Z”,
“start_time”: “2025-12-09T12:00:22.110388Z”
}
},
“cell_type”: “code”,
“source”: [
"# Print each structural part of df under its own heading.\n",
"for heading, part in (('行索引: ', df.index), ('列标签: ', df.columns), ('值: ', df.values)):\n",
"    print(heading)\n",
"    print(part)"
],
“id”: “49b22e7a75930bd6”,
“outputs”: [
{
“name”: “stdout”,
“output_type”: “stream”,
“text”: [
“行索引: \n”,
“Index([1, 2, 3, 4, 5], dtype=‘int64’)\n”,
“列标签: \n”,
“Index([’name’, ‘score’, ‘age’], dtype=‘object’)\n”,
“值: \n”,
“[[’tom’ 60.5 15]\n”,
" [‘Jack’ 80.0 17]\n",
" [‘Alice’ 40.5 21]\n",
" [‘Bob’ 70.0 24]\n",
" [‘John’ 85.5 30]]\n"
]
}
],
“execution_count”: 38
},
{
“metadata”: {
“ExecuteTime”: {
“end_time”: “2025-12-09T12:02:15.120537Z”,
“start_time”: “2025-12-09T12:02:15.115248Z”
}
},
“cell_type”: “code”,
“source”: [
"# Scalar facts about the frame's layout share one print pattern.\n",
"for caption, value in (('维度: ', df.ndim), ('形状: ', df.shape), ('元素个数: ', df.size)):\n",
"    print(caption, value)\n",
"# dtypes is printed separately, on the lines below its caption.\n",
"print('数据类型: ')\n",
"print(df.dtypes)"
],
“id”: “e6de5a4f2d5bddb4”,
“outputs”: [
{
“name”: “stdout”,
“output_type”: “stream”,
“text”: [
“维度: 2\n”,
“形状: (5, 3)\n”,
“元素个数: 15\n”,
“数据类型: \n”,
“name object\n”,
“score float64\n”,
“age int64\n”,
“dtype: object\n”
]
}
],
“execution_count”: 43
},
{
“metadata”: {
“ExecuteTime”: {
“end_time”: “2025-12-09T12:05:20.625562Z”,
“start_time”: “2025-12-09T12:05:20.616703Z”
}
},
“cell_type”: “code”,
“source”: [
"# Transposing turns the column labels into the row index.\n",
"print(df.transpose().index)\n",
"df"
],
“id”: “b84daf0333beb2c1”,
“outputs”: [
{
“name”: “stdout”,
“output_type”: “stream”,
“text”: [
“Index([’name’, ‘score’, ‘age’], dtype=‘object’)\n”
]
},
{
“data”: {
“text/plain”: [
" name score age\n",
“1 tom 60.5 15\n”,
“2 Jack 80.0 17\n”,
“3 Alice 40.5 21\n”,
“4 Bob 70.0 24\n”,
“5 John 85.5 30”
],
“text/html”: [
“\n”,
“\n”,
“<table border="1" class="dataframe">\n”,
" \n",
" <tr style="text-align: right;">\n",
" \n",
" name \n",
" score \n",
" age \n",
" \n",
" \n",
" \n",
" \n",
" 1 \n",
" tom \n",
" 60.5 \n",
" 15 \n",
" \n",
" \n",
" 2 \n",
" Jack \n",
" 80.0 \n",
" 17 \n",
" \n",
" \n",
" 3 \n",
" Alice \n",
" 40.5 \n",
" 21 \n",
" \n",
" \n",
" 4 \n",
" Bob \n",
" 70.0 \n",
" 24 \n",
" \n",
" \n",
" 5 \n",
" John \n",
" 85.5 \n",
" 30 \n",
" \n",
" \n",
“\n”,
“
”
]
},
“execution_count”: 50,
“metadata”: {},
“output_type”: “execute_result”
}
],
“execution_count”: 50
},
{
“metadata”: {
“ExecuteTime”: {
“end_time”: “2025-12-09T12:05:14.608704Z”,
“start_time”: “2025-12-09T12:05:14.602380Z”
}
},
“cell_type”: “code”,
“source”: [
"row_by_label = df.loc[4]  # label-based lookup: the row whose index label is 4\n",
"row_by_position = df.iloc[3]  # position-based lookup: the row at 0-based position 3\n",
"print(row_by_label)\n",
"print(row_by_position)"
],
“id”: “a5257618ca7f32b3”,
“outputs”: [
{
“name”: “stdout”,
“output_type”: “stream”,
“text”: [
“name Bob\n”,
“score 70.0\n”,
“age 24\n”,
“Name: 4, dtype: object\n”,
“name Bob\n”,
“score 70.0\n”,
“age 24\n”,
“Name: 4, dtype: object\n”
]
}
],
“execution_count”: 49
},
{
“metadata”: {
“ExecuteTime”: {
“end_time”: “2025-12-09T12:07:27.469436Z”,
“start_time”: “2025-12-09T12:07:27.461484Z”
}
},
“cell_type”: “code”,
“source”: [
"# Select one column, first by label and then by position.\n",
"name_by_label = df.loc[:, 'name']\n",
"name_by_position = df.iloc[:, 0]\n",
"print(name_by_label)\n",
"print(name_by_position)"
],
“id”: “41581843ecfd73a0”,
“outputs”: [
{
“name”: “stdout”,
“output_type”: “stream”,
“text”: [
“1 tom\n”,
“2 Jack\n”,
“3 Alice\n”,
“4 Bob\n”,
“5 John\n”,
“Name: name, dtype: object\n”,
“1 tom\n”,
“2 Jack\n”,
“3 Alice\n”,
“4 Bob\n”,
“5 John\n”,
“Name: name, dtype: object\n”
]
}
],
“execution_count”: 52
},
{
“metadata”: {
“ExecuteTime”: {
“end_time”: “2025-12-09T12:09:35.955912Z”,
“start_time”: “2025-12-09T12:09:35.949403Z”
}
},
“cell_type”: “code”,
“source”: [
"# Read a single cell four ways: at/loc take labels, iat/iloc take positions.\n",
"for cell in (df.at[3, 'score'], df.iat[2, 1], df.loc[3, 'score'], df.iloc[2, 1]):\n",
"    print(cell)"
],
“id”: “8060f2b5cbe31b8b”,
“outputs”: [
{
“name”: “stdout”,
“output_type”: “stream”,
“text”: [
“40.5\n”,
“40.5\n”,
“40.5\n”,
“40.5\n”
]
}
],
“execution_count”: 56
},
{
“metadata”: {
“ExecuteTime”: {
“end_time”: “2025-12-09T12:18:58.216382Z”,
“start_time”: “2025-12-09T12:18:58.205902Z”
}
},
“cell_type”: “code”,
“source”: [
"# Single column: bracket and attribute access both yield a Series, while a\n",
"# one-element list of labels yields a DataFrame.\n",
"for selection in (df['name'], df.name, df[['name']]):\n",
"    print(selection)\n",
"    print(type(selection))\n",
"\n",
"# Several columns: index with a list of labels.\n",
"print(df[['name', 'age']])"
],
“id”: “ee447e2e7ddae4f”,
“outputs”: [
{
“name”: “stdout”,
“output_type”: “stream”,
“text”: [
“1 tom\n”,
“2 Jack\n”,
“3 Alice\n”,
“4 Bob\n”,
“5 John\n”,
“Name: name, dtype: object\n”,
“<class ‘pandas.core.series.Series’>\n”,
“1 tom\n”,
“2 Jack\n”,
“3 Alice\n”,
“4 Bob\n”,
“5 John\n”,
“Name: name, dtype: object\n”,
“<class ‘pandas.core.series.Series’>\n”,
" name\n",
“1 tom\n”,
“2 Jack\n”,
“3 Alice\n”,
“4 Bob\n”,
“5 John\n”,
“<class ‘pandas.core.frame.DataFrame’>\n”,
" name age\n",
“1 tom 15\n”,
“2 Jack 17\n”,
“3 Alice 21\n”,
“4 Bob 24\n”,
“5 John 30\n”
]
}
],
“execution_count”: 63
},
{
“metadata”: {
“ExecuteTime”: {
“end_time”: “2025-12-09T12:19:41.030196Z”,
“start_time”: “2025-12-09T12:19:41.022590Z”
}
},
“cell_type”: “code”,
“source”: [
"# Preview part of the data: the first two rows, then the last three.\n",
"first_rows = df.head(n=2)\n",
"last_rows = df.tail(n=3)\n",
"print(first_rows)\n",
"print(last_rows)"
],
“id”: “8f5e4ba6c36702c9”,
“outputs”: [
{
“name”: “stdout”,
“output_type”: “stream”,
“text”: [
" name score age\n",
“1 tom 60.5 15\n”,
“2 Jack 80.0 17\n”,
" name score age\n",
“3 Alice 40.5 21\n”,
“4 Bob 70.0 24\n”,
“5 John 85.5 30\n”
]
}
],
“execution_count”: 66
},
{
“metadata”: {
“ExecuteTime”: {
“end_time”: “2025-12-09T12:21:50.773778Z”,
“start_time”: “2025-12-09T12:21:50.765944Z”
}
},
“cell_type”: “code”,
“source”: [
"# Boolean indexing: a mask Series picks the rows where it is True.\n",
"high_score = df['score'] > 70\n",
"df[high_score]  # evaluated but not rendered; only the cell's last expression is shown\n",
"df[high_score & (df['age'] < 20)]"
],
“id”: “3ddef970abc8f990”,
“outputs”: [
{
“data”: {
“text/plain”: [
" name score age\n",
“2 Jack 80.0 17”
],
“text/html”: [
“\n”,
“\n”,
“<table border="1" class="dataframe">\n”,
" \n",
" <tr style="text-align: right;">\n",
" \n",
" name \n",
" score \n",
" age \n",
" \n",
" \n",
" \n",
" \n",
" 2 \n",
" Jack \n",
" 80.0 \n",
" 17 \n",
" \n",
" \n",
“\n”,
“
”
]
},
“execution_count”: 70,
“metadata”: {},
“output_type”: “execute_result”
}
],
“execution_count”: 70
},
{
“metadata”: {
“ExecuteTime”: {
“end_time”: “2025-12-09T12:22:18.207223Z”,
“start_time”: “2025-12-09T12:22:18.199349Z”
}
},
“cell_type”: “code”,
“source”: [
"# Random sample of three rows (no random_state is passed, so the rows can differ between runs).\n",
"df.sample(n=3)"
],
“id”: “92e41c77ab05a6ca”,
“outputs”: [
{
“data”: {
“text/plain”: [
" name score age\n",
“4 Bob 70.0 24\n”,
“2 Jack 80.0 17\n”,
“3 Alice 40.5 21”
],
“text/html”: [
“\n”,
“\n”,
“<table border="1" class="dataframe">\n”,
" \n",
" <tr style="text-align: right;">\n",
" \n",
" name \n",
" score \n",
" age \n",
" \n",
" \n",
" \n",
" \n",
" 4 \n",
" Bob \n",
" 70.0 \n",
" 24 \n",
" \n",
" \n",
" 2 \n",
" Jack \n",
" 80.0 \n",
" 17 \n",
" \n",
" \n",
" 3 \n",
" Alice \n",
" 40.5 \n",
" 21 \n",
" \n",
" \n",
“\n”,
“
”
]
},
“execution_count”: 77,
“metadata”: {},
“output_type”: “execute_result”
}
],
“execution_count”: 77
},
{
“metadata”: {
“ExecuteTime”: {
“end_time”: “2025-12-09T12:41:47.665923Z”,
“start_time”: “2025-12-09T12:41:47.656794Z”
}
},
“cell_type”: “code”,
“source”: “print(df.isin([‘Jack’, 17])) # 查看元素是否包含在参数集合中”,
“id”: “378ed0a0c89a9c81”,
“outputs”: [
{
“name”: “stdout”,
“output_type”: “stream”,
“text”: [
" name score age\n",
“1 False False False\n”,
“2 True False True\n”,
“3 False False False\n”,
“4 False False False\n”,
“5 False False False\n”
]
}
],
“execution_count”: 80
},
{
“metadata”: {
“ExecuteTime”: {
“end_time”: “2025-12-09T12:42:23.723382Z”,
“start_time”: “2025-12-09T12:42:23.717382Z”
}
},
“cell_type”: “code”,
“source”: “print(df.isna()) # 查看元素是否是缺失值”,
“id”: “bc7a2a77c72ca23a”,
“outputs”: [
{
“name”: “stdout”,
“output_type”: “stream”,
“text”: [
" name score age\n",
“1 False False False\n”,
“2 False False False\n”,
“3 False False False\n”,
“4 False False False\n”,
“5 False False False\n”
]
}
],
“execution_count”: 81
},
{
“metadata”: {
“ExecuteTime”: {
“end_time”: “2025-12-09T12:46:13.828041Z”,
“start_time”: “2025-12-09T12:46:13.816157Z”
}
},
“cell_type”: “code”,
“source”: [
"score = df['score']\n",
"print(score.sum())  # column total\n",
"print(score.max())\n",
"print(df['age'].min())\n",
"print(score.mean())\n",
"print(score.median())\n",
"print(score.mode())  # mode(s), returned as a Series\n",
"print(score.std())\n",
"print(score.var())\n",
"print(score.quantile(q=0.25))  # first quartile"
],
“id”: “67aecea7434f0d93”,
“outputs”: [
{
“name”: “stdout”,
“output_type”: “stream”,
“text”: [
“336.5\n”,
“85.5\n”,
“15\n”,
“67.3\n”,
“70.0\n”,
“0 40.5\n”,
“1 60.5\n”,
“2 70.0\n”,
“3 80.0\n”,
“4 85.5\n”,
“Name: score, dtype: float64\n”,
“17.778498249289786\n”,
“316.07500000000005\n”,
“60.5\n”
]
}
],
“execution_count”: 90
},
{
“metadata”: {
“ExecuteTime”: {
“end_time”: “2025-12-09T12:46:39.055763Z”,
“start_time”: “2025-12-09T12:46:39.041122Z”
}
},
“cell_type”: “code”,
“source”: “print(df.describe())”,
“id”: “4ca3d5144c854114”,
“outputs”: [
{
“name”: “stdout”,
“output_type”: “stream”,
“text”: [
" score age\n",
“count 5.000000 5.00000\n”,
“mean 67.300000 21.40000\n”,
“std 17.778498 5.94138\n”,
“min 40.500000 15.00000\n”,
“25% 60.500000 17.00000\n”,
“50% 70.000000 21.00000\n”,
“75% 80.000000 24.00000\n”,
“max 85.500000 30.00000\n”
]
}
],
“execution_count”: 91
},
{
“metadata”: {
“ExecuteTime”: {
“end_time”: “2025-12-09T12:47:29.473591Z”,
“start_time”: “2025-12-09T12:47:29.468713Z”
}
},
“cell_type”: “code”,
“source”: “print(df.count()) # 每一列非缺失值的个数”,
“id”: “88c4105b0cdd5f07”,
“outputs”: [
{
“name”: “stdout”,
“output_type”: “stream”,
“text”: [
“name 5\n”,
“score 5\n”,
“age 5\n”,
“dtype: int64\n”
]
}
],
“execution_count”: 92
},
{
“metadata”: {
“ExecuteTime”: {
“end_time”: “2025-12-09T12:47:54.296890Z”,
“start_time”: “2025-12-09T12:47:54.284452Z”
}
},
“cell_type”: “code”,
“source”: “print(df.value_counts()) # 出现的次数”,
“id”: “f208e236bb0dfe44”,
“outputs”: [
{
“name”: “stdout”,
“output_type”: “stream”,
“text”: [
“name score age\n”,
“Alice 40.5 21 1\n”,
“Bob 70.0 24 1\n”,
“Jack 80.0 17 1\n”,
“John 85.5 30 1\n”,
“tom 60.5 15 1\n”,
“Name: count, dtype: int64\n”
]
}
],
“execution_count”: 93
},
{
“metadata”: {
“ExecuteTime”: {
“end_time”: “2025-12-09T12:48:55.953044Z”,
“start_time”: “2025-12-09T12:48:55.942503Z”
}
},
“cell_type”: “code”,
“source”: “print(df.drop_duplicates())”,
“id”: “69cd3b8261d312bf”,
“outputs”: [
{
“name”: “stdout”,
“output_type”: “stream”,
“text”: [
" name score age\n",
“1 tom 60.5 15\n”,
“2 Jack 80.0 17\n”,
“3 Alice 40.5 21\n”,
“4 Bob 70.0 24\n”,
“5 John 85.5 30\n”
]
}
],
“execution_count”: 94
},
{
“metadata”: {
“ExecuteTime”: {
“end_time”: “2025-12-09T12:49:49.939087Z”,
“start_time”: “2025-12-09T12:49:49.933117Z”
}
},
“cell_type”: “code”,
“source”: “print(df.duplicated(subset=[‘score’])) # 查看是否重复”,
“id”: “42dbc6c91e6410d3”,
“outputs”: [
{
“name”: “stdout”,
“output_type”: “stream”,
“text”: [
“1 False\n”,
“2 False\n”,
“3 False\n”,
“4 False\n”,
“5 False\n”,
“dtype: bool\n”
]
}
],
“execution_count”: 96
},
{
“metadata”: {
“ExecuteTime”: {
“end_time”: “2025-12-09T12:50:43.799105Z”,
“start_time”: “2025-12-09T12:50:43.787907Z”
}
},
“cell_type”: “code”,
“source”: “df.sample(2) # 随机抽样”,
“id”: “13388bd9372bf62f”,
“outputs”: [
{
“data”: {
“text/plain”: [
" name score age\n",
“1 tom 60.5 15\n”,
“5 John 85.5 30”
],
“text/html”: [
“\n”,
“\n”,
“<table border="1" class="dataframe">\n”,
" \n",
" <tr style="text-align: right;">\n",
" \n",
" name \n",
" score \n",
" age \n",
" \n",
" \n",
" \n",
" \n",
" 1 \n",
" tom \n",
" 60.5 \n",
" 15 \n",
" \n",
" \n",
" 5 \n",
" John \n",
" 85.5 \n",
" 30 \n",
" \n",
" \n",
“\n”,
“
”
]
},
“execution_count”: 99,
“metadata”: {},
“output_type”: “execute_result”
}
],
“execution_count”: 99
},
{
“metadata”: {
“ExecuteTime”: {
“end_time”: “2025-12-09T12:51:02.942597Z”,
“start_time”: “2025-12-09T12:51:02.935053Z”
}
},
“cell_type”: “code”,
“source”: “print(df.replace(15, 30))”,
“id”: “270dd2e581aa2316”,
“outputs”: [
{
“name”: “stdout”,
“output_type”: “stream”,
“text”: [
" name score age\n",
“1 tom 60.5 30\n”,
“2 Jack 80.0 17\n”,
“3 Alice 40.5 21\n”,
“4 Bob 70.0 24\n”,
“5 John 85.5 30\n”
]
}
],
“execution_count”: 100
},
{
“metadata”: {
“ExecuteTime”: {
“end_time”: “2025-12-09T12:53:35.358710Z”,
“start_time”: “2025-12-09T12:53:35.348339Z”
}
},
“cell_type”: “code”,
“source”: [
"df.cumsum()  # cumulative sum; evaluated but not rendered\n",
"df.cummax(axis='index')  # running maximum down the rows of each column"
],
“id”: “4a99ac935b6455d5”,
“outputs”: [
{
“data”: {
“text/plain”: [
" name score age\n",
“1 tom 60.5 15\n”,
“2 tom 80.0 17\n”,
“3 tom 80.0 21\n”,
“4 tom 80.0 24\n”,
“5 tom 85.5 30”
],
“text/html”: [
“\n”,
“\n”,
“<table border="1" class="dataframe">\n”,
" \n",
" <tr style="text-align: right;">\n",
" \n",
" name \n",
" score \n",
" age \n",
" \n",
" \n",
" \n",
" \n",
" 1 \n",
" tom \n",
" 60.5 \n",
" 15 \n",
" \n",
" \n",
" 2 \n",
" tom \n",
" 80.0 \n",
" 17 \n",
" \n",
" \n",
" 3 \n",
" tom \n",
" 80.0 \n",
" 21 \n",
" \n",
" \n",
" 4 \n",
" tom \n",
" 80.0 \n",
" 24 \n",
" \n",
" \n",
" 5 \n",
" tom \n",
" 85.5 \n",
" 30 \n",
" \n",
" \n",
“\n”,
“
”
]
},
“execution_count”: 105,
“metadata”: {},
“output_type”: “execute_result”
}
],
“execution_count”: 105
},
{
“metadata”: {
“ExecuteTime”: {
“end_time”: “2025-12-09T12:54:04.052639Z”,
“start_time”: “2025-12-09T12:54:04.046618Z”
}
},
“cell_type”: “code”,
“source”: “print(df.sort_index(ascending=False))”,
“id”: “db77eb5790d7e93a”,
“outputs”: [
{
“name”: “stdout”,
“output_type”: “stream”,
“text”: [
" name score age\n",
“5 John 85.5 30\n”,
“4 Bob 70.0 24\n”,
“3 Alice 40.5 21\n”,
“2 Jack 80.0 17\n”,
“1 tom 60.5 15\n”
]
}
],
“execution_count”: 107
},
{
“metadata”: {
“ExecuteTime”: {
“end_time”: “2025-12-09T12:56:48.214795Z”,
“start_time”: “2025-12-09T12:56:48.203817Z”
}
},
“cell_type”: “code”,
“source”: [
"# Multi-key sort: score ascending, ties broken by age descending.\n",
"ranked = df.sort_values(by=['score', 'age'], ascending=[True, False])\n",
"print(ranked)"
],
“id”: “8e2167b3443eed5e”,
“outputs”: [
{
“name”: “stdout”,
“output_type”: “stream”,
“text”: [
" name score age\n",
“3 Alice 40.5 21\n”,
“1 tom 60.5 15\n”,
“4 Bob 70.0 24\n”,
“2 Jack 80.0 17\n”,
“5 John 85.5 30\n”
]
}
],
“execution_count”: 109
},
{
“metadata”: {
“ExecuteTime”: {
“end_time”: “2025-12-09T12:58:32.047301Z”,
“start_time”: “2025-12-09T12:58:32.034998Z”
}
},
“cell_type”: “code”,
“source”: [
"rank_keys = ['score', 'age']  # order by score, then by age to break ties\n",
"df.nlargest(n=2, columns=rank_keys)  # evaluated but not rendered\n",
"df.nsmallest(n=2, columns=rank_keys)"
],
“id”: “7b535143a55f00c2”,
“outputs”: [
{
“data”: {
“text/plain”: [
" name score age\n",
“3 Alice 40.5 21\n”,
“1 tom 60.5 15”
],
“text/html”: [
“\n”,
“\n”,
“<table border="1" class="dataframe">\n”,
" \n",
" <tr style="text-align: right;">\n",
" \n",
" name \n",
" score \n",
" age \n",
" \n",
" \n",
" \n",
" \n",
" 3 \n",
" Alice \n",
" 40.5 \n",
" 21 \n",
" \n",
" \n",
" 1 \n",
" tom \n",
" 60.5 \n",
" 15 \n",
" \n",
" \n",
“\n”,
“
”
]
},
“execution_count”: 112,
“metadata”: {},
“output_type”: “execute_result”
}
],
“execution_count”: 112
},
{
“metadata”: {
“ExecuteTime”: {
“end_time”: “2025-12-10T02:37:57.175517Z”,
“start_time”: “2025-12-10T02:37:57.158638Z”
}
},
“cell_type”: “code”,
“source”: [
"import pandas as pd\n",
"\n",
"# One row per student: a name column followed by three subject columns.\n",
"data = {\n",
"    '姓名': ['张三', '李四', '王五', '赵六', '钱七'],\n",
"    '数学': [85, 92, 78, 88, 95],\n",
"    '英语': [90, 88, 85, 92, 80],\n",
"    '物理': [75, 80, 88, 85, 90]\n",
"}\n",
"scores = pd.DataFrame(data=data)\n",
"scores"
],
“id”: “19dccf6c90456ae9”,
“outputs”: [
{
“data”: {
“text/plain”: [
" 姓名 数学 英语 物理\n",
“0 张三 85 90 75\n”,
“1 李四 92 88 80\n”,
“2 王五 78 85 88\n”,
“3 赵六 88 92 85\n”,
“4 钱七 95 80 90”
],
“text/html”: [
“\n”,
“\n”,
“<table border="1" class="dataframe">\n”,
" \n",
" <tr style="text-align: right;">\n",
" \n",
" 姓名 \n",
" 数学 \n",
" 英语 \n",
" 物理 \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" 张三 \n",
" 85 \n",
" 90 \n",
" 75 \n",
" \n",
" \n",
" 1 \n",
" 李四 \n",
" 92 \n",
" 88 \n",
" 80 \n",
" \n",
" \n",
" 2 \n",
" 王五 \n",
" 78 \n",
" 85 \n",
" 88 \n",
" \n",
" \n",
" 3 \n",
" 赵六 \n",
" 88 \n",
" 92 \n",
" 85 \n",
" \n",
" \n",
" 4 \n",
" 钱七 \n",
" 95 \n",
" 80 \n",
" 90 \n",
" \n",
" \n",
“\n”,
“
”
]
},
“execution_count”: 2,
“metadata”: {},
“output_type”: “execute_result”
}
],
“execution_count”: 2
},
{
“metadata”: {
“ExecuteTime”: {
“end_time”: “2025-12-10T08:45:48.933103Z”,
“start_time”: “2025-12-10T08:45:48.908279Z”
}
},
“cell_type”: “code”,
“source”: [
"# The subject columns are listed once so the total and both averages stay in sync.\n",
"subjects = ['数学', '英语', '物理']\n",
"scores['总分'] = scores[subjects].sum(axis=1)\n",
"scores['平均分'] = scores['总分'] / len(subjects)  # divisor follows the subject list instead of a literal 3\n",
"scores['平均分2'] = scores[subjects].mean(axis=1)  # same average, computed directly\n",
"\n",
"# Students with math above 90 or English above 85 (evaluated but not rendered).\n",
"scores[(scores['数学'] > 90) | (scores['英语'] > 85)]\n",
"\n",
"# Top three by total score, two ways; only the last expression is rendered.\n",
"scores.sort_values('总分', ascending=False).head(3)\n",
"scores.nlargest(3, columns=['总分'])"
],
“id”: “c1919b37cf1fcb1b”,
“outputs”: [
{
“data”: {
“text/plain”: [
" 姓名 数学 英语 物理 总分 平均分 平均分2\n",
“3 赵六 88 92 85 265 88.333333 88.333333\n”,
“4 钱七 95 80 90 265 88.333333 88.333333\n”,
“1 李四 92 88 80 260 86.666667 86.666667”
],
“text/html”: [
“\n”,
“\n”,
“<table border="1" class="dataframe">\n”,
" \n",
" <tr style="text-align: right;">\n",
" \n",
" 姓名 \n",
" 数学 \n",
" 英语 \n",
" 物理 \n",
" 总分 \n",
" 平均分 \n",
" 平均分2 \n",
" \n",
" \n",
" \n",
" \n",
" 3 \n",
" 赵六 \n",
" 88 \n",
" 92 \n",
" 85 \n",
" 265 \n",
" 88.333333 \n",
" 88.333333 \n",
" \n",
" \n",
" 4 \n",
" 钱七 \n",
" 95 \n",
" 80 \n",
" 90 \n",
" 265 \n",
" 88.333333 \n",
" 88.333333 \n",
" \n",
" \n",
" 1 \n",
" 李四 \n",
" 92 \n",
" 88 \n",
" 80 \n",
" 260 \n",
" 86.666667 \n",
" 86.666667 \n",
" \n",
" \n",
“\n”,
“
”
]
},
“execution_count”: 23,
“metadata”: {},
“output_type”: “execute_result”
}
],
“execution_count”: 23
},
{
“metadata”: {
“ExecuteTime”: {
“end_time”: “2025-12-10T08:53:10.561197Z”,
“start_time”: “2025-12-10T08:53:10.547531Z”
}
},
“cell_type”: “code”,
“source”: [
"# Unit price and units sold for four products.\n",
"data = {\n",
"    '产品名称': ['A', 'B', 'C', 'D'],\n",
"    '单价': [100, 150, 200, 120],\n",
"    '销量': [50, 30, 20, 40]\n",
"}\n",
"df = pd.DataFrame(data)\n",
"# Revenue per product = unit price * units sold (element-wise).\n",
"df['总销售额'] = df['单价'] * df['销量']\n",
"\n",
"# Product with the highest revenue. display() (provided by the IPython\n",
"# kernel) is needed because only the last expression of a cell is\n",
"# rendered; as a bare statement this result was silently discarded.\n",
"display(df.nlargest(1, columns=['总销售额']))\n",
"\n",
"# All products ranked by revenue, highest first.\n",
"df.sort_values('总销售额', ascending=False)"
],
“id”: “ce5c07a83de55698”,
“outputs”: [
{
“data”: {
“text/plain”: [
" 产品名称 单价 销量 总销售额\n",
“0 A 100 50 5000\n”,
“3 D 120 40 4800\n”,
“1 B 150 30 4500\n”,
“2 C 200 20 4000”
],
“text/html”: [
“\n”,
“\n”,
“<table border="1" class="dataframe">\n”,
" \n",
" <tr style="text-align: right;">\n",
" \n",
" 产品名称 \n",
" 单价 \n",
" 销量 \n",
" 总销售额 \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" A \n",
" 100 \n",
" 50 \n",
" 5000 \n",
" \n",
" \n",
" 3 \n",
" D \n",
" 120 \n",
" 40 \n",
" 4800 \n",
" \n",
" \n",
" 1 \n",
" B \n",
" 150 \n",
" 30 \n",
" 4500 \n",
" \n",
" \n",
" 2 \n",
" C \n",
" 200 \n",
" 20 \n",
" 4000 \n",
" \n",
" \n",
“\n”,
“
”
]
},
“execution_count”: 27,
“metadata”: {},
“output_type”: “execute_result”
}
],
“execution_count”: 27
},
{
“metadata”: {
“ExecuteTime”: {
“end_time”: “2025-12-10T09:01:47.630055Z”,
“start_time”: “2025-12-10T09:01:47.617201Z”
}
},
“cell_type”: “code”,
“source”: [
"# One order line per user: id, name, product category, unit price, quantity.\n",
"data = dict([\n",
"    ('用户ID', [101, 102, 103, 104, 105]),\n",
"    ('用户名', ['Alice', 'Bob', 'Charlie', 'David', 'Eve']),\n",
"    ('商品类别', ['电子产品', '服饰', '电子产品', '家居', '服饰']),\n",
"    ('商品单价', [1200, 300, 800, 150, 200]),\n",
"    ('购买数量', [1, 3, 2, 5, 4]),\n",
"])\n",
"\n",
"# Columns appear in the order the pairs are listed above.\n",
"df = pd.DataFrame(data=data)\n",
"df  # bare last expression -> rendered as the cell output"
],
“id”: “e974cb01bd74f6fe”,
“outputs”: [
{
“data”: {
“text/plain”: [
" 用户ID 用户名 商品类别 商品单价 购买数量\n",
“0 101 Alice 电子产品 1200 1\n”,
“1 102 Bob 服饰 300 3\n”,
“2 103 Charlie 电子产品 800 2\n”,
“3 104 David 家居 150 5\n”,
“4 105 Eve 服饰 200 4”
],
“text/html”: [
“\n”,
“\n”,
“<table border="1" class="dataframe">\n”,
" \n",
" <tr style="text-align: right;">\n",
" \n",
" 用户ID \n",
" 用户名 \n",
" 商品类别 \n",
" 商品单价 \n",
" 购买数量 \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" 101 \n",
" Alice \n",
" 电子产品 \n",
" 1200 \n",
" 1 \n",
" \n",
" \n",
" 1 \n",
" 102 \n",
" Bob \n",
" 服饰 \n",
" 300 \n",
" 3 \n",
" \n",
" \n",
" 2 \n",
" 103 \n",
" Charlie \n",
" 电子产品 \n",
" 800 \n",
" 2 \n",
" \n",
" \n",
" 3 \n",
" 104 \n",
" David \n",
" 家居 \n",
" 150 \n",
" 5 \n",
" \n",
" \n",
" 4 \n",
" 105 \n",
" Eve \n",
" 服饰 \n",
" 200 \n",
" 4 \n",
" \n",
" \n",
“\n”,
“
”
]
},
“execution_count”: 29,
“metadata”: {},
“output_type”: “execute_result”
}
],
“execution_count”: 29
},
{
“metadata”: {
“ExecuteTime”: {
“end_time”: “2025-12-10T09:07:21.363804Z”,
“start_time”: “2025-12-10T09:07:21.353481Z”
}
},
“cell_type”: “code”,
“source”: [
"# Amount spent per order line = unit price * quantity (element-wise).\n",
"df['消费金额'] = df['商品单价'] * df['购买数量']\n",
"\n",
"# Only the final expression of a cell is rendered automatically, so the\n",
"# first two answers are shown explicitly with display() (provided by the\n",
"# IPython kernel) instead of being computed and thrown away.\n",
"# Order line with the highest amount spent.\n",
"display(df.nlargest(1, columns='消费金额'))\n",
"# Mean amount spent across all order lines.\n",
"display(df['消费金额'].mean())\n",
"\n",
"# Total quantity bought in the electronics category; a single .loc lookup\n",
"# selects rows and column together instead of chaining two indexers.\n",
"df.loc[df['商品类别'] == '电子产品', '购买数量'].sum()"
],
“id”: “cac277da1558dad5”,
“outputs”: [
{
“data”: {
“text/plain”: [
“3”
]
},
“execution_count”: 35,
“metadata”: {},
“output_type”: “execute_result”
}
],
“execution_count”: 35
},
{
“metadata”: {},
“cell_type”: “code”,
“outputs”: [],
“execution_count”: null,
“source”: “”,
“id”: “793de8ba7f31932c”
}
],
“metadata”: {
“kernelspec”: {
“display_name”: “Python 3”,
“language”: “python”,
“name”: “python3”
},
“language_info”: {
“codemirror_mode”: {
“name”: “ipython”,
“version”: 2
},
“file_extension”: “.py”,
“mimetype”: “text/x-python”,
“name”: “python”,
“nbconvert_exporter”: “python”,
“pygments_lexer”: “ipython2”,
“version”: “2.7.6”
}
},
“nbformat”: 4,
“nbformat_minor”: 5
}