1 | import numpy as np |
2 | import pandas as pd |
数据定义及使用
定义序列, pandas中的数据形式通常是float32或float64
1 | s = pd.Series([1,3,4,np.nan,11,99]) |
0 1.0
1 3.0
2 4.0
3 NaN
4 11.0
5 99.0
dtype: float64
定义日期列表
1 | dates = pd.date_range('20180927',periods=6) |
DatetimeIndex(['2018-09-27', '2018-09-28', '2018-09-29', '2018-09-30',
'2018-10-01', '2018-10-02'],
dtype='datetime64[ns]', freq='D')
DataFrame类似于numpy的array
不指定索引的DataFrame
1 | df = pd.DataFrame(np.arange(12).reshape(3, 4)) |
| | 0 | 1 | 2 | 3 |
---|---|---|---|---|
0 | 0 | 1 | 2 | 3 |
1 | 4 | 5 | 6 | 7 |
2 | 8 | 9 | 10 | 11 |
DataFrame的定义
1 | df = pd.DataFrame({'A': 1., 'B': 'Foo', 'C': np.array([3] * 4)}) |
| | A | B | C |
---|---|---|---|
0 | 1.0 | Foo | 3 |
1 | 1.0 | Foo | 3 |
2 | 1.0 | Foo | 3 |
3 | 1.0 | Foo | 3 |
行索引为dates, 列索引为[a, b, c, d]
1 | df = pd.DataFrame(np.random.randn(6,4),index=dates,columns=['a','b','c','d']) |
| | a | b | c | d |
---|---|---|---|---|
2018-09-27 | 0.338831 | 1.036578 | -2.573243 | -2.204440 |
2018-09-28 | -0.473799 | -0.646747 | 0.435539 | -0.758833 |
2018-09-29 | 0.305467 | 0.816041 | -0.116044 | 1.197494 |
2018-09-30 | -0.435368 | -0.082337 | 0.624486 | -1.234057 |
2018-10-01 | -0.667478 | 0.233274 | -1.380012 | 2.261031 |
2018-10-02 | -0.654250 | -0.180604 | 0.609015 | -0.924455 |
查看DataFrame的数据类型
1 | df.dtypes |
a float64
b float64
c float64
d float64
dtype: object
查看DataFrame的索引
1 | df.index |
DatetimeIndex(['2018-09-27', '2018-09-28', '2018-09-29', '2018-09-30',
'2018-10-01', '2018-10-02'],
dtype='datetime64[ns]', freq='D')
查看DataFrame的列索引
1 | df.columns |
Index(['a', 'b', 'c', 'd'], dtype='object')
查看DataFrame的值
1 | df.values |
array([[ 0.33883139, 1.03657755, -2.5732431 , -2.20443975],
[-0.47379902, -0.64674734, 0.43553894, -0.75883344],
[ 0.30546684, 0.81604074, -0.11604421, 1.19749384],
[-0.43536792, -0.08233739, 0.62448617, -1.23405699],
[-0.66747791, 0.23327389, -1.38001185, 2.26103083],
[-0.65425047, -0.18060444, 0.60901542, -0.92445528]])
查看DataFrame的描述
1 | df.describe() |
| | a | b | c | d |
---|---|---|---|---|
count | 6.000000 | 6.000000 | 6.000000 | 6.000000 |
mean | -0.264433 | 0.196034 | -0.400043 | -0.277210 |
std | 0.463933 | 0.635914 | 1.306128 | 1.667218 |
min | -0.667478 | -0.646747 | -2.573243 | -2.204440 |
25% | -0.609138 | -0.156038 | -1.064020 | -1.156657 |
50% | -0.454583 | 0.075468 | 0.159747 | -0.841644 |
75% | 0.120258 | 0.670349 | 0.565646 | 0.708412 |
max | 0.338831 | 1.036578 | 0.624486 | 2.261031 |
DataFrame的转置
1 | df.T |
| | 2018-09-27 00:00:00 | 2018-09-28 00:00:00 | 2018-09-29 00:00:00 | 2018-09-30 00:00:00 | 2018-10-01 00:00:00 | 2018-10-02 00:00:00 |
---|---|---|---|---|---|---|
a | 0.338831 | -0.473799 | 0.305467 | -0.435368 | -0.667478 | -0.654250 |
b | 1.036578 | -0.646747 | 0.816041 | -0.082337 | 0.233274 | -0.180604 |
c | -2.573243 | 0.435539 | -0.116044 | 0.624486 | -1.380012 | 0.609015 |
d | -2.204440 | -0.758833 | 1.197494 | -1.234057 | 2.261031 | -0.924455 |
DataFrame按列索引(列名)排序, axis=1表示对列标签排序
1 | df.sort_index(axis=1) |
| | a | b | c | d |
---|---|---|---|---|
2018-09-27 | 0.338831 | 1.036578 | -2.573243 | -2.204440 |
2018-09-28 | -0.473799 | -0.646747 | 0.435539 | -0.758833 |
2018-09-29 | 0.305467 | 0.816041 | -0.116044 | 1.197494 |
2018-09-30 | -0.435368 | -0.082337 | 0.624486 | -1.234057 |
2018-10-01 | -0.667478 | 0.233274 | -1.380012 | 2.261031 |
2018-10-02 | -0.654250 | -0.180604 | 0.609015 | -0.924455 |
DataFrame按列索引(列名)排序, 逆序
1 | df.sort_index(axis=1,ascending=False) |
| | d | c | b | a |
---|---|---|---|---|
2018-09-27 | -2.204440 | -2.573243 | 1.036578 | 0.338831 |
2018-09-28 | -0.758833 | 0.435539 | -0.646747 | -0.473799 |
2018-09-29 | 1.197494 | -0.116044 | 0.816041 | 0.305467 |
2018-09-30 | -1.234057 | 0.624486 | -0.082337 | -0.435368 |
2018-10-01 | 2.261031 | -1.380012 | 0.233274 | -0.667478 |
2018-10-02 | -0.924455 | 0.609015 | -0.180604 | -0.654250 |
DataFrame按值排序
1 | df.sort_values(by='c') |
a | b | c | d | |
---|---|---|---|---|
2018-09-27 | 0.338831 | 1.036578 | -2.573243 | -2.204440 |
2018-10-01 | -0.667478 | 0.233274 | -1.380012 | 2.261031 |
2018-09-29 | 0.305467 | 0.816041 | -0.116044 | 1.197494 |
2018-09-28 | -0.473799 | -0.646747 | 0.435539 | -0.758833 |
2018-10-02 | -0.654250 | -0.180604 | 0.609015 | -0.924455 |
2018-09-30 | -0.435368 | -0.082337 | 0.624486 | -1.234057 |