Merge branch 'master' of https://github.com/datawhalechina/team-learning-program
This commit is contained in:
396
IntroductionToNumpy/Task06 输入输出.md
Normal file
396
IntroductionToNumpy/Task06 输入输出.md
Normal file
@@ -0,0 +1,396 @@
|
||||
# 输入和输出
|
||||
|
||||
|
||||
|
||||
## 1. numpy 二进制文件
|
||||
|
||||
`save()`、`savez()`和`load()`函数以 numpy 专用的二进制类型(`.npy`、`.npz`)保存和读取数据,这三个函数会自动处理ndim、dtype、shape等信息,使用它们读写数组非常方便,但是`save()`和`savez()`输出的文件很难与其它语言编写的程序兼容。
|
||||
|
||||
|
||||
|
||||
|
||||
【函数】
|
||||
```python
|
||||
def save(file, arr, allow_pickle=True, fix_imports=True):
|
||||
```
|
||||
- `save()`函数:以`.npy`格式将数组保存到二进制文件中。
|
||||
- `.npy`格式:以二进制的方式存储文件,在二进制文件第一行以文本形式保存了数据的元信息(ndim,dtype,shape等),可以用二进制工具查看内容。
|
||||
|
||||
|
||||
|
||||
【函数】
|
||||
```python
|
||||
def load(file, mmap_mode=None, allow_pickle=False, fix_imports=True, encoding='ASCII'):
|
||||
```
|
||||
|
||||
- `load()`函数:从`.npy`、`.npz`或 pickled文件加载数组或pickled对象。
|
||||
- `mmap_mode: {None, 'r+', 'r', 'w+', 'c'}`:读取文件的方式(内存映射模式)。不为None时,以给定模式对文件进行内存映射,而不是一次性把整个文件读入内存。
|
||||
- `allow_pickle=False`:是否允许加载存储在`.npy`文件中的pickled对象数组,默认为False(不允许)。
|
||||
- `fix_imports=True`:若为True,pickle将尝试将旧的python2名称映射到python3中使用的新名称。
|
||||
- `encoding='ASCII'`:指定编码格式,默认为“ASCII”。
|
||||
|
||||
|
||||
【例子】将一个数组保存到一个文件中。
|
||||
```python
|
||||
import numpy as np
|
||||
|
||||
outfile = r'.\test.npy'
|
||||
np.random.seed(20200619)
|
||||
x = np.random.uniform(low=0, high=1,size = [3, 5])
|
||||
np.save(outfile, x)
|
||||
y = np.load(outfile)
|
||||
print(y)
|
||||
# [[0.01123594 0.66790705 0.50212171 0.7230908 0.61668256]
|
||||
# [0.00668332 0.1234096 0.96092409 0.67925305 0.38596837]
|
||||
# [0.72342998 0.26258324 0.24318845 0.98795012 0.77370715]]
|
||||
```
|
||||
|
||||
【函数】
|
||||
```python
|
||||
def savez(file, *args, **kwds):
|
||||
```
|
||||
|
||||
- `savez()`函数:以未压缩的`.npz`格式将多个数组保存到单个文件中。
|
||||
- `.npz`格式:以zip打包的方式存储文件(`savez()`只打包、不压缩),可以用压缩软件解压。
|
||||
- `savez()`函数:第一个参数是文件名,其后的参数都是需要保存的数组,也可以使用关键字参数为数组起一个名字,非关键字参数传递的数组会自动起名为`arr_0, arr_1, …`。
|
||||
- `savez()`函数:输出的是一个未压缩的zip归档文件(扩展名为`.npz`),其中每个文件都是一个`save()`保存的`.npy`文件,文件名对应于数组名。`load()`自动识别`.npz`文件,并且返回一个类似于字典的对象,可以通过数组名作为关键字获取数组的内容。
|
||||
|
||||
【例子】将多个数组保存到一个文件。
|
||||
```python
|
||||
import numpy as np
|
||||
|
||||
outfile = r'.\test.npz'
|
||||
x = np.linspace(0, np.pi, 5)
|
||||
y = np.sin(x)
|
||||
z = np.cos(x)
|
||||
np.savez(outfile, x, y, z_d=z)
|
||||
data = np.load(outfile)
|
||||
np.set_printoptions(suppress=True)
|
||||
print(data.files)
|
||||
# ['z_d', 'arr_0', 'arr_1']
|
||||
|
||||
print(data['arr_0'])
|
||||
# [0. 0.78539816 1.57079633 2.35619449 3.14159265]
|
||||
|
||||
print(data['arr_1'])
|
||||
# [0. 0.70710678 1. 0.70710678 0. ]
|
||||
|
||||
print(data['z_d'])
|
||||
# [ 1. 0.70710678 0. -0.70710678 -1. ]
|
||||
```
|
||||
|
||||
用解压软件打开 test.npz 文件,会发现其中有三个文件:`arr_0.npy,arr_1.npy,z_d.npy`,其中分别保存着数组`x,y,z`的内容。
|
||||
|
||||
|
||||
---
|
||||
## 2. 文本文件
|
||||
`savetxt()`,`loadtxt()`和`genfromtxt()`函数用来存储和读取文本文件(如`.TXT`,`.CSV`等)。`genfromtxt()`比`loadtxt()`更加强大,可对缺失数据进行处理。
|
||||
|
||||
|
||||
【函数】
|
||||
```python
|
||||
def savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n',header='', footer='', comments='# ', encoding=None):
|
||||
```
|
||||
- `fname`:文件路径
|
||||
- `X`:存入文件的数组。
|
||||
- `fmt='%.18e'`:写入文件中每个元素的字符串格式,默认'%.18e'(保留18位小数的科学计数法形式)。
|
||||
- `delimiter=' '`:分割字符串,默认以空格分隔。
|
||||
|
||||
```python
|
||||
def loadtxt(fname, dtype=float, comments='#', delimiter=None,
|
||||
converters=None, skiprows=0, usecols=None, unpack=False,
|
||||
ndmin=0, encoding='bytes', max_rows=None):
|
||||
```
|
||||
- `fname`:文件路径。
|
||||
- `dtype=float`:数据类型,默认为float。
|
||||
- `comments='#'`: 字符串或字符串组成的列表,默认为'#',表示注释字符集开始的标志。
|
||||
- `skiprows=0`:跳过多少行,一般跳过第一行表头。
|
||||
- `usecols=None`:元组(元组内数据为列的数值索引), 用来指定要读取数据的列(第一列为0)。
|
||||
- `unpack=False`:当加载多列数据时,是否需要将各数据列解包(unpack)后分别赋值给不同的变量。
|
||||
|
||||
|
||||
【例子】写入和读出TXT文件。
|
||||
```python
|
||||
import numpy as np
|
||||
|
||||
outfile = r'.\test.txt'
|
||||
x = np.arange(0, 10).reshape(2, -1)
|
||||
np.savetxt(outfile, x)
|
||||
y = np.loadtxt(outfile)
|
||||
print(y)
|
||||
# [[0. 1. 2. 3. 4.]
|
||||
# [5. 6. 7. 8. 9.]]
|
||||
```
|
||||
|
||||
test.txt文件如下:
|
||||
|
||||
```python
|
||||
0.000000000000000000e+00 1.000000000000000000e+00 2.000000000000000000e+00 3.000000000000000000e+00 4.000000000000000000e+00
|
||||
5.000000000000000000e+00 6.000000000000000000e+00 7.000000000000000000e+00 8.000000000000000000e+00 9.000000000000000000e+00
|
||||
```
|
||||
|
||||
|
||||
【例子】写入和读出CSV文件。
|
||||
```python
|
||||
import numpy as np
|
||||
|
||||
outfile = r'.\test.csv'
|
||||
x = np.arange(0, 10, 0.5).reshape(4, -1)
|
||||
np.savetxt(outfile, x, fmt='%.3f', delimiter=',')
|
||||
y = np.loadtxt(outfile, delimiter=',')
|
||||
print(y)
|
||||
# [[0. 0.5 1. 1.5 2. ]
|
||||
# [2.5 3. 3.5 4. 4.5]
|
||||
# [5. 5.5 6. 6.5 7. ]
|
||||
# [7.5 8. 8.5 9. 9.5]]
|
||||
```
|
||||
|
||||
test.csv文件如下:
|
||||
```python
|
||||
0.000,0.500,1.000,1.500,2.000
|
||||
2.500,3.000,3.500,4.000,4.500
|
||||
5.000,5.500,6.000,6.500,7.000
|
||||
7.500,8.000,8.500,9.000,9.500
|
||||
```
|
||||
|
||||
|
||||
|
||||
【函数】
|
||||
|
||||
```python
|
||||
def genfromtxt(fname, dtype=float, comments='#', delimiter=None,
|
||||
skip_header=0, skip_footer=0, converters=None,
|
||||
missing_values=None, filling_values=None, usecols=None,
|
||||
names=None, excludelist=None,
|
||||
deletechars=''.join(sorted(NameValidator.defaultdeletechars)),
|
||||
replace_space='_', autostrip=False, case_sensitive=True,
|
||||
defaultfmt="f%i", unpack=None, usemask=False, loose=True,
|
||||
invalid_raise=True, max_rows=None, encoding='bytes'):
|
||||
```
|
||||
- `genfromtxt()`函数:从文本文件加载数据,并按指定方式处理缺失值(面向结构数组和缺失数据处理)。
|
||||
- `names=None`:设置为True时,程序将把第一行作为列名称。
|
||||
|
||||
|
||||
data.csv文件(不带缺失值)
|
||||
```python
|
||||
id,value1,value2,value3
|
||||
1,123,1.4,23
|
||||
2,110,0.5,18
|
||||
3,164,2.1,19
|
||||
```
|
||||
|
||||
【例子】
|
||||
```python
|
||||
import numpy as np
|
||||
|
||||
outfile = r'.\data.csv'
|
||||
x = np.loadtxt(outfile, delimiter=',', skiprows=1)
|
||||
print(x)
|
||||
# [[ 1. 123. 1.4 23. ]
|
||||
# [ 2. 110. 0.5 18. ]
|
||||
# [ 3. 164. 2.1 19. ]]
|
||||
|
||||
x = np.loadtxt(outfile, delimiter=',', skiprows=1, usecols=(1, 2))
|
||||
print(x)
|
||||
# [[123. 1.4]
|
||||
# [110. 0.5]
|
||||
# [164. 2.1]]
|
||||
|
||||
val1, val2 = np.loadtxt(outfile, delimiter=',', skiprows=1, usecols=(1, 2), unpack=True)
|
||||
print(val1) # [123. 110. 164.]
|
||||
print(val2) # [1.4 0.5 2.1]
|
||||
```
|
||||
|
||||
|
||||
【例子】
|
||||
```python
|
||||
import numpy as np
|
||||
|
||||
outfile = r'.\data.csv'
|
||||
x = np.genfromtxt(outfile, delimiter=',', names=True)
|
||||
print(x)
|
||||
# [(1., 123., 1.4, 23.) (2., 110., 0.5, 18.) (3., 164., 2.1, 19.)]
|
||||
|
||||
print(type(x))
|
||||
# <class 'numpy.ndarray'>
|
||||
|
||||
print(x.dtype)
|
||||
# [('id', '<f8'), ('value1', '<f8'), ('value2', '<f8'), ('value3', '<f8')]
|
||||
|
||||
print(x['id']) # [1. 2. 3.]
|
||||
print(x['value1']) # [123. 110. 164.]
|
||||
print(x['value2']) # [1.4 0.5 2.1]
|
||||
print(x['value3']) # [23. 18. 19.]
|
||||
```
|
||||
|
||||
data1.csv文件(带有缺失值)
|
||||
```python
|
||||
id,value1,value2,value3
|
||||
1,123,1.4,23
|
||||
2,110,,18
|
||||
3,,2.1,19
|
||||
```
|
||||
|
||||
【例子】
|
||||
```python
|
||||
import numpy as np
|
||||
|
||||
outfile = r'.\data1.csv'
|
||||
x = np.genfromtxt(outfile, delimiter=',', names=True)
|
||||
print(x)
|
||||
# [(1., 123., 1.4, 23.) (2., 110., nan, 18.) (3., nan, 2.1, 19.)]
|
||||
|
||||
print(type(x))
|
||||
# <class 'numpy.ndarray'>
|
||||
|
||||
print(x.dtype)
|
||||
# [('id', '<f8'), ('value1', '<f8'), ('value2', '<f8'), ('value3', '<f8')]
|
||||
|
||||
print(x['id']) # [1. 2. 3.]
|
||||
print(x['value1']) # [123. 110. nan]
|
||||
print(x['value2']) # [1.4 nan 2.1]
|
||||
print(x['value3']) # [23. 18. 19.]
|
||||
```
|
||||
|
||||
---
|
||||
## 3. 文本格式选项
|
||||
|
||||
【函数】
|
||||
```python
|
||||
def set_printoptions(precision=None, threshold=None, edgeitems=None,
|
||||
linewidth=None, suppress=None, nanstr=None, infstr=None,
|
||||
formatter=None, sign=None, floatmode=None, **kwarg):
|
||||
```
|
||||
- `set_printoptions()`函数:设置打印选项。这些选项决定浮点数、数组和其它NumPy对象的显示方式。
|
||||
- `precision=8`:设置浮点精度,控制输出的小数位数,默认是8。
|
||||
- `threshold=1000`:概略显示,数组元素总数超过该值时,中间部分以“…”的形式省略,默认是1000。
|
||||
- `linewidth=75`:用于确定每行多少字符数后插入换行符,默认为75。
|
||||
- `suppress=False`:当`suppress=True`,表示小数不需要以科学计数法的形式输出,默认是False。
|
||||
- `nanstr=nan`:浮点非数字的字符串表示形式,默认`nan`。
|
||||
- `infstr=inf`:浮点无穷大的字符串表示形式,默认`inf`。
|
||||
- `formatter`:一个字典,自定义格式化用于显示的数组元素。键为需要格式化的类型,值为格式化的字符串。
|
||||
- 'bool'
|
||||
- 'int'
|
||||
- 'float'
|
||||
- 'str' : all other strings
|
||||
- 'all' : sets all types
|
||||
- ...
|
||||
|
||||
【例子】
|
||||
```python
|
||||
import numpy as np
|
||||
|
||||
np.set_printoptions(precision=4)
|
||||
x = np.array([1.123456789])
|
||||
print(x) # [1.1235]
|
||||
|
||||
np.set_printoptions(threshold=20)
|
||||
x = np.arange(50)
|
||||
print(x) # [ 0 1 2 ... 47 48 49]
|
||||
|
||||
np.set_printoptions(threshold=np.iinfo(int).max)
|
||||
print(x)
|
||||
# [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
|
||||
# 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
|
||||
# 48 49]
|
||||
|
||||
eps = np.finfo(float).eps
|
||||
x = np.arange(4.)
|
||||
x = x ** 2 - (x + eps) ** 2
|
||||
print(x)
|
||||
# [-4.9304e-32 -4.4409e-16 0.0000e+00 0.0000e+00]
|
||||
np.set_printoptions(suppress=True)
|
||||
print(x) # [-0. -0. 0. 0.]
|
||||
|
||||
x = np.linspace(0, 10, 10)
|
||||
print(x)
|
||||
# [ 0. 1.1111 2.2222 3.3333 4.4444 5.5556 6.6667 7.7778 8.8889
|
||||
# 10. ]
|
||||
np.set_printoptions(precision=2, suppress=True, threshold=5)
|
||||
print(x) # [ 0. 1.11 2.22 ... 7.78 8.89 10. ]
|
||||
|
||||
np.set_printoptions(formatter={'all': lambda x: 'int: ' + str(-x)})
|
||||
x = np.arange(3)
|
||||
print(x) # [int: 0 int: -1 int: -2]
|
||||
|
||||
np.set_printoptions() # formatter gets reset
|
||||
print(x) # [0 1 2]
|
||||
```
|
||||
|
||||
【例子】恢复默认选项
|
||||
|
||||
```python
|
||||
np.set_printoptions(edgeitems=3, infstr='inf', linewidth=75,
|
||||
nanstr='nan', precision=8, suppress=False,
|
||||
threshold=1000, formatter=None)
|
||||
```
|
||||
|
||||
|
||||
【函数】
|
||||
|
||||
```python
|
||||
def get_printoptions():
|
||||
```
|
||||
|
||||
- `get_printoptions()`函数:获取当前打印选项。
|
||||
|
||||
【例子】
|
||||
```python
|
||||
import numpy as np
|
||||
|
||||
x = np.get_printoptions()
|
||||
print(x)
|
||||
# {
|
||||
# 'edgeitems': 3,
|
||||
# 'threshold': 1000,
|
||||
# 'floatmode': 'maxprec',
|
||||
# 'precision': 8,
|
||||
# 'suppress': False,
|
||||
# 'linewidth': 75,
|
||||
# 'nanstr': 'nan',
|
||||
# 'infstr': 'inf',
|
||||
# 'sign': '-',
|
||||
# 'formatter': None,
|
||||
# 'legacy': False
|
||||
# }
|
||||
```
|
||||
|
||||
---
|
||||
## 4. 练习
|
||||
|
||||
**(1)只打印或显示numpy数组rand_arr的小数点后3位。**
|
||||
|
||||
【知识点:输入和输出】
|
||||
- 如何在numpy数组中只打印小数点后三位?
|
||||
|
||||
```python
|
||||
import numpy as np
|
||||
|
||||
np.random.seed(2002)
|
||||
rand_arr = np.random.random([5,3])
|
||||
```
|
||||
|
||||
**(2)将numpy数组a中打印的项数限制为最多6个元素。**
|
||||
|
||||
【知识点:输入和输出】
|
||||
- 如何限制numpy数组输出中打印的项目数?
|
||||
|
||||
```python
|
||||
import numpy as np
|
||||
|
||||
a = np.arange(15)
|
||||
```
|
||||
|
||||
**(3)打印完整的numpy数组a而不中断。**
|
||||
|
||||
【知识点:输入和输出】
|
||||
|
||||
- 如何打印完整的numpy数组而不中断?
|
||||
|
||||
```python
|
||||
import numpy as np
|
||||
|
||||
a = np.arange(15)
|
||||
np.set_printoptions(threshold=6)
|
||||
print(a)
|
||||
```
|
||||
164
IntroductionToNumpy/Task10 大作业.md
Normal file
164
IntroductionToNumpy/Task10 大作业.md
Normal file
@@ -0,0 +1,164 @@
|
||||
# 大作业
|
||||
|
||||
本次练习使用 鸢尾属植物数据集`.\iris.data`,在这个数据集中,包括了三类不同的鸢尾属植物:Iris Setosa,Iris Versicolour,Iris Virginica。每类收集了50个样本,因此这个数据集一共包含了150个样本。
|
||||
|
||||
- sepallength:萼片长度
|
||||
- sepalwidth:萼片宽度
|
||||
- petallength:花瓣长度
|
||||
- petalwidth:花瓣宽度
|
||||
|
||||
以上四个特征的单位都是厘米(cm)。
|
||||
|
||||
```python
|
||||
sepallength sepalwidth petallength petalwidth species
|
||||
0 5.1 3.5 1.4 0.2 Iris-setosa
|
||||
1 4.9 3.0 1.4 0.2 Iris-setosa
|
||||
2 4.7 3.2 1.3 0.2 Iris-setosa
|
||||
3 4.6 3.1 1.5 0.2 Iris-setosa
|
||||
4 5.0 3.6 1.4 0.2 Iris-setosa
|
||||
.. ... ... ... ... ...
|
||||
145 6.7 3.0 5.2 2.3 Iris-virginica
|
||||
146 6.3 2.5 5.0 1.9 Iris-virginica
|
||||
147 6.5 3.0 5.2 2.0 Iris-virginica
|
||||
148 6.2 3.4 5.4 2.3 Iris-virginica
|
||||
149 5.9 3.0 5.1 1.8 Iris-virginica
|
||||
|
||||
[150 rows x 5 columns]
|
||||
```
|
||||
|
||||
|
||||
|
||||
**1. 导入鸢尾属植物数据集,保持文本不变。**
|
||||
|
||||
【知识点:输入和输出】
|
||||
- 如何导入存在数字和文本的数据集?
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
**2. 求出鸢尾属植物萼片长度的平均值、中位数和标准差(第1列,sepallength)**
|
||||
|
||||
【知识点:统计相关】
|
||||
- 如何计算numpy数组的均值,中位数,标准差?
|
||||
|
||||
|
||||
|
||||
**3. 创建一种标准化形式的鸢尾属植物萼片长度,其值正好介于0和1之间,这样最小值为0,最大值为1(第1列,sepallength)。**
|
||||
|
||||
【知识点:统计相关】
|
||||
- 如何标准化数组?
|
||||
|
||||
|
||||
**4. 找到鸢尾属植物萼片长度的第5和第95百分位数(第1列,sepallength)。**
|
||||
|
||||
【知识点:统计相关】
|
||||
- 如何找到numpy数组的百分位数?
|
||||
|
||||
|
||||
|
||||
|
||||
**5. 把iris_data数据集中的20个随机位置修改为np.nan值。**
|
||||
|
||||
【知识点:随机抽样】
|
||||
- 如何在数组中的随机位置修改值?
|
||||
|
||||
|
||||
|
||||
**6. 在iris_data的sepallength中查找缺失值的个数和位置(第1列)。**
|
||||
|
||||
【知识点:逻辑函数、搜索】
|
||||
- 如何在numpy数组中找到缺失值的位置?
|
||||
|
||||
|
||||
**7. 筛选具有 sepallength(第1列)< 5.0 并且 petallength(第3列)> 1.5 的 iris_data行。**
|
||||
|
||||
【知识点:搜索】
|
||||
- 如何根据两个或多个条件筛选numpy数组?
|
||||
|
||||
|
||||
|
||||
**8. 选择没有任何 nan 值的 iris_data行。**
|
||||
|
||||
【知识点:逻辑函数、搜索】
|
||||
- 如何从numpy数组中删除包含缺失值的行?
|
||||
|
||||
|
||||
|
||||
|
||||
**9. 计算 iris_data 中sepallength(第1列)和petallength(第3列)之间的相关系数。**
|
||||
|
||||
【知识点:统计相关】
|
||||
- 如何计算numpy数组两列之间的相关系数?
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
**10. 找出iris_data是否有任何缺失值。**
|
||||
|
||||
【知识点:逻辑函数】
|
||||
- 如何查找给定数组是否具有空值?
|
||||
|
||||
|
||||
|
||||
|
||||
**11. 在numpy数组中将所有出现的nan替换为0。**
|
||||
|
||||
【知识点:逻辑函数】
|
||||
- 如何在numpy数组中用0替换所有缺失值?
|
||||
|
||||
|
||||
|
||||
**12. 找出鸢尾属植物物种中的唯一值和唯一值出现的数量。**
|
||||
|
||||
【知识点:数组操作】
|
||||
- 如何在numpy数组中查找唯一值的计数?
|
||||
|
||||
|
||||
|
||||
|
||||
**13. 将 iris_data 的花瓣长度(第3列)以形成分类变量的形式显示。定义:Less than 3 --> 'small';3-5 --> 'medium';>=5 --> 'large'。**
|
||||
|
||||
【知识点:统计相关】
|
||||
- 如何将数字转换为分类(文本)数组?
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
**14. 在 iris_data 中创建一个新列,其中 volume 是 `(pi x petallength x sepallength ^ 2)/ 3`。**
|
||||
|
||||
【知识点:数组操作】
|
||||
- 如何从numpy数组的现有列创建新列?
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
**15. 随机抽鸢尾属植物的种类,使得Iris-setosa的数量是Iris-versicolor和Iris-virginica数量的两倍。**
|
||||
|
||||
【知识点:随机抽样】
|
||||
- 如何在numpy中进行概率抽样?
|
||||
|
||||
|
||||
|
||||
|
||||
**16. 根据 sepallength 列对数据集进行排序。**
|
||||
|
||||
【知识点:排序】
|
||||
- 如何按列对2D数组进行排序?
|
||||
|
||||
|
||||
**17. 在鸢尾属植物数据集中找到最常见的花瓣长度值(第3列)。**
|
||||
|
||||
【知识点:数组操作】
|
||||
- 如何在numpy数组中找出出现次数最多的值?
|
||||
|
||||
|
||||
|
||||
|
||||
**18. 在鸢尾花数据集的 petalwidth(第4列)中查找第一次出现的值大于1.0的位置。**
|
||||
|
||||
【知识点:搜索】
|
||||
- 如何找到第一次出现大于给定值的位置?
|
||||
152
IntroductionToNumpy/dataset/iris.data
Normal file
152
IntroductionToNumpy/dataset/iris.data
Normal file
@@ -0,0 +1,152 @@
|
||||
sepallength,sepalwidth,petallength,petalwidth,species
|
||||
5.1,3.5,1.4,0.2,Iris-setosa
|
||||
4.9,3.0,1.4,0.2,Iris-setosa
|
||||
4.7,3.2,1.3,0.2,Iris-setosa
|
||||
4.6,3.1,1.5,0.2,Iris-setosa
|
||||
5.0,3.6,1.4,0.2,Iris-setosa
|
||||
5.4,3.9,1.7,0.4,Iris-setosa
|
||||
4.6,3.4,1.4,0.3,Iris-setosa
|
||||
5.0,3.4,1.5,0.2,Iris-setosa
|
||||
4.4,2.9,1.4,0.2,Iris-setosa
|
||||
4.9,3.1,1.5,0.1,Iris-setosa
|
||||
5.4,3.7,1.5,0.2,Iris-setosa
|
||||
4.8,3.4,1.6,0.2,Iris-setosa
|
||||
4.8,3.0,1.4,0.1,Iris-setosa
|
||||
4.3,3.0,1.1,0.1,Iris-setosa
|
||||
5.8,4.0,1.2,0.2,Iris-setosa
|
||||
5.7,4.4,1.5,0.4,Iris-setosa
|
||||
5.4,3.9,1.3,0.4,Iris-setosa
|
||||
5.1,3.5,1.4,0.3,Iris-setosa
|
||||
5.7,3.8,1.7,0.3,Iris-setosa
|
||||
5.1,3.8,1.5,0.3,Iris-setosa
|
||||
5.4,3.4,1.7,0.2,Iris-setosa
|
||||
5.1,3.7,1.5,0.4,Iris-setosa
|
||||
4.6,3.6,1.0,0.2,Iris-setosa
|
||||
5.1,3.3,1.7,0.5,Iris-setosa
|
||||
4.8,3.4,1.9,0.2,Iris-setosa
|
||||
5.0,3.0,1.6,0.2,Iris-setosa
|
||||
5.0,3.4,1.6,0.4,Iris-setosa
|
||||
5.2,3.5,1.5,0.2,Iris-setosa
|
||||
5.2,3.4,1.4,0.2,Iris-setosa
|
||||
4.7,3.2,1.6,0.2,Iris-setosa
|
||||
4.8,3.1,1.6,0.2,Iris-setosa
|
||||
5.4,3.4,1.5,0.4,Iris-setosa
|
||||
5.2,4.1,1.5,0.1,Iris-setosa
|
||||
5.5,4.2,1.4,0.2,Iris-setosa
|
||||
4.9,3.1,1.5,0.1,Iris-setosa
|
||||
5.0,3.2,1.2,0.2,Iris-setosa
|
||||
5.5,3.5,1.3,0.2,Iris-setosa
|
||||
4.9,3.1,1.5,0.1,Iris-setosa
|
||||
4.4,3.0,1.3,0.2,Iris-setosa
|
||||
5.1,3.4,1.5,0.2,Iris-setosa
|
||||
5.0,3.5,1.3,0.3,Iris-setosa
|
||||
4.5,2.3,1.3,0.3,Iris-setosa
|
||||
4.4,3.2,1.3,0.2,Iris-setosa
|
||||
5.0,3.5,1.6,0.6,Iris-setosa
|
||||
5.1,3.8,1.9,0.4,Iris-setosa
|
||||
4.8,3.0,1.4,0.3,Iris-setosa
|
||||
5.1,3.8,1.6,0.2,Iris-setosa
|
||||
4.6,3.2,1.4,0.2,Iris-setosa
|
||||
5.3,3.7,1.5,0.2,Iris-setosa
|
||||
5.0,3.3,1.4,0.2,Iris-setosa
|
||||
7.0,3.2,4.7,1.4,Iris-versicolor
|
||||
6.4,3.2,4.5,1.5,Iris-versicolor
|
||||
6.9,3.1,4.9,1.5,Iris-versicolor
|
||||
5.5,2.3,4.0,1.3,Iris-versicolor
|
||||
6.5,2.8,4.6,1.5,Iris-versicolor
|
||||
5.7,2.8,4.5,1.3,Iris-versicolor
|
||||
6.3,3.3,4.7,1.6,Iris-versicolor
|
||||
4.9,2.4,3.3,1.0,Iris-versicolor
|
||||
6.6,2.9,4.6,1.3,Iris-versicolor
|
||||
5.2,2.7,3.9,1.4,Iris-versicolor
|
||||
5.0,2.0,3.5,1.0,Iris-versicolor
|
||||
5.9,3.0,4.2,1.5,Iris-versicolor
|
||||
6.0,2.2,4.0,1.0,Iris-versicolor
|
||||
6.1,2.9,4.7,1.4,Iris-versicolor
|
||||
5.6,2.9,3.6,1.3,Iris-versicolor
|
||||
6.7,3.1,4.4,1.4,Iris-versicolor
|
||||
5.6,3.0,4.5,1.5,Iris-versicolor
|
||||
5.8,2.7,4.1,1.0,Iris-versicolor
|
||||
6.2,2.2,4.5,1.5,Iris-versicolor
|
||||
5.6,2.5,3.9,1.1,Iris-versicolor
|
||||
5.9,3.2,4.8,1.8,Iris-versicolor
|
||||
6.1,2.8,4.0,1.3,Iris-versicolor
|
||||
6.3,2.5,4.9,1.5,Iris-versicolor
|
||||
6.1,2.8,4.7,1.2,Iris-versicolor
|
||||
6.4,2.9,4.3,1.3,Iris-versicolor
|
||||
6.6,3.0,4.4,1.4,Iris-versicolor
|
||||
6.8,2.8,4.8,1.4,Iris-versicolor
|
||||
6.7,3.0,5.0,1.7,Iris-versicolor
|
||||
6.0,2.9,4.5,1.5,Iris-versicolor
|
||||
5.7,2.6,3.5,1.0,Iris-versicolor
|
||||
5.5,2.4,3.8,1.1,Iris-versicolor
|
||||
5.5,2.4,3.7,1.0,Iris-versicolor
|
||||
5.8,2.7,3.9,1.2,Iris-versicolor
|
||||
6.0,2.7,5.1,1.6,Iris-versicolor
|
||||
5.4,3.0,4.5,1.5,Iris-versicolor
|
||||
6.0,3.4,4.5,1.6,Iris-versicolor
|
||||
6.7,3.1,4.7,1.5,Iris-versicolor
|
||||
6.3,2.3,4.4,1.3,Iris-versicolor
|
||||
5.6,3.0,4.1,1.3,Iris-versicolor
|
||||
5.5,2.5,4.0,1.3,Iris-versicolor
|
||||
5.5,2.6,4.4,1.2,Iris-versicolor
|
||||
6.1,3.0,4.6,1.4,Iris-versicolor
|
||||
5.8,2.6,4.0,1.2,Iris-versicolor
|
||||
5.0,2.3,3.3,1.0,Iris-versicolor
|
||||
5.6,2.7,4.2,1.3,Iris-versicolor
|
||||
5.7,3.0,4.2,1.2,Iris-versicolor
|
||||
5.7,2.9,4.2,1.3,Iris-versicolor
|
||||
6.2,2.9,4.3,1.3,Iris-versicolor
|
||||
5.1,2.5,3.0,1.1,Iris-versicolor
|
||||
5.7,2.8,4.1,1.3,Iris-versicolor
|
||||
6.3,3.3,6.0,2.5,Iris-virginica
|
||||
5.8,2.7,5.1,1.9,Iris-virginica
|
||||
7.1,3.0,5.9,2.1,Iris-virginica
|
||||
6.3,2.9,5.6,1.8,Iris-virginica
|
||||
6.5,3.0,5.8,2.2,Iris-virginica
|
||||
7.6,3.0,6.6,2.1,Iris-virginica
|
||||
4.9,2.5,4.5,1.7,Iris-virginica
|
||||
7.3,2.9,6.3,1.8,Iris-virginica
|
||||
6.7,2.5,5.8,1.8,Iris-virginica
|
||||
7.2,3.6,6.1,2.5,Iris-virginica
|
||||
6.5,3.2,5.1,2.0,Iris-virginica
|
||||
6.4,2.7,5.3,1.9,Iris-virginica
|
||||
6.8,3.0,5.5,2.1,Iris-virginica
|
||||
5.7,2.5,5.0,2.0,Iris-virginica
|
||||
5.8,2.8,5.1,2.4,Iris-virginica
|
||||
6.4,3.2,5.3,2.3,Iris-virginica
|
||||
6.5,3.0,5.5,1.8,Iris-virginica
|
||||
7.7,3.8,6.7,2.2,Iris-virginica
|
||||
7.7,2.6,6.9,2.3,Iris-virginica
|
||||
6.0,2.2,5.0,1.5,Iris-virginica
|
||||
6.9,3.2,5.7,2.3,Iris-virginica
|
||||
5.6,2.8,4.9,2.0,Iris-virginica
|
||||
7.7,2.8,6.7,2.0,Iris-virginica
|
||||
6.3,2.7,4.9,1.8,Iris-virginica
|
||||
6.7,3.3,5.7,2.1,Iris-virginica
|
||||
7.2,3.2,6.0,1.8,Iris-virginica
|
||||
6.2,2.8,4.8,1.8,Iris-virginica
|
||||
6.1,3.0,4.9,1.8,Iris-virginica
|
||||
6.4,2.8,5.6,2.1,Iris-virginica
|
||||
7.2,3.0,5.8,1.6,Iris-virginica
|
||||
7.4,2.8,6.1,1.9,Iris-virginica
|
||||
7.9,3.8,6.4,2.0,Iris-virginica
|
||||
6.4,2.8,5.6,2.2,Iris-virginica
|
||||
6.3,2.8,5.1,1.5,Iris-virginica
|
||||
6.1,2.6,5.6,1.4,Iris-virginica
|
||||
7.7,3.0,6.1,2.3,Iris-virginica
|
||||
6.3,3.4,5.6,2.4,Iris-virginica
|
||||
6.4,3.1,5.5,1.8,Iris-virginica
|
||||
6.0,3.0,4.8,1.8,Iris-virginica
|
||||
6.9,3.1,5.4,2.1,Iris-virginica
|
||||
6.7,3.1,5.6,2.4,Iris-virginica
|
||||
6.9,3.1,5.1,2.3,Iris-virginica
|
||||
5.8,2.7,5.1,1.9,Iris-virginica
|
||||
6.8,3.2,5.9,2.3,Iris-virginica
|
||||
6.7,3.3,5.7,2.5,Iris-virginica
|
||||
6.7,3.0,5.2,2.3,Iris-virginica
|
||||
6.3,2.5,5.0,1.9,Iris-virginica
|
||||
6.5,3.0,5.2,2.0,Iris-virginica
|
||||
6.2,3.4,5.4,2.3,Iris-virginica
|
||||
5.9,3.0,5.1,1.8,Iris-virginica
|
||||
|
||||
@@ -50,7 +50,7 @@
|
||||
|
||||
## 基本信息
|
||||
|
||||
- 学习周期:11天,每天平均花费时间3小时-5小时不等,根据个人学习接受能力强弱有所浮动。
|
||||
- 学习周期:10天,每天平均花费时间3小时-5小时不等,根据个人学习接受能力强弱有所浮动。
|
||||
- 学习形式:理论学习 + 练习
|
||||
- 人群定位:有一定python编程的基础。
|
||||
- 先修内容:[Python编程语言](https://github.com/datawhalechina/team-learning-program/tree/master/PythonLanguage)
|
||||
@@ -71,7 +71,7 @@
|
||||
- 熟悉打开规则
|
||||
|
||||
|
||||
### Task01:输入输出(2天)
|
||||
### Task01:输入输出(1天)
|
||||
- 熟悉 Numpy 如何处理二进制文件和文本文件。
|
||||
|
||||
|
||||
|
||||
@@ -1,229 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2020-09-06T03:05:42.613206Z",
|
||||
"start_time": "2020-09-06T03:05:42.609217Z"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"**只打印或显示numpy数组rand_arr的小数点后3位。**\n",
|
||||
"\n",
|
||||
"- `rand_arr = np.random.random([5, 3])`\n",
|
||||
"\n",
|
||||
"【知识点:输入和输出】\n",
|
||||
"- 如何在numpy数组中只打印小数点后三位?"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2020-09-06T03:06:03.840763Z",
|
||||
"start_time": "2020-09-06T03:06:03.834778Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[[0.25106063 0.32735642 0.17623774]\n",
|
||||
" [0.85566173 0.11420437 0.53735783]\n",
|
||||
" [0.82253612 0.07625331 0.9358199 ]\n",
|
||||
" [0.97268875 0.91794448 0.61845404]\n",
|
||||
" [0.58615827 0.28096349 0.88048956]]\n",
|
||||
"[[0.251 0.327 0.176]\n",
|
||||
" [0.856 0.114 0.537]\n",
|
||||
" [0.823 0.076 0.936]\n",
|
||||
" [0.973 0.918 0.618]\n",
|
||||
" [0.586 0.281 0.88 ]]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import numpy as np\n",
|
||||
"\n",
|
||||
"rand_arr = np.random.random([5, 3])\n",
|
||||
"print(rand_arr)\n",
|
||||
"# [[0.33033427 0.05538836 0.05947305]\n",
|
||||
"# [0.36199439 0.48844555 0.26309599]\n",
|
||||
"# [0.05361816 0.71539075 0.60645637]\n",
|
||||
"# [0.95000384 0.31424729 0.41032467]\n",
|
||||
"# [0.36082793 0.50101268 0.6306832 ]]\n",
|
||||
"\n",
|
||||
"np.set_printoptions(precision=3)\n",
|
||||
"print(rand_arr)\n",
|
||||
"# [[0.33 0.055 0.059]\n",
|
||||
"# [0.362 0.488 0.263]\n",
|
||||
"# [0.054 0.715 0.606]\n",
|
||||
"# [0.95 0.314 0.41 ]\n",
|
||||
"# [0.361 0.501 0.631]]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2020-09-06T03:06:22.855956Z",
|
||||
"start_time": "2020-09-06T03:06:22.850970Z"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"**将numpy数组a中打印的项数限制为最多6个元素。**\n",
|
||||
"\n",
|
||||
"【知识点:输入和输出】\n",
|
||||
"- 如何限制numpy数组输出中打印的项目数?"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2020-09-06T03:06:51.128200Z",
|
||||
"start_time": "2020-09-06T03:06:51.123215Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14]\n",
|
||||
"[ 0 1 2 ... 12 13 14]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import numpy as np\n",
|
||||
"\n",
|
||||
"a = np.arange(15)\n",
|
||||
"print(a)\n",
|
||||
"# [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14]\n",
|
||||
"np.set_printoptions(threshold=6)\n",
|
||||
"print(a)\n",
|
||||
"# [ 0 1 2 ... 12 13 14]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2020-09-06T03:07:14.131621Z",
|
||||
"start_time": "2020-09-06T03:07:14.126608Z"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"**打印完整的numpy数组a而不中断。**\n",
|
||||
"\n",
|
||||
"【知识点:输入和输出】\n",
|
||||
"- 如何打印完整的numpy数组而不中断?"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2020-09-06T03:07:47.868079Z",
|
||||
"start_time": "2020-09-06T03:07:47.863126Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[ 0 1 2 ... 12 13 14]\n",
|
||||
"[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import numpy as np\n",
|
||||
"\n",
|
||||
"a = np.arange(15)\n",
|
||||
"np.set_printoptions(threshold=6)\n",
|
||||
"print(a)\n",
|
||||
"# [ 0 1 2 ... 12 13 14]\n",
|
||||
"np.set_printoptions(threshold=np.iinfo(np.int).max)\n",
|
||||
"print(a)\n",
|
||||
"# [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python35",
|
||||
"language": "python",
|
||||
"name": "python35"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.10"
|
||||
},
|
||||
"toc": {
|
||||
"base_numbering": 1,
|
||||
"nav_menu": {},
|
||||
"number_sections": true,
|
||||
"sideBar": true,
|
||||
"skip_h1_title": false,
|
||||
"title_cell": "Table of Contents",
|
||||
"title_sidebar": "Contents",
|
||||
"toc_cell": false,
|
||||
"toc_position": {},
|
||||
"toc_section_display": true,
|
||||
"toc_window_display": false
|
||||
},
|
||||
"varInspector": {
|
||||
"cols": {
|
||||
"lenName": 16,
|
||||
"lenType": 16,
|
||||
"lenVar": 40
|
||||
},
|
||||
"kernels_config": {
|
||||
"python": {
|
||||
"delete_cmd_postfix": "",
|
||||
"delete_cmd_prefix": "del ",
|
||||
"library": "var_list.py",
|
||||
"varRefreshCmd": "print(var_dic_list())"
|
||||
},
|
||||
"r": {
|
||||
"delete_cmd_postfix": ") ",
|
||||
"delete_cmd_prefix": "rm(",
|
||||
"library": "var_list.r",
|
||||
"varRefreshCmd": "cat(var_dic_list()) "
|
||||
}
|
||||
},
|
||||
"types_to_exclude": [
|
||||
"module",
|
||||
"function",
|
||||
"builtin_function_or_method",
|
||||
"instance",
|
||||
"_Feature"
|
||||
],
|
||||
"window_display": false
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
@@ -1,372 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# 输入和输出\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"## numpy 二进制文件\n",
|
||||
"\n",
|
||||
"`save()`、`savez()`和`load()`函数以 numpy 专用的二进制类型(npy、npz)保存和读取数据,这三个函数会自动处理ndim、dtype、shape等信息,使用它们读写数组非常方便,但是`save()`和`savez()`输出的文件很难与其它语言编写的程序兼容。\n",
|
||||
"\n",
|
||||
"npy格式:以二进制的方式存储文件,在二进制文件第一行以文本形式保存了数据的元信息(ndim,dtype,shape等),可以用二进制工具查看内容。\n",
|
||||
"\n",
|
||||
"npz格式:以压缩打包的方式存储文件,可以用压缩软件解压。\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"- `numpy.save(file, arr, allow_pickle=True, fix_imports=True)` Save an array to a binary file in NumPy `.npy` format.\n",
|
||||
"- `numpy.load(file, mmap_mode=None, allow_pickle=False, fix_imports=True, encoding='ASCII')` Load arrays or pickled objects from ``.npy``, ``.npz`` or pickled files.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"【例】\n",
|
||||
"```python\n",
|
||||
"import numpy as np\n",
|
||||
"\n",
|
||||
"outfile = r'.\\test.npy'\n",
|
||||
"np.random.seed(20200619)\n",
|
||||
"x = np.random.uniform(0, 1, [3, 5])\n",
|
||||
"np.save(outfile, x)\n",
|
||||
"y = np.load(outfile)\n",
|
||||
"print(y)\n",
|
||||
"# [[0.01123594 0.66790705 0.50212171 0.7230908 0.61668256]\n",
|
||||
"# [0.00668332 0.1234096 0.96092409 0.67925305 0.38596837]\n",
|
||||
"# [0.72342998 0.26258324 0.24318845 0.98795012 0.77370715]]\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"- `numpy.savez(file, *args, **kwds)` Save several arrays into a single file in uncompressed `.npz` format.\n",
|
||||
"\n",
|
||||
"`savez()`第一个参数是文件名,其后的参数都是需要保存的数组,也可以使用关键字参数为数组起一个名字,非关键字参数传递的数组会自动起名为`arr_0, arr_1, …`。\n",
|
||||
"\n",
|
||||
"`savez()`输出的是一个压缩文件(扩展名为npz),其中每个文件都是一个`save()`保存的npy文件,文件名对应于数组名。`load()`自动识别npz文件,并且返回一个类似于字典的对象,可以通过数组名作为关键字获取数组的内容。\n",
|
||||
"\n",
|
||||
"【例】将多个数组保存到一个文件,可以使用`numpy.savez()`函数。\n",
|
||||
"```python\n",
|
||||
"import numpy as np\n",
|
||||
"\n",
|
||||
"outfile = r'.\\test.npz'\n",
|
||||
"x = np.linspace(0, np.pi, 5)\n",
|
||||
"y = np.sin(x)\n",
|
||||
"z = np.cos(x)\n",
|
||||
"np.savez(outfile, x, y, z_d=z)\n",
|
||||
"data = np.load(outfile)\n",
|
||||
"np.set_printoptions(suppress=True)\n",
|
||||
"print(data.files) \n",
|
||||
"# ['z_d', 'arr_0', 'arr_1']\n",
|
||||
"\n",
|
||||
"print(data['arr_0'])\n",
|
||||
"# [0. 0.78539816 1.57079633 2.35619449 3.14159265]\n",
|
||||
"\n",
|
||||
"print(data['arr_1'])\n",
|
||||
"# [0. 0.70710678 1. 0.70710678 0. ]\n",
|
||||
"\n",
|
||||
"print(data['z_d'])\n",
|
||||
"# [ 1. 0.70710678 0. -0.70710678 -1. ]\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"用解压软件打开 test.npz 文件,会发现其中有三个文件:`arr_0.npy,arr_1.npy,z_d.npy`,其中分别保存着数组`x,y,z`的内容。\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"---\n",
|
||||
"## 文本文件\n",
|
||||
"`savetxt()`,`loadtxt()`和`genfromtxt()`函数用来存储和读取文本文件(如TXT,CSV等)。`genfromtxt()`比`loadtxt()`更加强大,可对缺失数据进行处理。\n",
|
||||
"\n",
|
||||
"- `numpy.savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\\n', header='', footer='', comments='# ', encoding=None)` Save an array to a text file.\n",
|
||||
" - fname:文件路径\n",
|
||||
" - X:存入文件的数组。\n",
|
||||
" - fmt:写入文件中每个元素的格式字符串,默认'%.18e'(科学计数法,保留18位小数)。\n",
|
||||
" - delimiter:分割字符串,默认以空格分隔。\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"- `numpy.loadtxt(fname, dtype=float, comments='#', delimiter=None, converters=None, skiprows=0, usecols=None, unpack=False, ndmin=0, encoding='bytes', max_rows=None)` Load data from a text file. \n",
|
||||
" - fname:文件路径。\n",
|
||||
" - dtype:数据类型,默认为float。\n",
|
||||
" - comments:字符串或字符串组成的列表,默认为`#`,用于标记注释的开始。\n",
|
||||
" - skiprows:跳过多少行,一般跳过第一行表头。\n",
|
||||
" - usecols:元组(元组内数据为列的数值索引), 用来指定要读取数据的列(第一列为0)。\n",
|
||||
" - unpack:当加载多列数据时是否需要将数据列进行解耦赋值给不同的变量。\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"【例】写入和读出TXT文件。\n",
|
||||
"```python\n",
|
||||
"import numpy as np\n",
|
||||
"\n",
|
||||
"outfile = r'.\\test.txt'\n",
|
||||
"x = np.arange(0, 10).reshape(2, -1)\n",
|
||||
"np.savetxt(outfile, x)\n",
|
||||
"y = np.loadtxt(outfile)\n",
|
||||
"print(y)\n",
|
||||
"# [[0. 1. 2. 3. 4.]\n",
|
||||
"# [5. 6. 7. 8. 9.]]\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"test.txt文件如下:\n",
|
||||
"\n",
|
||||
"```python\n",
|
||||
"0.000000000000000000e+00 1.000000000000000000e+00 2.000000000000000000e+00 3.000000000000000000e+00 4.000000000000000000e+00\n",
|
||||
"5.000000000000000000e+00 6.000000000000000000e+00 7.000000000000000000e+00 8.000000000000000000e+00 9.000000000000000000e+00\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"【例】写入和读出CSV文件。\n",
|
||||
"```python\n",
|
||||
"import numpy as np\n",
|
||||
"\n",
|
||||
"outfile = r'.\\test.csv'\n",
|
||||
"x = np.arange(0, 10, 0.5).reshape(4, -1)\n",
|
||||
"np.savetxt(outfile, x, fmt='%.3f', delimiter=',')\n",
|
||||
"y = np.loadtxt(outfile, delimiter=',')\n",
|
||||
"print(y)\n",
|
||||
"# [[0. 0.5 1. 1.5 2. ]\n",
|
||||
"# [2.5 3. 3.5 4. 4.5]\n",
|
||||
"# [5. 5.5 6. 6.5 7. ]\n",
|
||||
"# [7.5 8. 8.5 9. 9.5]]\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"test.csv文件如下:\n",
|
||||
"```python\n",
|
||||
"0.000,0.500,1.000,1.500,2.000\n",
|
||||
"2.500,3.000,3.500,4.000,4.500\n",
|
||||
"5.000,5.500,6.000,6.500,7.000\n",
|
||||
"7.500,8.000,8.500,9.000,9.500\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"`genfromtxt()`是面向结构数组和缺失数据处理的。\n",
|
||||
"\n",
|
||||
"- `numpy.genfromtxt(fname, dtype=float, comments='#', delimiter=None, skip_header=0, skip_footer=0, converters=None, missing_values=None, filling_values=None, usecols=None, names=None, excludelist=None, deletechars=''.join(sorted(NameValidator.defaultdeletechars)), replace_space='_', autostrip=False, case_sensitive=True, defaultfmt=\"f%i\", unpack=None, usemask=False, loose=True, invalid_raise=True, max_rows=None, encoding='bytes')` Load data from a text file, with missing values handled as specified.\n",
|
||||
" - names:设置为True时,程序将把第一行作为列名称。\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"data.csv文件如下:\n",
|
||||
"```python\n",
|
||||
"id,value1,value2,value3\n",
|
||||
"1,123,1.4,23\n",
|
||||
"2,110,0.5,18\n",
|
||||
"3,164,2.1,19\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"【例】\n",
|
||||
"```python\n",
|
||||
"import numpy as np\n",
|
||||
"\n",
|
||||
"outfile = r'.\\data.csv'\n",
|
||||
"x = np.loadtxt(outfile, delimiter=',', skiprows=1)\n",
|
||||
"print(x)\n",
|
||||
"# [[ 1. 123. 1.4 23. ]\n",
|
||||
"# [ 2. 110. 0.5 18. ]\n",
|
||||
"# [ 3. 164. 2.1 19. ]]\n",
|
||||
"\n",
|
||||
"x = np.loadtxt(outfile, delimiter=',', skiprows=1, usecols=(1, 2))\n",
|
||||
"print(x)\n",
|
||||
"# [[123. 1.4]\n",
|
||||
"# [110. 0.5]\n",
|
||||
"# [164. 2.1]]\n",
|
||||
"\n",
|
||||
"val1, val2 = np.loadtxt(outfile, delimiter=',', skiprows=1, usecols=(1, 2), unpack=True)\n",
|
||||
"print(val1) # [123. 110. 164.]\n",
|
||||
"print(val2) # [1.4 0.5 2.1]\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"【例】\n",
|
||||
"```python\n",
|
||||
"import numpy as np\n",
|
||||
"\n",
|
||||
"outfile = r'.\\data.csv'\n",
|
||||
"x = np.genfromtxt(outfile, delimiter=',', names=True)\n",
|
||||
"print(x)\n",
|
||||
"# [(1., 123., 1.4, 23.) (2., 110., 0.5, 18.) (3., 164., 2.1, 19.)]\n",
|
||||
"\n",
|
||||
"print(type(x)) \n",
|
||||
"# <class 'numpy.ndarray'>\n",
|
||||
"\n",
|
||||
"print(x.dtype)\n",
|
||||
"# [('id', '<f8'), ('value1', '<f8'), ('value2', '<f8'), ('value3', '<f8')]\n",
|
||||
"\n",
|
||||
"print(x['id']) # [1. 2. 3.]\n",
|
||||
"print(x['value1']) # [123. 110. 164.]\n",
|
||||
"print(x['value2']) # [1.4 0.5 2.1]\n",
|
||||
"print(x['value3']) # [23. 18. 19.]\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"data1.csv文件\n",
|
||||
"```python\n",
|
||||
"id,value1,value2,value3\n",
|
||||
"1,123,1.4,23\n",
|
||||
"2,110,,18\n",
|
||||
"3,,2.1,19\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"【例】\n",
|
||||
"```python\n",
|
||||
"import numpy as np\n",
|
||||
"\n",
|
||||
"outfile = r'.\\data1.csv'\n",
|
||||
"x = np.genfromtxt(outfile, delimiter=',', names=True)\n",
|
||||
"print(x)\n",
|
||||
"# [(1., 123., 1.4, 23.) (2., 110., nan, 18.) (3., nan, 2.1, 19.)]\n",
|
||||
"\n",
|
||||
"print(type(x)) \n",
|
||||
"# <class 'numpy.ndarray'>\n",
|
||||
"\n",
|
||||
"print(x.dtype)\n",
|
||||
"# [('id', '<f8'), ('value1', '<f8'), ('value2', '<f8'), ('value3', '<f8')]\n",
|
||||
"\n",
|
||||
"print(x['id']) # [1. 2. 3.]\n",
|
||||
"print(x['value1']) # [123. 110. nan]\n",
|
||||
"print(x['value2']) # [1.4 nan 2.1]\n",
|
||||
"print(x['value3']) # [23. 18. 19.]\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"---\n",
|
||||
"## 文本格式选项\n",
|
||||
"\n",
|
||||
"- `numpy.set_printoptions(precision=None,threshold=None, edgeitems=None,linewidth=None, suppress=None, nanstr=None, infstr=None,formatter=None, sign=None, floatmode=None, **kwarg)` Set printing options.\n",
|
||||
" - `precision`:设置浮点数的输出精度,即小数点后显示的位数,默认是8。\n",
|
||||
" - `threshold`:触发概略显示的阈值,数组元素总数超过该值时,中间部分以“…”的形式省略,默认是1000。\n",
|
||||
" - `linewidth`:用于确定每行多少字符数后插入换行符,默认为75。\n",
|
||||
" - `suppress`:当`suppress=True`,表示小数不需要以科学计数法的形式输出,默认是False。\n",
|
||||
" - `nanstr`:浮点非数字的字符串表示形式,默认`nan`。\n",
|
||||
" - `infstr`:浮点无穷大的字符串表示形式,默认`inf`。\n",
|
||||
"\n",
|
||||
"These options determine the way floating point numbers, arrays and other NumPy objects are displayed.\n",
|
||||
"\n",
|
||||
"【例】\n",
|
||||
"```python\n",
|
||||
"import numpy as np\n",
|
||||
"\n",
|
||||
"np.set_printoptions(precision=4)\n",
|
||||
"x = np.array([1.123456789])\n",
|
||||
"print(x) # [1.1235]\n",
|
||||
"\n",
|
||||
"np.set_printoptions(threshold=20)\n",
|
||||
"x = np.arange(50)\n",
|
||||
"print(x) # [ 0 1 2 ... 47 48 49]\n",
|
||||
"\n",
|
||||
"np.set_printoptions(threshold=np.iinfo(int).max)\n",
|
||||
"print(x)\n",
|
||||
"# [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23\n",
|
||||
"# 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47\n",
|
||||
"# 48 49]\n",
|
||||
"\n",
|
||||
"eps = np.finfo(float).eps\n",
|
||||
"x = np.arange(4.)\n",
|
||||
"x = x ** 2 - (x + eps) ** 2\n",
|
||||
"print(x) \n",
|
||||
"# [-4.9304e-32 -4.4409e-16 0.0000e+00 0.0000e+00]\n",
|
||||
"np.set_printoptions(suppress=True)\n",
|
||||
"print(x) # [-0. -0. 0. 0.]\n",
|
||||
"\n",
|
||||
"x = np.linspace(0, 10, 10)\n",
|
||||
"print(x)\n",
|
||||
"# [ 0. 1.1111 2.2222 3.3333 4.4444 5.5556 6.6667 7.7778 8.8889\n",
|
||||
"# 10. ]\n",
|
||||
"np.set_printoptions(precision=2, suppress=True, threshold=5)\n",
|
||||
"print(x) # [ 0. 1.11 2.22 ... 7.78 8.89 10. ]\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"- `numpy.get_printoptions()` Return the current print options.\n",
|
||||
"\n",
|
||||
"【例】\n",
|
||||
"```python\n",
|
||||
"import numpy as np\n",
|
||||
"\n",
|
||||
"x = np.get_printoptions()\n",
|
||||
"print(x)\n",
|
||||
"# {\n",
|
||||
"# 'edgeitems': 3, \n",
|
||||
"# 'threshold': 1000, \n",
|
||||
"# 'floatmode': 'maxprec', \n",
|
||||
"# 'precision': 8, \n",
|
||||
"# 'suppress': False, \n",
|
||||
"# 'linewidth': 75, \n",
|
||||
"# 'nanstr': 'nan', \n",
|
||||
"# 'infstr': 'inf', \n",
|
||||
"# 'sign': '-', \n",
|
||||
"# 'formatter': None, \n",
|
||||
"# 'legacy': False\n",
|
||||
"# }\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python35",
|
||||
"language": "python",
|
||||
"name": "python35"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.10"
|
||||
},
|
||||
"toc": {
|
||||
"base_numbering": 1,
|
||||
"nav_menu": {},
|
||||
"number_sections": true,
|
||||
"sideBar": true,
|
||||
"skip_h1_title": false,
|
||||
"title_cell": "Table of Contents",
|
||||
"title_sidebar": "Contents",
|
||||
"toc_cell": false,
|
||||
"toc_position": {},
|
||||
"toc_section_display": true,
|
||||
"toc_window_display": true
|
||||
},
|
||||
"varInspector": {
|
||||
"cols": {
|
||||
"lenName": 16,
|
||||
"lenType": 16,
|
||||
"lenVar": 40
|
||||
},
|
||||
"kernels_config": {
|
||||
"python": {
|
||||
"delete_cmd_postfix": "",
|
||||
"delete_cmd_prefix": "del ",
|
||||
"library": "var_list.py",
|
||||
"varRefreshCmd": "print(var_dic_list())"
|
||||
},
|
||||
"r": {
|
||||
"delete_cmd_postfix": ") ",
|
||||
"delete_cmd_prefix": "rm(",
|
||||
"library": "var_list.r",
|
||||
"varRefreshCmd": "cat(var_dic_list()) "
|
||||
}
|
||||
},
|
||||
"types_to_exclude": [
|
||||
"module",
|
||||
"function",
|
||||
"builtin_function_or_method",
|
||||
"instance",
|
||||
"_Feature"
|
||||
],
|
||||
"window_display": false
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
Reference in New Issue
Block a user