pyhdf读取方法汇总
pyhdf读取方法汇总
简介
在pyhdf中,HDF4数据分为四部分,SD(Scientific Dataset),VS(vdata),V(Vgroup)以及HDF(common declarations)。pyhdf中数据有两种类型,datasets(SD)和vdata(VS),即数据集和记录,所有数据存储在vgroups中,以类似文件夹的形式组成HDF文件。
模块介绍
1. SD 数据集模块
SD是用来读取科学数据集的模块,包含五个类:SD、SDAttr、SDC、SDS、SDim。每个类中包含了很多方法,用于完成不同目的的操作。主要使用的为SD和SDC。
# 1. 类SD:根据文件路径进行类初始化,可用于数据集的选取读取、创建等。
# 常用方法:
create(self, name, data_type, dim_sizes) #创建数据集
datasets(self) #列出所有数据集
nametoindex(self, sds_name) #数据集名转索引(index)
select(self, name_or_index) #选择数据集(常用)
reftoindex(self, sds_ref) #数据集引用号(ref)转索引(常用),可配合V模块中tagrefs()返回的ref来选择数据集
# code
SD_file = SD(ifile)  # initialise the SD interface on the file at path `ifile`
ds_dict = SD_file.datasets()  # list every dataset stored in the file
sds_obj = SD_file.select('Radar_Reflectivity')  # select one dataset by name
sds_obj.endaccess()  # release the dataset before closing the interface
SD_file.end()  # terminate the SD interface
# 2. 类SDC:常数集合,包含了数据类型和打开方式。
# code
# Create/open the output file for writing; SDC supplies the open-mode flags.
sd_out = SD(outfile, SDC.CREATE | SDC.WRITE)
# Create a 1200 x 1600 float32 dataset named 'data' (SDC supplies the type code).
sds1 = sd_out.create('data', SDC.FLOAT32, (1200, 1600))
sds1.set(np.zeros((1200, 1600), dtype=np.float32))  # write the array
sds1.endaccess()  # release the dataset before closing the interface
sd_out.end()  # terminate the SD interface
# 3. 类SDS:包含数据集对象,及所选取的具体数据集合。
# 常用方法:
get(self, start=None, count=None, stride=None) #以数组形式读取数据
info(self) #获得数据名等信息
# code
sds_obj = SD_file.select('Radar_Reflectivity')  # SDS object for one dataset
arr = sds_obj.get()  # read the data as an array (start/count/stride left at None)
info = sds_obj.info()  # dataset name and related information
# 4. 类SDAttr:包含了与数据集属性有关的方法。
# code
attrs = sds_obj.attributes()
2. VS 记录模块
VS是用来处理记录(Vdata)的模块,VD子类中封装了单个记录表的操作函数,VDAttr子类中的函数主要实现数据表属性的读写等操作,VDField子类中的函数主要实现数据表中字段的定义等功能,VS子类中的函数主要实现数据表的创建、打开、操作、结束等功能。
在对卫星数据处理时用的频次不如SD高,但也有部分卫星的lon/lat用vdata组成,如cloudsat。其包含四个类:VD、VDAttr、VDField以及VS,常用的为VS和VD。
# 类VS:通过HDF文件路径来初始化处理接口,主要目的是在通过V模块逐级打开HDF文件时对其中包含的Vdata进行处理
# 方法有:
attach(self, num_name, write=0) # 获取Vdata
create(self, name, fields) # 创建Vdata
find(self, vName) # 根据名称查找Vdata
vdatainfo(self, listAttr=0) # 给出所有Vdatas的信息
# 类VD:处理Vdata,首先要通过HDF的vstart()获取VS接口,再用VS的attach()获取Vdata
# 方法有:
inquire(self) #获取Vdata相关信息
read(self, nRec=1) #读取给定数目的记录
# 类VDField:对数据记录进行操作。
# 类VDAttr:与属性有关的操作。
hdf_file = HDF(ifile, HC.READ)  # keep the file handle so it can be closed later
vdataFile = hdf_file.vstart()  # initialise the VS (vdata) interface
VD_object = vdataFile.attach('Longitude')  # VD instance for the 'Longitude' vdata
vInfo = VD_object.inquire()  # basic vdata information; first item is the record count
lon = VD_object.read(nRec=vInfo[0])  # read every record (nRec = number of rows)
VD_object.detach()  # "close" the vdata
vdataFile.end()  # terminate the vdata interface
hdf_file.close()  # close the HDF file
vdata 读取模式
# read vdata: method 1
from pyhdf.HDF import *
from pyhdf.VS import *

f = HDF('inventory.hdf')       # open 'inventory.hdf' in read mode
vs = f.vstart()                # init vdata interface
vd = vs.attach('INVENTORY')    # attach 'INVENTORY' in read mode

# Display some vdata attributes
print("status:", vd.status)
print("vdata: ", vd._name)     # predefined attribute: vdata name
print("nrecs: ", vd._nrecs)    # predefined attribute: num records

# Display value of attribute 'unit' for all fields
print("units: ", end=" ")
for fieldName in vd._fields:   # loop over all field names
    try:
        # instantiate field and obtain value of attribute 'unit'
        v = vd.field(fieldName).unit
    except (AttributeError, HDF4Error):
        # no 'unit' attribute on this field: skip it, but let any
        # unrelated error propagate instead of being silently swallowed
        continue
    print("%s: %s" % (fieldName, v), end=" ")
print("")

# Loop over the vdata records one read() call at a time.
# Current record position is 0 after attaching the vdata.
while True:
    try:
        rec = vd.read()        # read next record
        # rec = vd[vd.tell()]
    except HDF4Error:          # end of vdata reached
        break

vd.detach()                    # "close" the vdata
vs.end()                       # terminate the vdata interface
f.close()                      # close the HDF file
# read vdata: method 2 -- easy
from pyhdf.HDF import *
from pyhdf.VS import *

f = HDF('inventory.hdf')       # open 'inventory.hdf' in read mode
vs = f.vstart()                # init vdata interface
vd = vs.attach('INVENTORY')    # attach 'INVENTORY' in read mode
# ...
# Read all records at once with a full slice and keep them as a list.
arr = list(vd[:])
vd.detach()                    # "close" the vdata
vs.end()                       # terminate the vdata interface
f.close()                      # close the HDF file
vdata 写入模式
# method 1: set attributes through explicit attribute objects
from pyhdf.HDF import *
from pyhdf.VS import *

f = HDF('test.hdf', HC.WRITE)    # open file 'test.hdf' in write mode
vs = f.vstart()                  # init vdata interface
vd = vs.attach('vtest', 1)       # attach vdata 'vtest' in write mode

attr = vd.attr('version')        # vdata-level attribute 'version'
attr.set(HC.CHAR8, '1.0')        # set attribute 'version' to '1.0'
print(attr.get())                # get and print attribute value

fld = vd.field('fld1')           # field 'fld1' of the vdata
attr = fld.attr('range')         # field-level attribute 'range'
attr.set(HC.INT32, (-10, 15))    # set attribute 'range'
print(attr.get())                # get and print attribute value

vd.detach()                      # "close" the vdata
vs.end()                         # terminate the vdata interface
f.close()                        # close the HDF file
# method 2: set attributes through plain Python attribute assignment
from pyhdf.HDF import *
from pyhdf.VS import *

f = HDF('test.hdf', HC.WRITE)    # open file 'test.hdf' in write mode
vs = f.vstart()                  # init vdata interface
vd = vs.attach('vtest', 1)       # attach vdata 'vtest' in write mode

vd.version = '1.0'               # create vdata attribute 'version'
print(vd.version)                # print attribute value

fld = vd.field('fld1')           # field 'fld1' of the vdata
fld.range = (-10, 15)            # create field attribute 'range'
print(fld.range)                 # print attribute value

vd.detach()                      # "close" the vdata
vs.end()                         # terminate the vdata interface
f.close()                        # close the HDF file
3. 组模块V
组模块是用来逐级打开HDF文件,并获取组相关信息如tag和ref等的模块。是常用的一个模块,包括V、VG以及VGAttr三个类。
# 1.类V:通过路径初始化接口并创建、查找或获取组,常用方法有:
attach(self, num_name, write=0) #根据名称获取组
create(self, name) #根据名称创建组
find(self, name) #根据名称查找组
# 2.类VG:处理vgroups,首先需要通过类HDF的vgstart()初始化V接口,再用V的attach()获取组,常用方法有:
tagrefs(self) #获取所有标签和引用,用于进一步打开数据。
# 3.类VGAttr:与属性有关的操作。
# 流程
from pyhdf.HDF import *
from pyhdf.V import *
hdfFile = HDF(name, HC.xxx) # open HDF file
v = hdfFile.vgstart() # initialize V interface on HDF file
# ... # manipulate vgroups
v.end() # terminate V interface
hdfFile.close() # close HDF file
from pyhdf.HDF import *
from pyhdf.V import *

# Open file 'test.hdf' in write mode
f = HDF('test.hdf', HC.WRITE)
v = f.vgstart()                  # init vgroup interface
vg = v.attach('vtest', 1)        # attach vgroup 'vtest' in write mode

attr = vg.attr('version')        # define the 'version' attribute
attr.set(HC.CHAR8, '1.0')        # set attribute 'version' to '1.0'
print(attr.get())                # get and print attribute value

attr = vg.attr('range')          # prepare to define attribute 'range'
attr.set(HC.INT32, (-10, 15))    # set attribute 'range'
print(attr.get())                # get and print attribute value

vg.detach()                      # "close" the vgroup
v.end()                          # terminate the vgroup interface
f.close()                        # close the HDF file
# read a V-group
# 用VGroup打开文件,索引到所有数据(SD/VD)
from pyhdf.HDF import *
from pyhdf.V import *
from pyhdf.VS import *
from pyhdf.SD import *
import sys
def describevg(refnum):
    """Print a description of the vgroup with the given reference number.

    Uses the module-level interfaces ``v`` (vgroup), ``vs`` (vdata) and
    ``sd`` (scientific dataset); all three must be initialised on the same
    HDF file before this function is called.

    Args:
        refnum: Reference number of the vgroup to describe.
    """
    # Open vgroup in read mode.
    vg = v.attach(refnum)
    try:
        print("----------------")
        print("name:", vg._name, "class:", vg._class, "tag,ref:",
              vg._tag, vg._refnum)

        # Show the number of members of each main object type.
        print("members: ", vg._nmembers,
              "datasets:", vg.nrefs(HC.DFTAG_NDG),
              "vdatas: ", vg.nrefs(HC.DFTAG_VH),
              "vgroups: ", vg.nrefs(HC.DFTAG_VG))

        # Read the contents of the vgroup and display info about each member.
        for index, (tag, ref) in enumerate(vg.tagrefs()):
            print("member index", index)

            if tag == HC.DFTAG_VH:
                # Vdata tag: open the single vdata table through VS.
                vd = vs.attach(ref)
                nrecs, intmode, fields, size, name = vd.inquire()
                print(" vdata:", name, "tag,ref:", tag, ref)
                print(" fields:", fields)
                print(" nrecs:", nrecs)
                vd.detach()

            elif tag == HC.DFTAG_NDG:
                # SDS tag: the SD interface selects by index, so convert the ref.
                sds = sd.select(sd.reftoindex(ref))
                # `data_type` rather than `type` to avoid shadowing the builtin.
                name, rank, dims, data_type, nattrs = sds.info()
                print(" dataset:", name, "tag,ref:", tag, ref)
                print(" dims:", dims)
                print(" type:", data_type)
                sds.endaccess()

            elif tag == HC.DFTAG_VG:
                # Vgroup tag: a nested vgroup; only its name is reported here.
                vg0 = v.attach(ref)
                print(" vgroup:", vg0._name, "tag,ref:", tag, ref)
                vg0.detach()

            else:
                # Unhandled tag
                print("unhandled tag,ref", tag, ref)
    finally:
        # Close vgroup even if describing one of its members failed.
        vg.detach()
if __name__ == '__main__':
    # The HDF file to describe is the single command-line argument.
    if len(sys.argv) < 2:
        sys.exit("usage: %s <hdf-file>" % sys.argv[0])
    filename = sys.argv[1]

    # Open HDF file in readonly mode.
    hdf = HDF(filename)

    # Initialize the SD, V and VS interfaces on the file.
    # They are module-level names because describevg() reads them as globals.
    sd = SD(filename)
    vs = hdf.vstart()
    v = hdf.vgstart()

    try:
        # Scan all vgroups in the file: start from -1 and keep asking for
        # the next reference number until HDF4Error is raised.
        ref = -1
        while True:
            try:
                ref = v.getid(ref)
            except HDF4Error:    # no more vgroup
                break
            describevg(ref)
    finally:
        # Terminate V, VS and SD interfaces, even if the scan failed.
        v.end()
        vs.end()
        sd.end()
        # Close HDF file.
        hdf.close()
4. 公共声明模块HDF
公共声明模块包括了许多常量的定义,并辅助V模块和VS模块的调用。包括两个类,HC和HDF。
# 类HC:定义常量。常用的为:
DFTAG_NDG == 720:通过tag判别是否为数据集dataset
DFTAG_VH == 1962:通过tag判别是否为vdata
DFTAG_VG == 1965:通过tag判别是否为vgroup
# 类HDF:打开HDF文件为后续处理做准备。常用方法有:
vgstart(self) #初始化V类
vstart(self) #初始化VS类
后记
- SD(操作scientific data)和VS(操作vdata)用的比较多,组模块V-group可用于索引遍历整个文件的层次结构,从而打开所有SD/VD数据。
- cloudsat的数据比较特殊,Lon/lat存储在vs里,要读取vdata记录表。