pyhdf读取方法汇总

简介

在pyhdf中,HDF4数据分为四部分,SD(Scientific Dataset),VS(vdata),V(Vgroup)以及HDF(common declarations)。pyhdf中数据有两种类型,datasets(SD)和vdata(VS),即数据集和记录,所有数据存储在vgroups中,以类似文件夹的形式组成HDF文件。

模块介绍

1. SD 数据集模块

SD是用来读取科学数据集的模块,包含五个类:SD、SDAttr、SDC、SDS、SDim。每个类中包含了很多方法,用于完成不同目的的操作。主要使用的为SD和SDC。

# 1. 类SD:根据文件路径进行类初始化,可用于数据集的选取读取、创建等。
# 常用方法:
create(self, name, data_type, dim_sizes) #创建数据集
datasets(self) #列出所有数据集
nametoindex(self, sds_name) #数据集名转索引(index)
select(self, name_or_index) #选择数据集(常用)
reftoindex(self, sds_ref) #数据集引用号(ref)转索引(index)(常用),常与VG.tagrefs()返回的ref配合使用

# code
# Open the HDF4 file through the SD interface (read-only is the default mode).
SD_file = SD(ifile)
ds_dict = SD_file.datasets()  # dictionary describing every dataset in the file
sds_obj = SD_file.select('Radar_Reflectivity')  # pick one dataset by name
SD_file.end()  # close the SD interface once done

# 2. 类SDC:常数集合,包含了数据类型和打开方式。
# code
# Create the output file and open it for writing.
sd_out = SD(outfile, SDC.CREATE | SDC.WRITE)
# Call create() on the instance instead of through the class.
sds1 = sd_out.create('data', SDC.FLOAT32, (1200, 1600))
sds1.set(np.zeros((1200, 1600), dtype=np.float32))  # write the full array
sds1.endaccess()  # release the dataset before closing the file
sd_out.end()

# 3. 类SDS:包含数据集对象,及所选取的具体数据集合。
# 常用方法:
get(self, start=None, count=None, stride=None) #以数组形式读取数据
info(self) #获得数据名等信息

# code
sds_obj = SD_file.select('Radar_Reflectivity')  # SDS instance for one dataset
arr = sds_obj.get()    # read the dataset contents as an array
info = sds_obj.info()  # (name, rank, dims, type, nattrs), as unpacked in describevg below

# 4. 类SDAttr:包含了与数据集属性有关的方法。
# code
attrs = sds_obj.attributes()
2. VS 记录模块

VS是用来处理记录(Vdata)的模块,VD子类中封装了单个记录表的操作函数,VDAttr子类中的函数主要实现数据表属性的读写等操作,VDField子类中的函数主要实现数据表中字段的定义等功能,VS子类中的函数主要实现数据表的创建、打开、操作、结束等功能。

在对卫星数据处理时用的频次不如SD高,但也有部分卫星的lon/lat用vdata组成,如cloudsat。其包含四个类:VD、VDAttr、VDField以及VS,常用的为VS和VD。

# 类VS:通过HDF文件路径来初始化处理接口,主要目的是在通过V模块逐级打开HDF文件时对其中包含的Vdata进行处理
# 方法有:
attach(self, num_name, write=0) # 获取Vdata
create(self, name, fields) # 创建Vdata
find(self, vName) # 根据名称查找Vdata
vdatainfo(self, listAttr=0) # 给出所有Vdatas的信息

# 类VD:处理Vdata,首先要通过HDF的vstart()初始化VS接口,再通过VS的attach()或create()获取Vdata
# 方法有:
inquire(self) #获取Vdata相关信息
read(self, nRec=1) #读取给定数目的记录

# 类VDField:对数据记录进行操作。

# 类VDAttr:与属性有关的操作。

# Keep a handle on the HDF file so it can be closed explicitly afterwards.
hdf_file = HDF(ifile, HC.READ)
vdataFile = hdf_file.vstart()              # VS interface on the file
VD_object = vdataFile.attach('Longitude')  # VD instance for the 'Longitude' vdata
vInfo = VD_object.inquire()                # basic vdata info; element 0 is the record count
lon = VD_object.read(nRec=vInfo[0])        # read every record (nRec = number of rows)

# Release resources in reverse order of acquisition.
VD_object.detach()
vdataFile.end()
hdf_file.close()

vdata 读取模式

# read vdata: method 1
from pyhdf.HDF import *
from pyhdf.VS import *

f = HDF('inventory.hdf')    # open 'inventory.hdf' in read mode
vs = f.vstart()             # init vdata interface
vd = vs.attach('INVENTORY')   # attach 'INVENTORY' in read mode

# Display some vdata attributes
print("status:", vd.status)
print("vdata: ", vd._name)   # predefined attribute: vdata name
print("nrecs: ", vd._nrecs)  # predefined attribute: num records

# Display value of attribute 'unit' for all fields
print("units: ", end=" ")
for fieldName in vd._fields:  # loop over all field names
    try:
        # instantiate field and obtain value of attribute 'unit'
        v = vd.field(fieldName).unit
        print("%s: %s" % (fieldName, v), end=" ")
    except (AttributeError, HDF4Error):  # no 'unit' attribute: ignore
        pass
print("")


# Loop over the vdata records, displaying each record as a table row.
# Current record position is 0 after attaching the vdata.
while True:
    try:
        rec = vd.read()       # read next record
        # equivalent to: rec = vd[vd.tell()]
    except HDF4Error:         # end of vdata reached
        break
    print(rec)                # show the record that was just read

vd.detach()               # "close" the vdata
vs.end()                  # terminate the vdata interface
f.close()                 # close the HDF file
# read vdata: method 2 -- easy
from pyhdf.HDF import *
from pyhdf.VS import *

f = HDF('inventory.hdf')     # open 'inventory.hdf' in read mode
vs = f.vstart()              # init vdata interface
vd = vs.attach('INVENTORY')  # attach 'INVENTORY' in read mode

# ... (display attributes as in method 1)

# Read all records at once; slicing the vdata yields the full sequence.
arr = list(vd[:])

vd.detach()               # "close" the vdata
vs.end()                  # terminate the vdata interface
f.close()                 # close the HDF file

vdata 写入模式

# method 1
from pyhdf.HDF import *
from pyhdf.VS import *
# Open file 'test.hdf' in write mode
f = HDF('test.hdf', HC.WRITE)

vs = f.vstart()            # init vdata interface
vd = vs.attach('vtest', 1) # attach vdata 'vtest' in write mode

attr = vd.attr('version')  # prepare to define the 'version' attribute
attr.set(HC.CHAR8, '1.0')  # set attribute 'version' to '1.0'
print(attr.get())          # get and print attribute value

fld = vd.field('fld1')     # obtain a field instance for 'fld1'
attr = fld.attr('range')   # prepare to define the field attribute 'range'
attr.set(HC.INT32, (-10, 15))
print(attr.get())

# Release resources so the new attributes are written out.
vd.detach()                # "close" the vdata
vs.end()                   # terminate the vdata interface
f.close()                  # close the HDF file
# method 2
from pyhdf.HDF import *
from pyhdf.VS import *
f = HDF('test.hdf', HC.WRITE)  # open file 'test.hdf' in write mode
vs = f.vstart()            # init vdata interface
vd = vs.attach('vtest', 1) # attach vdata 'vtest' in write mode
vd.version = '1.0'         # create vdata attribute 'version' by plain assignment
print(vd.version)          # print attribute value

fld = vd.field('fld1')     # obtain a field instance for 'fld1'
fld.range = (-10, 15)      # create field attribute 'range'
print(fld.range)           # print attribute value

vd.detach()                # "close" the vdata
vs.end()                   # terminate the vdata interface
f.close()                  # close the HDF file

3. 组模块V

​ 组模块是用来逐级打开HDF文件,并获取组相关信息如tag和ref等的模块。是常用的一个模块,包括V、VG以及VGAttr三个类。

# 1.类V:通过路径初始化接口并创建、查找或获取组,常用方法有:
attach(self, num_name, write=0) #根据名称获取组
create(self, name) #根据名称创建组
find(self, name) #根据名称查找组

# 2.类VG:处理vgroups,首先需要通过类HDF的vgstart()初始化V接口,再通过V的attach()或create()获取VG实例,常用方法有:
tagrefs(self) #获取所有标签和引用,用于进一步打开数据。

# 3.类VGAttr:与属性有关的操作。
# 流程
from pyhdf.HDF import *
from pyhdf.V import *

hdfFile = HDF(name, HC.xxx) # open HDF file
v = hdfFile.vgstart()       # initialize V interface on HDF file
# ...                       # manipulate vgroups
v.end()                     # terminate V interface
hdfFile.close()             # close HDF file
from pyhdf.HDF import *
from pyhdf.V import *
# Open file \'test.hdf\' in write mode
f = HDF('test.hdf', HC.WRITE)

v = f.vgstart()             # init vgroup interface
vg = v.attach('vtest', 1)   # attach vgroup 'vtest' in write mode
attr = vg.attr('version')   # define the 'version' attribute
attr.set(HC.CHAR8, '1.0')   # set attribute 'version' to '1.0'
print(attr.get())           # get and print attribute value

attr = vg.attr('range')     # prepare to define attribute 'range'
attr.set(HC.INT32, (-10, 15))  # set attribute 'range'
print(attr.get())           # get and print attribute value

vg.detach()                # "close" the vgroup
v.end()                    # terminate the vgroup interface
f.close()                  # close the HDF file
# read a V-group
# 用VGroup打开文件,索引到所有数据(SD/VD)

from pyhdf.HDF import *
from pyhdf.V   import *
from pyhdf.VS  import *
from pyhdf.SD  import *

import sys

def describevg(refnum):
    """Print a description of the vgroup with the given reference number.

    Relies on the module-level interfaces ``v`` (V), ``vs`` (VS) and ``sd``
    (SD) having been initialised on the same file before it is called.

    Args:
        refnum: reference number of the vgroup to attach to and describe.
    """
    # Open vgroup in read mode.
    vg = v.attach(refnum)
    print("----------------")
    print("name:", vg._name, "class:", vg._class, "tag,ref:", end=" ")
    print(vg._tag, vg._refnum)

    # Show the number of members of each main object type.
    print("members: ", vg._nmembers, end=" ")
    print("datasets:", vg.nrefs(HC.DFTAG_NDG), end=" ")
    print("vdatas:  ", vg.nrefs(HC.DFTAG_VH), end=" ")
    print("vgroups: ", vg.nrefs(HC.DFTAG_VG))

    # Read the contents of the vgroup as (tag, ref) pairs.
    members = vg.tagrefs()

    # Display info about each member; the tag says which interface opens it.
    for index, (tag, ref) in enumerate(members):
        print("member index", index)

        # Vdata tag: open the record table through the VS interface.
        if tag == HC.DFTAG_VH:
            vd = vs.attach(ref)
            nrecs, intmode, fields, size, name = vd.inquire()
            print("  vdata:", name, "tag,ref:", tag, ref)
            print("    fields:", fields)
            print("    nrecs:", nrecs)
            vd.detach()

        # SDS tag: the SD interface selects by index, so convert the ref first.
        elif tag == HC.DFTAG_NDG:
            sds = sd.select(sd.reftoindex(ref))
            # ``data_type`` avoids shadowing the builtin ``type``.
            name, rank, dims, data_type, nattrs = sds.info()
            print("  dataset:", name, "tag,ref:", tag, ref)
            print("    dims:", dims)
            print("    type:", data_type)
            sds.endaccess()

        # VG tag: a nested vgroup.
        elif tag == HC.DFTAG_VG:
            vg0 = v.attach(ref)
            print("  vgroup:", vg0._name, "tag,ref:", tag, ref)
            vg0.detach()

        # Unhandled tag
        else:
            print("unhandled tag,ref", tag, ref)

    # Close vgroup
    vg.detach()

if __name__ == '__main__':
    # Open HDF file in readonly mode; the path is the first CLI argument.
    filename = sys.argv[1]
    hdf = HDF(filename)
    # Initialize the SD, V and VS interfaces on the file.
    # describevg() reads these as module-level names.
    sd = SD(filename)
    vs = hdf.vstart()
    v = hdf.vgstart()

    # Scan all vgroups in the file; -1 asks getid() for the first one.
    ref = -1
    while True:
        try:
            ref = v.getid(ref)
        except HDF4Error:    # no more vgroup
            break
        describevg(ref)

    # Terminate V, VS and SD interfaces.
    v.end()
    vs.end()
    sd.end()

    # Close HDF file.
    hdf.close()

4. 公共声明模块HDF

公共声明模块包括了许多常量的定义,并辅助V模块和VS模块的调用。包括两个类,HC和HDF。

# 类HC:定义常量。常用的为:
DFTAG_NDG == 720:通过tag判别是否为数据集dataset
DFTAG_VH  == 1962:通过tag判别是否为vdata
DFTAG_VG  == 1965:通过tag判别是否为vgroup

# 类HDF:打开HDF文件为后续处理做准备。常用方法有:
vgstart(self) #初始化V类
vstart(self) #初始化VS类

后记

  1. SD(操作scientific data)和VS(操作vdata)用的比较多,组模块V-group可用于索引遍历整个文件的层次结构,从而打开所有SD/VD数据。
  2. cloudsat的数据比较特殊,Lon/lat存储在vs里,要读取vdata记录表。

版权声明:本文为ljwgis原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。
本文链接:https://www.cnblogs.com/ljwgis/p/14686495.html