如图，`bbt_frames` 下有多个视频文件夹，每个文件夹包含多个 h5 文件。请将每个文件夹中的 h5 文件合并成一个 h5 文件，以该文件夹的名称命名，并存储至 `video_feature` 文件夹。
import os
import h5py
import numpy as np
def merge_h5_files(input_dir, output_dir):
    """Merge the ``.h5`` files of every sub-folder into one HDF5 file per folder.

    For each immediate sub-directory of ``input_dir`` that contains at least
    one file ending in ``.h5``, element 0 of the ``video_features`` dataset of
    every such file is read, the elements are stacked with ``np.array`` and the
    result is written as a gzip-compressed ``video_features`` dataset to
    ``<output_dir>/<sub-directory name>.h5``. Sub-directories without ``.h5``
    files are skipped, and a progress line is printed for each merged folder.

    Args:
        input_dir: Directory whose sub-directories hold the ``.h5`` files.
        output_dir: Directory that receives one merged file per sub-directory.
            It is created (including parents) when it does not exist.

    Returns:
        None.

    Errors raised by ``os`` or ``h5py`` while listing, reading or writing are
    propagated unchanged.
    """
    # h5py cannot create missing parent directories for the output file.
    os.makedirs(output_dir, exist_ok=True)

    # os.listdir() returns entries in arbitrary order; sort so that runs are
    # reproducible.
    for folder in sorted(os.listdir(input_dir)):
        folder_path = os.path.join(input_dir, folder)
        if not os.path.isdir(folder_path):
            continue

        # NOTE(review): the sort is lexicographic, so '10.h5' comes before
        # '2.h5'. Zero-pad the file names or switch to a numeric key if the
        # row order must follow frame numbers.
        h5_files = sorted(
            name for name in os.listdir(folder_path) if name.endswith('.h5')
        )
        if not h5_files:
            continue

        # Read every input first so that a failing read does not leave a
        # truncated output file behind.
        feature_list = []
        for h5_file in h5_files:
            h5_file_path = os.path.join(folder_path, h5_file)
            with h5py.File(h5_file_path, 'r') as infile:
                # Indexing the dataset copies the data into memory, so it is
                # still valid after the input file is closed.
                feature_list.append(infile['video_features'][0])
        feat_data = np.array(feature_list)

        output_file = os.path.join(output_dir, folder + '.h5')
        with h5py.File(output_file, 'w') as outfile:
            outfile.create_dataset(
                'video_features', data=feat_data, compression="gzip"
            )
        print(f'Merged H5 files in folder {folder}')
# Example usage: merge each video folder under 'bbt_frames' into 'video_feature'.
video_directory = 'bbt_frames'
feature_directory = 'video_feature'
# Turn both relative names into absolute paths before handing them over.
video_path, feature_path = (
    os.path.abspath(directory)
    for directory in (video_directory, feature_directory)
)
merge_h5_files(input_dir=video_path, output_dir=feature_path)