videoDataset.py
import os

import cv2
import numpy as np
import torch
from torch.utils.data import Dataset

from crop import cropFrames
from crop_keypoint import keypoint
from keyframe import keyframes

class VideoDataset(Dataset):
    def __init__(self, videodir, jsondir, channels, frame_num, mode, transform=None):
        # frame_num: how many key frames to keep per video. For example, with
        # groups of 3 frames [[1, 2, 3], [4, 5, 6]], randomly keeping 2 per
        # group would give [[1, 2], [5, 6]]. The selection itself happens in
        # keyframes() (see readVideo below).
        total_number = 0
        for _, _, files in os.walk(videodir):
            total_number += len(files)
        self.total_number = total_number
        self.videodir = videodir
        self.train = '/home/han006/experiment_v3/CSL5000_100class/dataset/CSL5000_100classes/train.txt'
        self.test = '/home/han006/experiment_v3/CSL5000_100class/dataset/CSL5000_100classes/test.txt'
        self.jsondir = jsondir
        self.channels = channels
        self.transform = transform
        self.frame_num = frame_num
        self.mode = mode

    def get_path(self, train_or_test_txt):
        # Build (video_path, json_path, label) tuples from a split file whose
        # lines look like "<label>/<name>" (format inferred from the parsing).
        all_path = []
        with open(train_or_test_txt, 'r') as f:
            output = f.readlines()
        for data in output:
            data_info = data.split('/')
            label = int(data_info[0])
            name = data.rstrip('\n')
            video_name = name + '.avi'
            json_name = name + '.json'
            final_path = self.videodir + '/' + video_name
            final_json_path = self.jsondir + '/' + json_name
            all_path.append((final_path, final_json_path, label))
        return all_path
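
    # Example (hypothetical paths) of what get_path() produces, assuming
    # videodir='videos', jsondir='json', and a split-file line "12/sample_003":
    #   ('videos/12/sample_003.avi', 'json/12/sample_003.json', 12)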

    def __len__(self):
        # Note: get_path() re-reads the split file on every call.
        if self.mode == 'train':
            return len(self.get_path(self.train))
        return len(self.get_path(self.test))

    def readVideo(self, videofile, jsonfile):
        # Crop every frame to the bounding box of the skeleton keypoints
        # (with a 5-pixel margin), then keep frame_num key frames.
        keypoints_class = keypoint(jsonfile)
        cropFrame_class = cropFrames()
        xy = keypoints_class.get_x_y()
        min_x = cropFrame_class.get_min_x(xy)
        max_x = cropFrame_class.get_max_x(xy)
        min_y = cropFrame_class.get_min_y(xy)
        max_y = cropFrame_class.get_max_y(xy)
        # Clamp the top/left edges so the margin cannot go negative (a negative
        # slice start would wrap around to the end of the array).
        y0 = max(int(min_y - 5), 0)
        x0 = max(int(min_x - 5), 0)
        cap = cv2.VideoCapture(videofile)
        frames = []
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            frames.append(frame[y0:int(max_y + 5), x0:int(max_x + 5)])
        cap.release()
        # The same key-frame selection is used for 'train' and 'val' modes.
        LUVframes = keyframes(frames, self.frame_num)
        if self.channels == 3:
            RGBframes = []
            for LUVframe in LUVframes:
                RGBframes.append(cv2.cvtColor(LUVframe, cv2.COLOR_BGR2RGB))
            final_keyframe = np.array(RGBframes)
        elif self.channels == 1:
            # keyframes() is assumed to yield single-channel frames here; add
            # an explicit channel axis so each frame is H x W x 1.
            grayframes = []
            for LUVframe in LUVframes:
                grayframes.append(np.expand_dims(LUVframe, axis=2))
            final_keyframe = np.array(grayframes)
        else:
            raise ValueError('channels must be 1 or 3, got %r' % self.channels)
        return final_keyframe
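
    # Shape sketch for readVideo(), assuming frame_num key frames of size HxW:
    #   channels == 3 -> ndarray of shape (frame_num, H, W, 3)
    #   channels == 1 -> ndarray of shape (frame_num, H, W, 1)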

    def __getitem__(self, index):
        if self.mode == 'train':
            data = self.get_path(self.train)
        else:
            data = self.get_path(self.test)
        videopath, jsonpath, videolabel = data[index]
        video_data = self.readVideo(videopath, jsonpath)
        # transform must map an H x W x C ndarray to a C x H x W tensor
        # (e.g. torchvision's ToTensor).
        video_frame_data_list = []
        for video_frame_data in video_data:
            video_frame_data = self.transform(video_frame_data)
            video_frame_data_list.append(video_frame_data)
        # This reproduces final_frame.append(in_tensor): first build the fully
        # appended list, then final_frame = torch.stack(final_frame, dim=0).
        video = torch.stack(video_frame_data_list, dim=0)
        # (T, C, H, W) -> (C, T, H, W), the layout expected by 3D-conv models.
        video = video.permute(1, 0, 2, 3)
        videolabel = torch.tensor(videolabel)
        return video, videolabel
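

if __name__ == '__main__':
    # Minimal usage sketch, not part of the original file: the directory
    # layout, transform, and DataLoader settings below are assumptions made
    # for illustration only.
    from torch.utils.data import DataLoader
    from torchvision import transforms

    transform = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize((112, 112)),
        transforms.ToTensor(),
    ])
    dataset = VideoDataset(videodir='dataset/videos', jsondir='dataset/json',
                           channels=3, frame_num=16, mode='train',
                           transform=transform)
    loader = DataLoader(dataset, batch_size=4, shuffle=True, num_workers=2)
    for video, label in loader:
        # video has shape (B, C, T, H, W) after the permute in __getitem__.
        print(video.shape, label.shape)
        break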