0%

cv/fasterRCNN

fasterRCNN网络

roi forward

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
/* in class RoI.forward() of roi_module.py
# x: tensor(n, 512, hh, ww)
# rois: tensor(n*2000, 5) [index, x_min, y_min, x_max, y_max]
# output: tensor(n*2000, 512, outh=7, outw=7).cuda()
# argmax_data: tensor(n*2000, 512, outh=7, outw=7).int().cuda()
# spatial_scale: 1./FasterRCNN.feat_stride = 1./16
# C, H, W: x.size()[1:] = 512, hh, ww
# outh, outw = VGG16RoIHead.roi_size = 7
# output.numel(): (n*2000) * 512 * 7 * 7

args = [x.data_ptr(), rois.data_ptr(),
output.data_ptr(),
self.argmax_data.data_ptr(),
self.spatial_scale, C, H, W,
self.outh, self.outw,
output.numel()]

# from collections import namedtuple
# Stream = namedtuple('Stream', ['ptr'])
# def GET_BLOCKS(N, K=CUDA_NUM_THREADS):
#     return (N + K - 1) // K  # <=> ceil(N / K)
# CUDA_NUM_THREADS = 1024
stream = Stream(ptr=torch.cuda.current_stream().cuda_stream)

# block: (1024, 1, 1)
# grid: ( ceil((n*2000) * 512 * 7 * 7 / 1024), 1, 1 )
self.forward_fn(args=args,
block=(CUDA_NUM_THREADS, 1, 1),
grid=(GET_BLOCKS(output.numel()), 1, 1),
stream=stream)
*/