@DETECTORS.register_module()
class RadarVelocityBboxOnnxFuse3(nn.Module):
    """Multi-frame network predicting a 2D box and polynomial-fitted motion.

    The constructor only stores hyper-parameters; the layers are created by
    ``init_layers`` and their weights are set by ``init_weights``.
    """

    def __init__(self, backbone=None, neck=None, head=None, bbox_type='sigmoid', use_scale=False,
                 roi_type='align', img_sz=[192, 224], poly=2, feat_dim=256, use_bn=True,
                 use_weight=False, classify=False, alpha_dist=0.1, alpha_regu=0.1, alpha_acc=10,
                 alpha_last=5, alpha_weight=0.01, alpha_bbox=1, **kwargs):
        """Store the configuration, then build and initialise the layers.

        Args:
            backbone, neck, head: accepted but not used by this constructor.
            bbox_type, use_scale, roi_type: stored on the instance unchanged.
            img_sz: two-element input size; ``init_layers`` divides both
                entries by 32 to size the RoI grid.
            poly: number of polynomial coefficients fitted per sequence
                (``get_transf_mat`` handles 2, 3 and 4).
            feat_dim: channel count of the last backbone stage.
            use_bn: build the backbone with ``conv_bn`` instead of ``conv``.
            use_weight: replace ``wt_fc`` with a learned per-frame weight head
                and add ``loss_weight`` to the losses.
            classify: add the ``class_fc`` head and ``loss_cls``.
            alpha_dist, alpha_regu, alpha_acc, alpha_last, alpha_weight,
            alpha_bbox: multipliers applied to the matching terms in ``loss``.
            **kwargs: ignored.
        """
        # The constructor must be the dunder method: ``nn.Module`` has no
        # ``init`` attribute, and the registry instantiates via ``__init__``.
        super(RadarVelocityBboxOnnxFuse3, self).__init__()
        self.classify = classify
        self.alpha_acc = alpha_acc
        self.alpha_bbox = alpha_bbox
        self.alpha_dist = alpha_dist
        self.alpha_regu = alpha_regu
        self.alpha_last = alpha_last
        self.alpha_weight = alpha_weight
        self.use_weight = use_weight
        self.use_bn = use_bn
        self.feat_dim = feat_dim
        self.img_sz = img_sz
        self.bbox_type = bbox_type
        self.use_scale = use_scale
        self.roi_type = roi_type
        self.poly = poly
        # Layers must exist before their weights can be initialised.
        self.init_layers()
        self.init_weights()
def init_weights(self):
    """Initialise every conv, batch-norm and linear sub-module in place.

    * ``Conv2d`` / ``ConvTranspose2d``: Kaiming-normal weights (fan-in),
      zero bias.
    * ``BatchNorm2d``: weights ~ N(1, 0.03), zero bias.
    * ``Linear``: weights ~ N(0, 0.03), zero bias.

    Layers created without a bias, and batch-norm layers created with
    ``affine=False``, have ``None`` parameters; those are skipped.
    """
    for m in self.modules():
        if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
            nn.init.kaiming_normal_(m.weight.data, mode='fan_in')
            if m.bias is not None:
                m.bias.data.zero_()
        elif isinstance(m, nn.BatchNorm2d):
            # affine=False batch-norm has neither weight nor bias
            if m.weight is not None:
                torch.nn.init.normal_(m.weight.data, 1.0, 0.03)
            if m.bias is not None:
                torch.nn.init.constant_(m.bias.data, 0.0)
        elif isinstance(m, nn.Linear):
            torch.nn.init.normal_(m.weight.data, 0.0, 0.03)
            if m.bias is not None:
                torch.nn.init.constant_(m.bias.data, 0.0)
def init_layers(self,):
    """Create the backbone, RoI head, regression heads and loss modules.

    The backbone is five stages of three 3x3 convolutions, each stage
    starting with a stride-2 layer (overall stride 32).  ``use_bn`` selects
    ``conv_bn`` or ``conv`` as the layer factory.  Layers are registered in
    a fixed order so parameter ordering and random initialisation stay
    stable.
    """
    # Largest side of the stride-32 feature map, and the odd RoI grid size
    # derived from it.
    ht_size = max(self.img_sz[0] // 32, self.img_sz[1] // 32)
    grid_sz = 2 * (ht_size // 2) + 1

    make_conv = conv_bn if self.use_bn else conv
    width = self.feat_dim
    # (attribute name, in channels, out channels, stride)
    backbone_spec = (
        ('conv2a', 7, 32, 2),
        ('conv2aa', 32, 32, 1),
        ('conv2b', 32, 32, 1),
        ('conv3a', 32, 64, 2),
        ('conv3aa', 64, 64, 1),
        ('conv3b', 64, 64, 1),
        ('conv4a', 64, 96, 2),
        ('conv4aa', 96, 96, 1),
        ('conv4b', 96, 96, 1),
        ('conv5a', 96, 128, 2),
        ('conv5aa', 128, 128, 1),
        ('conv5b', 128, 128, 1),
        ('conv6aa', 128, width, 2),
        ('conv6a', width, width, 1),
        ('conv6b', width, width, 1),
    )
    for attr, c_in, c_out, stride in backbone_spec:
        setattr(self, attr, make_conv(c_in, c_out, kernel_size=3, stride=stride))

    self.relu = nn.ReLU(inplace=False)

    # RoI feature reduction: a grid_sz x grid_sz conv collapses the pooled
    # grid to 1x1, followed by a 1x1 conv.
    self.roi_conv1 = nn.Conv2d(width, 256, grid_sz, stride=1, padding=0)
    self.roi_conv2 = nn.Conv2d(256, 256, 1, stride=1, padding=0)

    self.wt_fc = nn.Linear(256, 1)
    self.mv_fc1 = nn.Linear(256, 128)
    self.mv_fc4 = nn.Linear(128, 64)
    self.mv_fc5 = nn.Linear(64, 3)
    self.dis_X_encoding = nn.Linear(256, 64)
    self.dis_Y_encoding = nn.Linear(256, 64)
    self.dis_Z_encoding = nn.Linear(256, 64)

    # 7-value box head; no Sigmoid layer here because forward() applies
    # ``.sigmoid()`` to the output.
    bbox_layers = [
        nn.Conv2d(width, 64, 3, stride=1, padding=0),
        nn.ReLU(inplace=False),
        nn.AdaptiveAvgPool2d(1),
        nn.Conv2d(64, 7, 1, stride=1, padding=0),
    ]
    self.bbox_fc = nn.Sequential(*bbox_layers)

    if self.use_weight:
        # Replaces the linear ``wt_fc`` above with a per-frame weight head.
        self.wt_fc = nn.Sequential(
            nn.AdaptiveAvgPool2d(1),
            nn.Conv2d(width, 1, 1, stride=1, padding=0),
            nn.Sigmoid(),
        )

    self.dis_fc = nn.Sequential(
        nn.Linear(128, 64),
        nn.ReLU(inplace=True),
        nn.Linear(64, 2),
    )
    self.vel_fc = nn.Sequential(
        nn.Linear(128, 64),
        nn.ReLU(inplace=True),
        nn.Linear(64, 2),
    )
    self.car_roi = RoIAlign((grid_sz, grid_sz), aligned=False)
    if self.classify:
        self.class_fc = nn.Sequential(nn.Linear(256, 9))

    # Per-channel scale applied to the sigmoid box output in forward();
    # only channels 2..5 are non-zero.
    self.roi_scale = torch.tensor([[0, 0, ht_size, ht_size, ht_size, ht_size, 0]]).float()
    self.grid_sz = grid_sz

    # Loss modules
    self.conf_loss = build_loss(dict(type='UncertainL2Loss'))
    self.iou_loss = build_loss(dict(type='GIoULoss'))
    self.bbox_loss = build_loss(dict(type='L1Loss'))
    self.cls_loss = build_loss(dict(type='CrossEntropyLoss'))
def loss(self, output, kwargs,):
    """Assemble the dictionary of training losses.

    Args:
        output: dict produced by ``forward`` in training mode (keys ``dis``,
            ``s_dis``, ``vel``, ``rotate_vel``, ``rotate_pos``,
            ``rotate_acc``, ``m_vel``, ``rois``, ``acc_p``, ``extra`` and,
            depending on the configuration, ``cls`` / ``weight``).
        kwargs: batch dict holding the ground-truth tensors read below.

    Returns:
        dict mapping loss names to scalar tensors.  ``loss_acc`` is always
        present; it is zero when ``self.poly`` is not 3 or 4, because no
        acceleration target is defined for other orders.
    """
    # world frame
    position_pred = output['dis'][..., [0, 1]]
    smooth_pred = output['s_dis'][..., [0, 1]]
    velocity_pred = output['vel']
    velocity_true = kwargs['all_DR_vel'][..., [0, 2]]
    position_true = kwargs['all_DR_pos'][..., [0, 2]]

    # rotated frame: only the last component is supervised
    velocity_pred_r = output['rotate_vel'][..., [-1]]
    velocity_true_r = kwargs['all_sp_vel'].unsqueeze(dim=-1)
    position_pred_r = output['rotate_pos'][..., [-1]]
    position_true_r = kwargs['all_r_pos'][..., [-1]]

    # Acceleration target exists only for the cubic / quartic fits.
    # Previously any other order hit a NameError on ``acc_true``.
    if self.poly == 3:
        acc_true = kwargs['all_sp_acc'] * kwargs['t']
    elif self.poly == 4:
        acc_true = kwargs['all_sp_acc']
    else:
        acc_true = None

    # distance
    loss_dist = self.alpha_dist * torch.norm(position_true - position_pred, p=2, dim=-1).mean()
    loss_smoo = self.alpha_dist * torch.norm(position_true - smooth_pred, p=2, dim=-1).mean()
    loss_disr_r = self.alpha_dist * F.mse_loss(position_true_r, position_pred_r)

    # velocity
    loss_velc = torch.norm(velocity_true - velocity_pred, p=2, dim=-1).mean()
    loss_rotate = F.mse_loss(velocity_true_r, velocity_pred_r)
    loss_last = self.alpha_last * F.mse_loss(velocity_true_r[:, -1], velocity_pred_r[:, -1])
    loss_mv = F.mse_loss(velocity_true_r.mean(dim=1), output['m_vel'][..., [-1]])

    # bbox: rows whose ground-truth box is all zeros are ignored
    pred_bbox = output['rois'].reshape(-1, 4)
    gt_bbox = kwargs['rois'].reshape(-1, 4)
    inds = gt_bbox.abs().sum(-1) > 0
    pred_bbox = pred_bbox[inds]
    gt_bbox = gt_bbox[inds]
    loss_bbox = self.alpha_bbox * self.bbox_loss(pred_bbox, gt_bbox)
    loss_giou = self.alpha_bbox * self.iou_loss(pred_bbox, gt_bbox)

    # regularisation
    l1_reg = self.alpha_regu * torch.norm(output['acc_p'], p=1, dim=-1).mean()
    if acc_true is None:
        l1_acc = torch.zeros_like(loss_dist)
    else:
        l1_acc = self.alpha_acc * F.mse_loss(output['rotate_acc'][..., -1], acc_true)

    # variance of the residual along the frame axis
    vel_std = (velocity_true_r - velocity_pred_r).var(dim=1).sum(-1).mean()
    pos_std = self.alpha_dist * (position_true_r - position_pred_r).var(dim=1).sum(-1).mean()

    # Penalise boxes whose first two coordinates exceed the last two.
    extra = output['extra']
    if extra is not None:
        loss_extra = (torch.exp(torch.clamp_min(extra[..., :2] - extra[..., 2:], 0)) - 1).sum()
    else:
        loss_extra = torch.zeros_like(loss_giou)

    loss = {
        'loss_dist': loss_dist, 'loss_smooth': loss_smoo, 'loss_disr_r': loss_disr_r,
        'loss_velc': loss_velc, 'loss_rotate': loss_rotate, 'loss_last': loss_last,
        'loss_bbox': loss_bbox, 'loss_giou': loss_giou, 'loss_extra': loss_extra,
        'loss_reg': l1_reg, 'loss_vel_var': vel_std, 'loss_pos_var': pos_std, 'loss_acc': l1_acc,
        'loss_mv': loss_mv,
    }

    if self.classify:
        pred = output['cls']
        true = kwargs['all_type'].view(-1)
        loss['loss_cls'] = self.cls_loss(pred, true)

    if self.use_weight:
        # Pull the per-frame weights towards 1.
        pred_weight = output['weight']
        gt_weight = torch.ones_like(pred_weight)
        loss['loss_weight'] = self.alpha_weight * torch.norm(pred_weight - gt_weight, p=2).mean()
    return loss
def get_predictions(self, res, kwargs):
    """Extract the evaluation outputs for the first sample of the batch.

    Args:
        res: network outputs; ``rotate_pos``, ``s_dis`` and ``rotate_vel``
            are indexed as ``[sample, frame, channel]``.
        kwargs: batch dict.  Ground truth is returned only when it contains
            ``'location'``.

    Returns:
        dict with ``pred_r``, ``pred_z``, ``pred_t`` and the matching
        ``gt_r``, ``gt_z``, ``gt_t`` entries (``None`` without ground truth).
    """
    out = {
        'pred_r': res['rotate_pos'][0, -1, [1]],
        # NOTE(review): this reads frame index 1 while every other entry
        # reads the last frame (-1) -- confirm that is intentional.
        'pred_z': res['s_dis'][0, 1, [1]],
        'pred_t': res['rotate_vel'][0, -1],
    }
    if 'location' in kwargs:
        out['gt_t'] = kwargs['all_r_vel'][0, -1, [0, 2]]
        out['gt_z'] = kwargs['location'][0, -1, [2]]
        out['gt_r'] = kwargs['all_r_pos'][0, -1, [1]]
    else:
        out['gt_t'] = None
        out['gt_z'] = None
        out['gt_r'] = None
    return out
def get_transf_mat(self, t, w, y):
    """Weighted least-squares polynomial fit of ``y`` against ``t``.

    Builds the normal equations ``M p = b`` with
    ``M[i, j] = sum_n w_n * t_n**(i + j)`` and
    ``b[i] = sum_n w_n * t_n**i * y_n``, and solves them with the
    pseudo-inverse of ``M``.

    Args:
        t: ``(B, N)`` sample times.
        w: ``(B, N)`` per-sample weights.
        y: ``(B, N, D)`` observations.

    Returns:
        ``(B, poly, D)`` tensor of coefficients, lowest order first.

    Raises:
        NotImplementedError: if ``self.poly`` is not 2, 3 or 4.
    """
    order = self.poly
    if order not in (2, 3, 4):
        # The original raised a plain string, which is a TypeError in Python 3.
        raise NotImplementedError('polynomial order {} is not implemented'.format(order))
    order = int(order)

    # Weighted moments sum(w * t**k) for k = 0 .. 2 * (order - 1), built by
    # repeated multiplication so the arithmetic matches w * t * t * ...
    weighted = w
    moments = [weighted.sum(-1)]
    for _ in range(2 * order - 2):
        weighted = weighted * t
        moments.append(weighted.sum(-1))
    mat = torch.stack(
        [moments[i + j] for i in range(order) for j in range(order)], dim=-1
    ).reshape(t.size(0), order, order)

    try:
        t_mat = torch.pinverse(mat)
    except RuntimeError:
        # Only the cubic fit had a fallback; other orders keep raising.
        if order != 3:
            raise
        print('error cooured:', mat)
        print(t)
        # One identity per batch element.  The previous ``repeat(B, 3, 3)``
        # produced a (B, 9, 9) tensor that could not be multiplied below.
        t_mat = torch.eye(order, dtype=mat.dtype, device=mat.device).unsqueeze(dim=0).repeat(t.size(0), 1, 1)

    # Right-hand side sum(w * y * t**k) for k = 0 .. order - 1.
    weighted_y = w.unsqueeze(dim=-1) * y
    t_col = t.unsqueeze(dim=-1)
    rhs = [weighted_y.sum(dim=1)]
    for _ in range(order - 1):
        weighted_y = weighted_y * t_col
        rhs.append(weighted_y.sum(dim=1))
    obser = torch.stack(rhs, dim=1)

    params = torch.matmul(t_mat, obser)
    return params
def post_process(self, x_embed, y_embed, z_embed, w, kwargs):
    """Fit a polynomial trajectory to the per-frame embeddings and decode it.

    Args:
        x_embed, y_embed, z_embed: per-frame embeddings, indexed
            ``[batch, frame, feature]``.
        w: ``(batch, frame)`` weights used in the least-squares fit.
        kwargs: batch dict; reads ``cam2bev``, ``all_dr_mat``, ``t`` and
            ``all_DR``.

    Returns:
        dict with keys ``dis``, ``vel``, ``s_dis``, ``rotate_vel``,
        ``rotate_pos``, ``acc_p``, ``rotate_acc`` and ``m_vel``.
        ``rotate_acc`` is ``None`` for ``poly == 2``, which has no
        acceleration head output.

    Raises:
        NotImplementedError: if ``self.poly`` is not 2, 3 or 4.
    """
    # camera -> BEV (row 1 of the result is not used)
    cam2bev = kwargs['cam2bev'].unsqueeze(dim=1)
    coord = torch.stack((x_embed, y_embed, z_embed), dim=-2)
    xyz_feat = torch.matmul(cam2bev, coord)
    x_feat = xyz_feat[:, :, 0]
    y_feat = xyz_feat[:, :, 2]

    # BEV -> world, using homogeneous coordinates
    p = torch.ones_like(x_feat)
    embed = torch.stack((x_feat, y_feat, p), dim=-2)
    res = torch.matmul(kwargs['all_dr_mat'], embed)
    z = torch.cat((res[:, :, 0], res[:, :, 1]), dim=-1)

    # Fit on mean-centred embeddings; the mean is added back below.
    avg = z.mean(dim=1).unsqueeze(dim=1)
    y = z - avg
    params = self.get_transf_mat(kwargs['t'], w, y)

    t = kwargs['t'].unsqueeze(dim=-1)
    dis = self.mv_fc5(self.relu(self.mv_fc4(z)))

    # Linear coefficient, decoded as ``m_vel``.  The original defined it
    # only for poly == 3, so poly 2 and 4 hit a NameError.
    # NOTE(review): confirm this is the intended quantity for poly == 4.
    m_v = params[:, [1]]
    if self.poly == 2:
        s_z = avg + params[:, [0]] + params[:, [1]] * t
        s_v = params[:, [1]]
        s_a = torch.zeros_like(s_v)
        acc = None
    elif self.poly == 3:
        # params[:, [0]]: distance, [1]: velocity, [2]: acceleration feature
        s_z = avg + params[:, [0]] + params[:, [1]] * t + params[:, [2]] * t * t
        s_v = params[:, [1]] + 2 * params[:, [2]] * t
        s_a = 2 * params[:, [2]]
        acc = self.vel_fc(s_a * t)
    elif self.poly == 4:
        s_z = avg + params[:, [0]] + params[:, [1]] * t + params[:, [2]] * t * t + params[:, [3]] * t * t * t
        s_v = params[:, [1]] + 2 * params[:, [2]] * t + 3 * params[:, [3]] * t * t
        s_a = 2 * params[:, [2]] + 6 * params[:, [3]] * t
        acc = self.vel_fc(s_a)
    else:
        raise NotImplementedError('polynomial order {} is not implemented'.format(self.poly))

    s_dis = self.mv_fc5(self.relu(self.mv_fc4(s_z)))
    vel = self.vel_fc(s_v)
    m_vel = self.vel_fc(m_v).squeeze(dim=1)

    # Rotate the decoded quantities by the angle in the last channel of all_DR.
    theta = kwargs['all_DR'][:, :, -1]
    r_mat = torch.stack(
        (torch.cos(theta), torch.sin(theta), -torch.sin(theta), torch.cos(theta)), dim=-1
    ).reshape(t.size(0), t.size(1), 2, 2)
    r_vel = torch.matmul(r_mat, vel.unsqueeze(dim=-1)).squeeze(dim=-1)
    r_pos = torch.matmul(r_mat, dis[..., :2].unsqueeze(dim=-1)).squeeze(dim=-1)
    if acc is None:
        # poly == 2: the original dereferenced None here
        r_acc = None
    else:
        r_acc = torch.matmul(r_mat, acc.unsqueeze(dim=-1)).squeeze(dim=-1)

    # The mean-velocity output uses the mean angle over the frames.
    m_theta = theta.mean(dim=1)
    m_mat = torch.stack(
        (torch.cos(m_theta), torch.sin(m_theta), -torch.sin(m_theta), torch.cos(m_theta)), dim=-1
    ).reshape(t.size(0), 2, 2)
    m_rv = torch.matmul(m_mat, m_vel.unsqueeze(dim=-1)).squeeze(dim=-1)

    res = {'dis': dis, 'vel': vel, 's_dis': s_dis, 'rotate_vel': r_vel, 'rotate_pos': r_pos,
           'acc_p': s_a, 'rotate_acc': r_acc, 'm_vel': m_rv}
    return res
def forward(self, return_loss=False, **kwargs):
    """Run the network on a batch of frame sequences.

    Reads ``img`` (5-D: batch, frames, channels, height, width), ``coord``
    (concatenated to ``img`` along the channel axis), ``all_r_vel`` (its
    second dimension gives the number of frames actually used) and
    ``all_weight`` from ``kwargs``; the remaining entries are consumed by
    ``post_process``, ``loss`` and ``get_predictions``.

    Returns:
        The loss dict in training mode, otherwise a one-element list holding
        the prediction dict.  ``return_loss`` is accepted but not consulted.
    """
    images = kwargs['img']
    batch, n_all, _, height, width = images.shape
    n_used = kwargs['all_r_vel'].size(1)
    coords = kwargs['coord']

    # Fold the frame axis into the batch axis for the 2D backbone.
    stacked = torch.cat((images, coords), dim=2)
    x = stacked.view(-1, images.size(2) + coords.size(2), height, width)
    stages = (
        (self.conv2a, self.conv2aa, self.conv2b),
        (self.conv3a, self.conv3aa, self.conv3b),
        (self.conv4a, self.conv4aa, self.conv4b),
        (self.conv5a, self.conv5aa, self.conv5b),
        (self.conv6aa, self.conv6a, self.conv6b),
    )
    for stage in stages:
        for layer in stage:
            x = layer(x)
    fmap = x
    f_h, f_w = fmap.size(2), fmap.size(3)

    # Keep only the first n_used frames of every sequence.
    feat = fmap.reshape(batch, -1, self.feat_dim, f_h, f_w)[:, :n_used]
    feat = feat.reshape(-1, self.feat_dim, f_h, f_w)

    # Box head: sigmoid output scaled by roi_scale, channels 2..5 kept.
    box_raw = self.bbox_fc(fmap).squeeze().sigmoid()
    scaled = self.roi_scale.to(images.device) * box_raw
    rois = scaled[..., [2, 3, 4, 5]].reshape(-1, 1, 4)

    # One RoI per kept frame, prefixed with its row index in ``feat``.
    roi_ids = torch.arange(n_used * batch, dtype=torch.int32, device=rois.device).unsqueeze(-1)
    used_rois = rois.reshape(batch, -1, 4)[:, :n_used].reshape(-1, 4)
    z = self.car_roi(feat, torch.cat((roi_ids, used_rois), dim=1))
    z = self.relu(self.roi_conv1(z))
    z = self.relu(self.roi_conv2(z))
    z = z.view(batch, n_used, 256)

    cls = self.class_fc(z).view(batch * n_used, -1) if self.classify else None

    frame_w = kwargs['all_weight']
    if self.use_weight:
        frame_w = self.wt_fc(fmap).reshape(batch, n_all)[..., :n_used] * frame_w

    x_embed = self.dis_X_encoding(z)
    y_embed = self.dis_Y_encoding(z)
    z_embed = self.dis_Z_encoding(z)
    aux = self.post_process(x_embed, y_embed, z_embed, frame_w, kwargs)

    if not self.training:
        res = {'rois': rois * 32}
        res.update(aux)
        return [self.get_predictions(res, kwargs)]

    # 'rois' is multiplied by 32 (the backbone stride); 'extra' keeps the
    # unscaled boxes for the ordering penalty in loss().
    res = {'rois': rois * 32, 'extra': rois, 'weight': frame_w, 'cls': cls}
    res.update(aux)
    return self.loss(res, kwargs)
def train_step(self, data, optimizer, **kwargs):
    """Run one training iteration and package the result for the runner.

    Args:
        data: batch dict forwarded to the model as keyword arguments; must
            contain ``'velocity'``, whose length is reported as the sample
            count.
        optimizer: unused here.

    Returns:
        dict with ``loss`` (total loss tensor), ``log_vars`` (scalar values
        for logging) and ``num_samples``.
    """
    total, log_vars = self._parse_losses(self(**data))
    return {
        'loss': total,
        'log_vars': log_vars,
        'num_samples': len(data['velocity']),
    }
def val_step(self, data, optimizer=None, **kwargs):
    """Run one validation iteration and package the result for the runner.

    Args:
        data: batch dict forwarded to the model as keyword arguments; must
            contain ``'img_metas'``, whose length is reported as the sample
            count.
        optimizer: unused here.

    Returns:
        dict with ``loss``, ``log_vars`` and ``num_samples``.
    """
    total, log_vars = self._parse_losses(self(**data))
    return {
        'loss': total,
        'log_vars': log_vars,
        'num_samples': len(data['img_metas']),
    }
def _parse_losses(self, losses, **kwargs):
    """Reduce raw loss outputs to a total loss and loggable scalars.

    Args:
        losses: mapping of name to tensor or list of tensors.

    Returns:
        ``(loss, log_vars)``: ``loss`` is the sum of every entry whose name
        contains ``'loss'``; ``log_vars`` is an ``OrderedDict`` of Python
        floats (averaged across ranks when distributed) including a
        ``'loss'`` entry.

    Raises:
        TypeError: if a value is neither a tensor nor a list.
    """
    log_vars = OrderedDict()
    for name, value in losses.items():
        if isinstance(value, torch.Tensor):
            log_vars[name] = value.mean()
            continue
        if isinstance(value, list):
            log_vars[name] = sum(item.mean() for item in value)
            continue
        raise TypeError(f'{name} is not a tensor or list of tensors')

    loss = sum(entry for key, entry in log_vars.items() if 'loss' in key)

    distributed = dist.is_available() and dist.is_initialized()
    if distributed:
        # Ranks logging different numbers of variables would make the
        # all_reduce calls below wait forever, so verify the count first.
        log_var_length = torch.tensor(len(log_vars), device=loss.device)
        dist.all_reduce(log_var_length)
        message = f"rank {dist.get_rank()} len(log_vars): {len(log_vars)} keys: {','.join(log_vars.keys())}"
        assert log_var_length == len(log_vars) * dist.get_world_size(), \
            'loss log variables are different across GPUs!\n' + message

    log_vars['loss'] = loss
    for name, value in log_vars.items():
        if distributed:
            # average over ranks for logging only
            value = value.data.clone()
            dist.all_reduce(value.div_(dist.get_world_size()))
        log_vars[name] = value.item()
    return loss, log_vars
@DATASETS.register_module()
class SmokeCropLoaderRadarFuse3(Dataset):
    """Dataset of per-object frame sequences cropped from annotated videos."""

    CLASSES = ['car']

    def __init__(self,
                 pair_file,
                 img_root,
                 pkl_root,
                 custom_mtx=None,
                 nframe=5,
                 img_rsz=[192, 224],
                 skip=5,
                 random_choose=True,
                 random_erase=False,
                 expand_ratio=2,
                 ref_coord=False,
                 crop_noise=True,
                 center_base=False,
                 auxy_root=None,
                 drop_dr_ratio=0,
                 pe_dim=0,
                 bbox_num=1,
                 feat_out=0,
                 aux_coord=True,
                 dist_range=None,
                 main_type=['Car', 'Truck', 'Bus'],
                 scale_range=[[-0.7, 0.7], [-0.4, 0.4], [0, 256], [0, 256]],
                 filter_ignore=False,
                 **kwards):
        """Store the configuration and build the sample index.

        Every argument is stored on the instance under its own name.  The
        ones whose use is visible in this class:

        Args:
            pair_file: annotation file path, or list of paths, indexed by
                ``load_infos``.
            pkl_root: directory holding ``all_calib.pkl`` and the per-frame
                annotation pickles.
            custom_mtx: square intrinsic matrix of the virtual camera
                (``load_image`` inverts it), or ``None`` to use the
                calibration's ``P2``.
            nframe: number of frames per sample.
            img_rsz: ``[height, width]`` of the network input crop.
            skip: stride between consecutive sample windows.
            random_choose: outside test mode, draw the frames of a window at
                random.
            auxy_root: optional directory scanned by ``load_bbox_infos``.
            dist_range: optional ``(min, max)`` filter on ``location[2]``.
            main_type: object types that are kept.
            filter_ignore: load the ignore list and skip the listed tracks.
            **kwards: only ``test_mode`` is read (default ``False``).
        """
        self.test_mode = kwards.get('test_mode', False)
        self.filter_ignore = filter_ignore
        self.dist_range = dist_range
        self.main_type = main_type
        self.aux_coord = aux_coord
        self.feat_out = feat_out
        self.drop_dr_ratio = drop_dr_ratio
        self.pkl_root = pkl_root
        self.bbox_num = bbox_num
        self.pe_dim = pe_dim
        self.ref_coord = ref_coord
        self.auxy_root = auxy_root
        self.img_root = img_root
        self.crop_noise = crop_noise
        self.skip = skip
        self.expand_ratio = expand_ratio
        self.random_choose = random_choose
        self.random_erase = random_erase
        self.nframe = nframe
        self.center_base = center_base
        self.img_rsz = img_rsz
        self.scale_range = scale_range
        # Keep None as None: np.array(None) is a 0-d object array, which
        # defeats the ``self.custom_mtx is None`` checks in get_cam_inner
        # and proj_bbox.
        self.custom_mtx = None if custom_mtx is None else np.array(custom_mtx)
        self.load_infos(pair_file, pkl_root)
        self.transform = transforms.Compose([transforms.Resize((img_rsz[0], img_rsz[1])), transforms.ToTensor()])
        self.augment = AugmentImagePair([0.8, 1.2, 0.5, 2.0, 0.8, 1.2], balance_scale=True)
        # One flag per sample, all zero.
        self.flag = np.zeros(len(self.infos), dtype=np.uint8)
def load_bbox_infos(self, root):
    """Index auxiliary crops whose file names embed a box as ``[a-b-c-d]``.

    Sets ``self.bbox_infos`` to a shuffled list of
    ``{'img_path': ..., 'bbox': [float, ...]}`` dicts, or to ``None`` when
    ``root`` is ``None``.  Files whose name has no bracketed part, or whose
    bracketed part is not ``-``-separated numbers, are skipped.
    """
    if root is None:
        self.bbox_infos = None
        return

    infos = []
    for name in os.listdir(root):
        try:
            # The split is inside the try so that names without '[' are
            # skipped instead of raising IndexError.
            items = name.split('[')[1].split(']')[0].split('-')
            bbox = [float(v) for v in items]
        except (IndexError, ValueError):
            continue
        infos.append({'img_path': os.path.join(root, name), 'bbox': bbox})
    random.shuffle(infos)
    self.bbox_infos = infos
def load_ignore(self, file):
    """Read the ignore list and return it as a dict keyed by track.

    Each non-empty line is split on ``', '``; its first three fields
    (batch, video, gid) form the key ``'batch+video+gid'``.  Values are
    always 0 -- the dict is only used for membership tests.

    Raises:
        IndexError: if a non-empty line has fewer than three fields.
    """
    ignore = {}
    with open(file, 'r') as fp:
        for line in fp:
            # Drop the line terminator so a three-field line does not end
            # up with '\n' inside the gid (which would never match a key).
            line = line.rstrip('\r\n')
            if not line:
                continue
            items = line.split(', ')
            key = '{}+{}+{}'.format(items[0], items[1], items[2])
            ignore[key] = 0
    return ignore
def load_infos(self, pair_file, pkl_root):
    """Build ``self.infos``, the list of sample windows, from annotations.

    For every kept track (``batch`` / ``video`` / ``gid``) the timestamps
    are sorted and a window end slides over them.  A window collects every
    stamp at most ``nframe * 100 + 1000`` before its end and is kept when it
    holds at least ``nframe`` stamps.  The end advances by 1 for ``batch5``
    data, and for training ``radar`` data whose upcoming states include one
    of the listed manoeuvre states; otherwise it advances by ``skip``.

    Also sets ``self.ignore``, ``self.data`` (test mode only),
    ``self.bbox_idx``, ``self.bbox_infos``, ``self.all_calib`` and
    ``self.pkl_calib``.

    Args:
        pair_file: annotation file path or list of paths.
        pkl_root: directory containing ``all_calib.pkl``.
    """
    if self.filter_ignore:
        self.ignore = self.load_ignore('/data1/mono_velocity-master/debug/err_infos/ignore')
    else:
        self.ignore = {}
    pair_file_list = pair_file if isinstance(pair_file, list) else [pair_file]
    max_gap = self.nframe * 100 + 1000
    is_train = not self.test_mode
    # States that trigger dense (stride 1) sampling.
    dense_states = ('拐点', '减速', '起步', '缓行', '加速', '静止')

    def read_pairs(path):
        """Load one annotation file with mmcv, falling back to pickle."""
        try:
            return mmcv.load(path)
        except Exception:
            # NOTE(review): pickle must only be used on trusted files.
            with open(path, 'rb') as fp:
                return pickle.load(fp)

    # The original condition ended in ``| 1`` and was therefore always
    # true; the cached-index branch is kept only as a manual switch.
    rebuild_index = True
    if rebuild_index:
        self.infos = []
        if self.test_mode:
            self.data = {}
        for f in pair_file_list:
            data = read_pairs(f)
            for batch in data:
                for video in data[batch]:
                    if video is None:
                        continue
                    track = data[batch][video]
                    img_folder = track['img_folder'] if 'img_folder' in track else None
                    for gid in track['data']:
                        key = '{}+{}+{}'.format(batch, video, gid)
                        if key in self.ignore:
                            continue
                        if ('type' in track) and (track['type'][gid] not in self.main_type):
                            continue
                        if is_train:
                            max_type = 6 if ('batch5' in batch) else type_cvtor[track['type'][gid]]
                        else:
                            max_type = 0
                        frames = track['data'][gid]
                        stamps = sorted(int(d) for d in frames.keys())
                        if self.dist_range is not None:
                            try:
                                stamps = [
                                    d for d in stamps
                                    if self.dist_range[0] < frames[str(d)]['location'][2] < self.dist_range[1]
                                ]
                            except Exception:
                                # Track without usable locations: report and skip.
                                print(batch, video, gid, self.test_mode)
                                continue
                        i = self.nframe - 1
                        while i < len(stamps):
                            sweeps = [d for d in stamps if 0 <= stamps[i] - d < max_gap]
                            if len(sweeps) < self.nframe:
                                i += self.skip
                                continue
                            self.infos.append({'batch': batch,
                                               'video': video,
                                               'gid': gid,
                                               'type': max_type,
                                               'img_folder': img_folder,
                                               'sweeps': [str(s) for s in sweeps]})
                            if ('radar' in batch) and is_train:
                                all_state = [frames[str(s)]['state'] for s in stamps[i:i + self.skip]]
                            else:
                                all_state = None
                            dense = all_state is not None and any(s in all_state for s in dense_states)
                            if dense or ('batch5' in batch):
                                i += 1
                            else:
                                i += self.skip
            if self.test_mode:
                self.data.update(data)
    else:
        # NOTE: adjust this path for the local environment
        self.infos = mmcv.load("/data1/velocity_model/G3090/annotaions/merge_crop_info_valid_test-acc-20frame.pkl")
        self.data = {}
        for f in pair_file_list:
            self.data.update(read_pairs(f))
    print('num samples:', len(self.infos))

    # auxiliary boxes
    self.bbox_idx = 0
    self.load_bbox_infos(self.auxy_root)

    # NOTE(review): get_data_info indexes self.all_calib, which fails while
    # this is None; the original load of a calib.pkl was commented out.
    self.all_calib = None
    self.pkl_calib = mmcv.load(os.path.join(pkl_root, 'all_calib.pkl'))
def __len__(self):
    """Return the number of indexed samples (entries of ``self.infos``)."""
    samples = self.infos
    return len(samples)
def load_img_decry(self, image_path):
    """Load an image as BGR, decrypting it when a plain decode fails.

    Decoding is tried in order: a direct ``cv2.imdecode`` of the file
    bytes, decryption through the ``Auth*`` library (the handle is created
    once and cached on ``self.handle_dec``), then ``cv2.imread``.  Images
    taller than 1080 rows are cropped to rows 100..1179.

    Raises:
        IOError: if none of the decoders produced an image.
    """
    img_decode = cv2.imdecode(np.fromfile(image_path, dtype=np.uint8), 1)
    if img_decode is None:
        if not hasattr(self, 'handle_dec'):
            kms_port = 443
            kms_ip = "encryptplatxn.hikvision.com"
            ca_path = "/data1/opensourcelib/G3090/deal_with_dataset/decrypt/misc/encryptplatxn.hikvision.com.pem"
            id_path = "/data1/opensourcelib/G3090/deal_with_dataset/decrypt/misc/id.txt"
            # SECURITY(review): credentials are hard-coded in source; move
            # them to configuration / a secret store and rotate them.
            Dec_user_name = "zhengxiuzhi"
            Dec_user_password = "Hik12345"
            dec_flag = 0
            self.handle_dec = ctypes.c_void_p()
            # NOTE(review): the return code is not checked.
            AuthEncryptCreateHandle(ctypes.pointer(self.handle_dec), kms_ip, kms_port, ca_path, Dec_user_name, Dec_user_password, dec_flag, id_path)
        fsize = os.path.getsize(image_path)
        pbuff = ctypes.create_string_buffer(fsize)
        # The buffer length is passed in for validation (too small is an
        # error); on success the actual output length is written back.
        dtat_len = ctypes.c_longlong(fsize)
        AuthDecryptFileToBuf(image_path, pbuff, ctypes.pointer(dtat_len), self.handle_dec)
        img_arr = np.frombuffer(pbuff, np.uint8)
        img_decode = cv2.imdecode(img_arr, cv2.IMREAD_COLOR + cv2.IMREAD_IGNORE_ORIENTATION)
    if img_decode is None:
        img_decode = cv2.imread(image_path)
    if img_decode is None:
        # Previously this fell through to ``None.shape`` (AttributeError).
        raise IOError('failed to decode image: {}'.format(image_path))
    if img_decode.shape[0] > 1080:
        img_decode = img_decode[100:1180, :]
    return img_decode
def load_img_decry2(self, image_path):
    """Load an image as BGR, decrypting it when a plain decode fails.

    Same procedure as ``load_img_decry`` but with a different certificate
    location and account; the handle is cached on ``self.handle_dec2``.
    Decoding is tried in order: direct ``cv2.imdecode``, decryption through
    the ``Auth*`` library, then ``cv2.imread``.  Images taller than 1080
    rows are cropped to rows 100..1179.

    Raises:
        IOError: if none of the decoders produced an image.
    """
    img_decode = cv2.imdecode(np.fromfile(image_path, dtype=np.uint8), 1)
    if img_decode is None:
        if not hasattr(self, 'handle_dec2'):
            kms_port = 443
            kms_ip = "encryptplatxn.hikvision.com"
            # NOTE: replace with the local path of the decryption files
            ca_path = "/data1/velocity_model/G3090/mmdetection3d/mmdet3d/datasets/decrypt/misc/encryptplatxn.hikvision.com.pem"
            id_path = "/data1/velocity_model/G3090/mmdetection3d/mmdet3d/datasets/decrypt/misc/id.txt"
            # SECURITY(review): credentials are hard-coded in source; move
            # them to configuration / a secret store and rotate them.
            Dec_user_name = "wuziyang"
            Dec_user_password = "Wzy123456"
            dec_flag = 0
            self.handle_dec2 = ctypes.c_void_p()
            # NOTE(review): the return code is not checked.
            AuthEncryptCreateHandle(ctypes.pointer(self.handle_dec2), kms_ip, kms_port, ca_path, Dec_user_name, Dec_user_password, dec_flag, id_path)
        fsize = os.path.getsize(image_path)
        pbuff = ctypes.create_string_buffer(fsize)
        # The buffer length is passed in for validation (too small is an
        # error); on success the actual output length is written back.
        dtat_len = ctypes.c_longlong(fsize)
        AuthDecryptFileToBuf(image_path, pbuff, ctypes.pointer(dtat_len), self.handle_dec2)
        img_arr = np.frombuffer(pbuff, np.uint8)
        img_decode = cv2.imdecode(img_arr, cv2.IMREAD_COLOR + cv2.IMREAD_IGNORE_ORIENTATION)
    if img_decode is None:
        img_decode = cv2.imread(image_path)
    if img_decode is None:
        # Previously this fell through to ``None.shape`` (AttributeError).
        raise IOError('failed to decode image: {}'.format(image_path))
    if img_decode.shape[0] > 1080:
        img_decode = img_decode[100:1180, :]
    return img_decode
def distort_point(self, uv, mtx, distort_K):
    """Apply rational radial plus tangential lens distortion to pixels.

    Args:
        uv: array whose last axis starts with ``(u, v)`` pixel coordinates.
        mtx: intrinsic matrix; ``[0, 0]`` / ``[1, 1]`` are the focal
            lengths and ``[0, 2]`` / ``[1, 2]`` the principal point.
        distort_K: at least eight coefficients ordered
            ``k1, k2, p1, p2, k3, k4, k5, k6``.

    Returns:
        Array of distorted ``(u, v)`` stacked on a new last axis.
    """
    fx, fy = mtx[0, 0], mtx[1, 1]
    cx, cy = mtx[0, 2], mtx[1, 2]

    # normalised image coordinates
    xn = (uv[..., 0] - cx) / fx
    yn = (uv[..., 1] - cy) / fy

    r2 = xn * xn + yn * yn
    r4 = r2 * r2
    r6 = r4 * r2

    k_1, k_2, p_1, p_2, k_3, k_4, k_5, k_6 = (distort_K[i] for i in range(8))
    numer = 1 + k_1 * r2 + k_2 * r4 + k_3 * r6
    denom = 1 + k_4 * r2 + k_5 * r4 + k_6 * r6

    xd = xn * numer / denom + 2 * p_1 * xn * yn + p_2 * (r2 + 2 * xn * xn)
    yd = yn * numer / denom + 2 * p_2 * xn * yn + p_1 * (r2 + 2 * yn * yn)

    # back to pixels
    return np.stack((xd * fx + cx, yd * fy + cy), axis=-1)
def load_image(self, calib, info, bbox, stamp):
    """Crop ``bbox`` from a frame and resample it to the network input size.

    For every output pixel the matching virtual-camera pixel is mapped to
    the real camera (``P2 @ inv(custom_mtx)``), lens distortion is applied,
    and the nearest source pixel is copied.  Pixels that fall outside the
    source image stay black.

    Args:
        calib: dict with ``'P2'`` and ``'distCoeffs'``.
        info: sample info; ``info['img_folder']`` holds ``<stamp>.jpg``.
        bbox: ``(x0, y0, x1, y1)`` crop in virtual-camera pixels.
        stamp: frame timestamp, used as the file name.

    Returns:
        ``(img, scale)``: ``img`` is a float32 RGB tensor ``(3, H, W)`` in
        [0, 1] and ``scale`` is output height divided by box height.  On any
        failure a message is printed and ``(None, 1)`` is returned.
    """
    try:
        mtx = calib['P2'][:, :3]
        distort = calib['distCoeffs'][:8]
        # load image
        img_path = '{}/{}.jpg'.format(info['img_folder'], stamp)
        image = self.load_img_decry(img_path)
        img_h, img_w = image.shape[0], image.shape[1]
        source = image.reshape(-1, 3)
        # resize
        out_h, out_w = self.img_rsz[0], self.img_rsz[1]
        w = bbox[2] - bbox[0]
        h = bbox[3] - bbox[1]
        scale = out_h / h
        dst = np.zeros((out_h * out_w, 3), dtype=np.uint8)
        if self.center_base:
            # Sample the whole output grid, centred on the box centre.
            u = np.arange(out_w)
            v = np.arange(out_h)
            uu, vv = np.meshgrid(u, v)
            suu = (uu + 0.5 - 0.5 * out_w) / scale + 0.5 * (bbox[0] + bbox[2])
            svv = (vv + 0.5 - 0.5 * out_h) / scale + 0.5 * (bbox[1] + bbox[3])
        else:
            # Sample only the scaled box, anchored at its top-left corner.
            # Clip to the output size: a wider box used to wrap pixels into
            # the next row or index past the end of ``dst``.
            u = np.arange(min(int(scale * w), out_w))
            v = np.arange(min(int(scale * h), out_h))
            uu, vv = np.meshgrid(u, v)
            suu = (uu + 0.5) / scale + bbox[0]
            svv = (vv + 0.5) / scale + bbox[1]
        spp = np.ones_like(suu)
        # virtual camera -> real camera
        virt2real = np.matmul(mtx, np.linalg.inv(self.custom_mtx))
        uvz = np.matmul(
            virt2real.reshape(1, 1, 3, 3),
            np.expand_dims(np.stack((suu, svv, spp), axis=-1), axis=-1)).squeeze(axis=-1)
        # distort
        dist_uv = self.distort_point(uvz, mtx, distort)
        # nearest-pixel lookup, restricted to pixels inside the source image
        x1y1 = dist_uv.astype(np.int64).reshape(-1, 2)
        mask = (x1y1[..., 0] >= 0) & (x1y1[..., 0] < img_w) & (x1y1[..., 1] >= 0) & (x1y1[..., 1] < img_h)
        src_ind = x1y1[:, 0] + x1y1[:, 1] * img_w
        dst_ind = (uu + vv * out_w).reshape(-1)
        dst[dst_ind[mask]] = source[src_ind[mask]]
        dst = dst.reshape(out_h, out_w, 3)
        img = torch.from_numpy(cv2.cvtColor(dst, cv2.COLOR_BGR2RGB).astype(np.float32).transpose(2, 0, 1) / 255)
    except Exception:
        # ``Exception`` rather than a bare except, so Ctrl-C and SystemExit
        # are no longer swallowed.
        print('load {}/{}.jpg failed'.format(info['img_folder'], stamp))
        img = None
        scale = 1
    return img, scale
def proj_bbox(self, calib, anno):
    """Project an annotated 3D box to its 2D bounding box in the image.

    Args:
        calib: dict with ``'Tr_velo_to_cam'`` (3x4) and, when no custom
            intrinsics are set, ``'P2'``.
        anno: dict with ``'position'`` (3 values), ``'dimension'``
            (3 values) and ``'theta'``.

    Returns:
        ``np.array([xmin, ymin, xmax, ymax])`` clamped to ``[0, 1920]``
        horizontally and ``[0, 1080]`` vertically.  With ``crop_noise`` set,
        the box is randomly perturbed first (up to 5 % on most parameters,
        10 % on the third one, and 10 degrees on the angle).
    """
    # 3D box: position (3), dimension (3), rotation angle
    cube = anno['position'] + anno['dimension'] + [anno['theta']]
    cube = np.expand_dims(np.array([float(d) for d in cube]), axis=0)
    if self.crop_noise:
        var = np.array([0.05, 0.05, 0.1, 0.05, 0.05, 0.05])
        prob = 2 * (np.random.rand(6) - 0.5)
        cube[0, :6] *= (1 + prob * var)
        cube[0, 6] += 2 * (random.random() - 0.5) * math.pi * 10 / 180

    # Explicit scalar: passing a 1-element ndarray to math.cos/sin relies on
    # an implicit array-to-scalar conversion that NumPy has deprecated.
    rot = float(cube[0, 6])
    cos_r, sin_r = math.cos(rot), math.sin(rot)
    # rotation about the second (y) axis
    R = np.eye(3)
    R[0, 0] = cos_r
    R[0, 2] = sin_r
    R[2, 0] = -sin_r
    R[2, 2] = cos_r
    R = R.reshape(1, 1, 3, 3)

    # Unit-box corners: x and z are centred, y spans -1..0.
    corners = np.array([[0.5, -1, 0.5],
                        [0.5, -1, -0.5],
                        [-0.5, -1, -0.5],
                        [-0.5, -1, 0.5],
                        [0.5, 0, 0.5],
                        [0.5, 0, -0.5],
                        [-0.5, 0, -0.5],
                        [-0.5, 0, 0.5]]).reshape(1, 8, 3)
    corners = (corners * np.expand_dims(cube[:, 3:6], axis=1)).reshape(1, 8, 3, 1)
    rot_corners = np.matmul(R, corners)
    rot_corners += cube[:, :3].reshape(-1, 1, 3, 1)
    rot_corners = np.concatenate((rot_corners, np.ones((1, 8, 1, 1))), axis=2)

    # to camera coordinates
    velo2cam = np.eye(4)
    velo2cam[:3] = calib['Tr_velo_to_cam']
    velo2cam = velo2cam.reshape(1, 1, 4, 4)
    xyzc = np.matmul(velo2cam, rot_corners)[:, :, :3, :]

    # to image coordinates
    if self.custom_mtx is not None:
        mtx = self.custom_mtx
    else:
        mtx = calib['P2']
    cam2img = mtx[:, :3].reshape(1, 1, 3, 3)
    uvs = np.matmul(cam2img, xyzc).squeeze(axis=-1)
    uvs[..., 0] /= uvs[..., 2]
    uvs[..., 1] /= uvs[..., 2]

    xmin = max(0, min(uvs[..., 0].min(axis=1)[0], 1920))
    xmax = max(0, min(uvs[..., 0].max(axis=1)[0], 1920))
    ymin = max(0, min(uvs[..., 1].min(axis=1)[0], 1080))
    ymax = max(0, min(uvs[..., 1].max(axis=1)[0], 1080))
    bbox = np.array([xmin, ymin, xmax, ymax])
    return bbox
def decode_annos(self, calib, anno):
    """Convert an annotation's centre and velocity to camera coordinates.

    The annotated position is shifted by half of ``dimension[1]`` along the
    second axis before the full ``Tr_velo_to_cam`` transform is applied.
    The velocity is transformed with the translation column zeroed, i.e.
    it is only rotated.

    Returns:
        ``(xyzc, velc)``: two length-3 arrays.
    """
    # homogeneous box centre
    center = np.array([float(d) for d in anno['position']] + [1])
    center[1] -= 0.5 * float(anno['dimension'][1])
    center = center.reshape(4, 1)

    # to camera coordinates
    transform = np.eye(4)
    transform[:3] = calib['Tr_velo_to_cam']
    transform = transform.reshape(4, 4)
    xyzc = (transform @ center)[:, 0][:3]

    # rotate the velocity only: drop the translation column
    transform[:3, 3] *= 0
    velocity = np.array([float(d) for d in anno['velocity']] + [1]).reshape(4, 1)
    velc = (transform @ velocity)[:, 0][:3]
    return xyzc, velc
def get_cam_inner(self, calib):
    """Return ``(fx, fy, cx, cy)`` of the active intrinsic matrix.

    ``self.custom_mtx`` is used when it is set, otherwise ``calib['P2']``.
    """
    intrinsics = calib['P2'] if self.custom_mtx is None else self.custom_mtx
    return intrinsics[0, 0], intrinsics[1, 1], intrinsics[0, 2], intrinsics[1, 2]
def get_stamps(self, info):
    """Choose the ``nframe`` timestamps used for one sample.

    With ``random_choose`` set and outside test mode, ``nframe`` stamps are
    drawn at random from ``info['sweeps']`` and returned in chronological
    order; otherwise the last ``nframe`` stamps are returned.
    ``info['sweeps']`` itself is left untouched.

    Args:
        info: sample info whose ``'sweeps'`` is a list of integer
            timestamps stored as strings.
    """
    sweeps = info['sweeps']
    if self.random_choose and not self.test_mode:
        # Shuffle a copy so the caller's list keeps its order.
        pool = list(sweeps)
        random.shuffle(pool)
        # Sort numerically: as plain strings '1000' would sort before '900'.
        return sorted(pool[:self.nframe], key=int)
    return sweeps[-self.nframe:]
def encode_coord(self, calib, scale, flow_bbox):
    """Build the coordinate channels that accompany one image crop.

    * ``flow_bbox is None``: random values of the shape the other branches
      would produce.
    * ``pe_dim == 0``: per-pixel maps at input resolution -- normalised
      camera x and y (y stays in crop units when ``ref_coord`` is set) and,
      with ``aux_coord``, two extra maps scaled by focal length over box
      size.
    * otherwise: sine/cosine positional encoding of the same coordinates on
      the 1/32-resolution grid, ``2 * pe_dim`` channels.

    Args:
        calib: calibration dict passed to ``get_cam_inner``.
        scale: crop scale as returned by ``load_image``.
        flow_bbox: ``(1, 4)`` array ``[[x0, y0, x1, y1]]`` or ``None``.

    Returns:
        float tensor ``(channels, rows, cols)``.
    """
    out_h, out_w = self.img_rsz[0], self.img_rsz[1]

    if flow_bbox is None:
        if self.pe_dim > 0:
            return torch.rand(self.pe_dim * 2, out_h // 32, out_w // 32).float()
        # 4 channels when the two auxiliary maps are enabled
        n_channels = 4 if self.aux_coord else 2
        return torch.rand(n_channels, out_h, out_w).float()

    cam_fx, cam_fy, cam_cx, cam_cy = self.get_cam_inner(calib)
    left, top, right, bottom = (flow_bbox[0, i] for i in range(4))

    if self.pe_dim == 0:
        u = np.arange(out_w)
        v = np.arange(out_h)
        if self.center_base:
            # the crop is centred on the box centre
            x = ((u + 0.5 - 0.5 * out_w) / scale + 0.5 * (left + right) - cam_cx) / cam_fx
            y = (v + 0.5 - 0.5 * out_h) / scale
            if not self.ref_coord:
                y = (y + 0.5 * (top + bottom) - cam_cy) / cam_fy
        else:
            # the crop is anchored at the box's top-left corner
            x = (u / scale + left - cam_cx) / cam_fx
            y = v / scale
            if not self.ref_coord:
                y = (y + top - cam_cy) / cam_fy

        # broadcast the 1-D coordinates to (1, H, W) maps
        x_map = np.repeat(x[None, None, :], out_h, axis=1)
        y_map = np.repeat(y[None, :, None], out_w, axis=2)

        if not self.aux_coord:
            return torch.from_numpy(np.concatenate((x_map, y_map), axis=0)).float()

        # NOTE: two extra channels, kept for consistency with earlier versions
        uu, vv = np.meshgrid(u, v)
        x_prime = uu * cam_fx / (right - left + 1e-8) / out_w
        y_prime = vv * cam_fy / (bottom - top + 1e-8) / out_h
        planes = [x_map, y_map, x_prime[None], y_prime[None]]
        return torch.from_numpy(np.concatenate(planes)).float()

    # sine/cosine positional encoding on the stride-32 grid
    temperature = 10000
    dim_t = torch.arange(self.pe_dim, dtype=torch.float32)
    dim_t = temperature ** (2 * (dim_t // 2) / self.pe_dim)
    w = out_w // 32
    h = out_h // 32
    u = torch.arange(w)
    v = torch.arange(h)
    map_scale = scale / 32
    if self.center_base:
        uu = ((u + 0.5 - 0.5 * w) / map_scale + 0.5 * (left + right) - cam_cx) / cam_fx
        if self.ref_coord:
            vv = (v + 0.5 - 0.5 * h) / map_scale
        else:
            vv = ((v + 0.5 - 0.5 * h) / map_scale + 0.5 * (top + bottom) - cam_cy) / cam_fy
    else:
        uu = (u / map_scale + left - cam_cx) / cam_fx
        if self.ref_coord:
            vv = v / map_scale
        else:
            vv = (v / map_scale + top - cam_cy) / cam_fy
    pos_y, pos_x = torch.meshgrid(vv, uu)

    def interleave(pos):
        """Sine on even frequency slots, cosine on odd ones, interleaved."""
        pos = pos[:, :, None] / dim_t
        return torch.stack((pos[:, :, 0::2].sin(), pos[:, :, 1::2].cos()), dim=-1).flatten(-2)

    coord = torch.cat((interleave(pos_y), interleave(pos_x)), dim=-1).permute(2, 0, 1)
    return coord
def get_data_info(self, idx):
    """Build the sample dict for one non-radar sequence.

    For every timestamp returned by ``self.get_stamps(info)`` the per-frame
    annotation pickle is loaded, its box is projected with
    ``self.proj_bbox``, an enlarged crop window is produced by
    ``generate_flow_rois`` and the crop plus its coordinate encoding are
    obtained from ``self.load_image`` / ``self.encode_coord``.

    No dead-reckoning data is read here: ``all_DR`` and ``rois`` stay zero,
    ``all_DR_vel`` / ``all_r_vel`` mirror the annotated velocity,
    ``all_DR_pos`` mirrors the annotated location, and ``cam2bev`` is the
    identity.

    Args:
        idx: index into ``self.infos``.

    Returns:
        dict of tensors with keys ``img``, ``rois``, ``location``,
        ``velocity``, ``coord``, ``t``, ``all_weight``, ``all_DR``,
        ``all_DR_vel``, ``all_r_vel``, ``all_r_pos``, ``all_DR_pos``,
        ``all_dr_mat`` and ``cam2bev``; passed through ``self.augment``
        when not in test mode.
    """
    info = copy.deepcopy(self.infos[idx])
    calib = self.all_calib[self.pkl_calib[info['batch']][info['video']]['calib']]['2M']
    stamps = self.get_stamps(info)

    nframe = self.nframe
    all_location = np.zeros((nframe, 3), dtype=np.float32)
    all_velocity = np.zeros((nframe, 3), dtype=np.float32)
    all_img = np.zeros((nframe, 3, self.img_rsz[0], self.img_rsz[1]), dtype=np.float32)
    all_weight = np.ones((nframe), dtype=np.float32)
    all_stamps = np.zeros((nframe), dtype=np.float32)
    all_DR = np.zeros((nframe, 3), dtype=np.float32)
    all_DR_vel = np.zeros((nframe, 3), dtype=np.float32)
    all_r_vel = np.zeros((nframe, 3), dtype=np.float32)
    all_DR_pos = np.zeros((nframe, 3), dtype=np.float32)
    all_2D_bbox = np.zeros((nframe, 4), dtype=np.float32)
    all_coord = []

    for i, stamp in enumerate(stamps):
        anno = mmcv.load('{}/{}/{}/{}/{}.pkl'.format(self.pkl_root, info['batch'], info['video'], info['gid'], stamp))
        bbox = self.proj_bbox(calib, anno)
        location, velocity = self.decode_annos(calib, anno)
        # Only the enlarged window is needed; the second return value is unused.
        flow_bbox, _ = generate_flow_rois(np.expand_dims(bbox, axis=0), self.expand_ratio, 1920, 1080, pad=5)
        crop, scale = self.load_image(calib, info, flow_bbox[0], stamp)
        coord = self.encode_coord(calib, scale, flow_bbox)

        all_location[i] = location
        all_velocity[i] = velocity
        all_img[i] = crop
        all_stamps[i] = 0.001 * float(stamp)
        # Without dead-reckoning input the DR channels fall back to the labels.
        all_DR_vel[i] = velocity
        all_r_vel[i] = velocity
        all_DR_pos[i] = location
        all_coord.append(coord)

    all_coord = torch.stack(all_coord, dim=0)
    all_2D_bbox = torch.from_numpy(all_2D_bbox)
    all_location = torch.from_numpy(all_location)
    all_velocity = torch.from_numpy(all_velocity)
    all_img = torch.from_numpy(all_img)
    all_stamps = torch.from_numpy(all_stamps)
    all_weight = torch.from_numpy(all_weight)
    all_DR = torch.from_numpy(all_DR)
    all_DR_vel = torch.from_numpy(all_DR_vel)
    all_r_vel = torch.from_numpy(all_r_vel)
    all_DR_pos = torch.from_numpy(all_DR_pos)
    all_dr_mat = torch.tensor([[0, -1, 0], [1, 0, 0], [0, 0, 1]]).unsqueeze(dim=0).repeat(nframe, 1, 1).float()

    # Timestamps are centred on the sequence mean.
    t = all_stamps - all_stamps.mean()

    if self.random_erase > 0:
        a = torch.rand(nframe)
        # Clamp the quantile index: with random_erase >= 1 the raw index
        # equals nframe and would run past the end of the sorted tensor.
        k = min(int(self.random_erase * nframe), nframe - 1)
        v = torch.sort(a)[0][k] * random.random()
        # NOTE(review): this makes all_weight a bool mask, whereas the default
        # above is float32 - confirm consumers accept both dtypes.
        all_weight = ~(a < v)

    sample = {
        'img': all_img,
        'rois': all_2D_bbox,
        'location': all_location,
        'velocity': all_velocity,
        'coord': all_coord,
        't': t,
        'all_weight': all_weight,
        'all_DR': all_DR,
        'all_DR_vel': all_DR_vel,
        'all_r_vel': all_r_vel,
        'all_r_pos': all_location[..., [0, 2]],
        'all_DR_pos': all_DR_pos,
        'all_dr_mat': all_dr_mat,
        'cam2bev': torch.eye(3).float(),
    }
    if not self.test_mode:
        sample = self.augment(sample)
    return sample
def crop_image(self, info, anno, stamp):
    """Load the stored crop of one frame together with its ROI and scale.

    Only the 192x224 and 128x128 values of ``self.img_rsz`` are handled.

    Args:
        info: sample record; ``info['video']``, ``info['gid']`` and
            ``info['batch']`` are read.
        anno: per-frame annotation mapping. The 192x224 path reads
            ``scale`` and ``roi`` (plus ``flow`` when re-centring); the
            128x128 path reads ``scale_128`` and, if present, ``roi_128``.
        stamp: frame timestamp, used as the image file stem.

    Returns:
        ``(crop, bbox, scale)``. ``bbox`` is ``None`` on the 128x128 path
        when the annotation carries no ``roi_128``.

    Raises:
        KeyError: 128x128 annotation without ``scale_128`` outside test mode.
        NotImplementedError: ``self.img_rsz`` is not a supported size.
    """
    height, width = self.img_rsz[0], self.img_rsz[1]

    if height == 192 and width == 224:
        img_path = '{}/{}/{}/{}.bmp'.format(self.img_root, info['video'], info['gid'], stamp)
        crop = self.load_img_decry2(img_path)
        scale = anno['scale']
        bbox = np.array(anno['roi'])
        # Batches 1-2 were cropped anchored at the top-left corner, so the
        # content is shifted towards the crop centre when center_base is set.
        if self.center_base and (info['batch'] == 'radar_crop_info_batch12'):
            flow = anno['flow']
            cx = 0.5 * (flow[2] - flow[0]) * scale
            cy = 0.5 * (flow[3] - flow[1]) * scale
            dx = int(crop.shape[1] / 2 - cx)
            dy = int(crop.shape[0] / 2 - cy)
            tsf = np.zeros_like(crop)
            tsf[dy:crop.shape[0] - dy, dx:crop.shape[1] - dx] = crop[:crop.shape[0] - 2 * dy, :crop.shape[1] - 2 * dx]
            crop = tsf
            # NOTE(review): bbox is not shifted along with the pixels here -
            # confirm that is intended.
    elif height == 128 and width == 128:
        try:
            img_path = '{}/{}/{}/{}-128x128.bmp'.format(self.img_root, info['video'], info['gid'], stamp)
            crop = self.load_img_decry2(img_path)
        except Exception:
            # Best-effort fallback to the second image root.
            img_path = '{}/{}/{}/{}-128x128.bmp'.format('/data1/DataSets/cropsets/imgs', info['video'], info['gid'], stamp)
            crop = self.load_img_decry2(img_path)
        try:
            scale = anno['scale_128']
        except KeyError as err:
            if not self.test_mode:
                # Raising a bare string is itself a TypeError in Python 3.
                raise KeyError('no scale info') from err
            # Test annotations may only carry the full-size scale.
            scale = anno['scale']
            bbox = None
        else:
            bbox = np.array(anno['roi_128']) if 'roi_128' in anno else None
    else:
        raise NotImplementedError('not inplement img size')
    return crop, bbox, scale
def get_data_info_radar(self, idx):
    """Build the sample dict for one radar sequence.

    Per frame, the annotation supplies ``location``, ``velocity``, ``flow``,
    ``dr``, ``dr_vel``, ``r_vel`` and ``dr_pos`` (plus ``sp_vel`` / ``sp_acc``
    outside test mode); the image comes from ``self.crop_image`` and the
    coordinate encoding from ``self.encode_coord``. In test mode annotations
    and calibration are read from ``self.data``; otherwise annotations are
    loaded from pickles under ``self.pkl_root`` and calibration from
    ``self.pkl_calib``.

    Args:
        idx: index into ``self.infos``.

    Returns:
        dict of tensors with keys ``img``, ``rois``, ``location``,
        ``velocity``, ``coord``, ``t``, ``all_weight``, ``all_DR``,
        ``all_DR_vel``, ``all_r_vel``, ``all_r_pos``, ``all_DR_pos``,
        ``all_dr_mat``, ``cam2bev``, ``all_sp_vel``, ``all_sp_acc`` and
        ``all_type``. It is passed through ``self.augment`` when training or
        when ``self.feat_out`` is set, and the DR channels are replaced by
        the labels with probability ``self.drop_dr_ratio``.
    """
    info = copy.deepcopy(self.infos[idx])
    stamps = self.get_stamps(info)

    # Calibration lives in self.data for evaluation and in self.pkl_calib for training.
    calib_table = self.data if self.test_mode else self.pkl_calib
    video_calib = calib_table[info['batch']][info['video']]
    avm2car = video_calib['avm2car']
    cam2car = video_calib['came2car']
    bev2cam = video_calib['bev2cam']

    nframe = self.nframe
    all_type = np.ones((nframe), dtype=np.float32)
    all_location = np.zeros((nframe, 3), dtype=np.float32)
    all_velocity = np.zeros((nframe, 3), dtype=np.float32)
    all_img = np.zeros((nframe, 3, self.img_rsz[0], self.img_rsz[1]), dtype=np.float32)
    all_weight = np.ones((nframe), dtype=np.float32)
    all_stamps = np.zeros((nframe), dtype=np.float32)
    all_DR = np.zeros((nframe, 3), dtype=np.float32)
    all_DR_vel = np.zeros((nframe, 3), dtype=np.float32)
    all_r_vel = np.zeros((nframe, 3), dtype=np.float32)
    all_DR_pos = np.zeros((nframe, 3), dtype=np.float32)
    all_2D_bbox = np.zeros((nframe, 4), dtype=np.float32)
    all_sp_vel = np.zeros((nframe), dtype=np.float32)
    all_sp_acc = np.zeros((nframe), dtype=np.float32)
    all_coord = []

    for i, stamp in enumerate(stamps):
        if self.test_mode:
            anno = copy.deepcopy(self.data[info['batch']][info['video']]['data'][info['gid']][stamp])
        else:
            anno = mmcv.load('{}/{}/{}/{}/{}.pkl'.format(self.pkl_root, info['batch'], info['video'], info['gid'], stamp))
        location = anno['location']
        velocity = anno['velocity']
        flow_bbox = np.array([anno['flow']])
        dr = np.array([anno['dr']])
        dr_vel = np.array([anno['dr_vel']])
        r_vel = np.array([anno['r_vel']])
        dr_pos = np.array([anno['dr_pos']])

        # Load the image: BGR -> RGB, HWC -> CHW, divided by 255. It is kept
        # as a NumPy array because it is written straight into all_img.
        crop, rect_2d, scale = self.crop_image(info, anno, stamp)
        crop = cv2.cvtColor(crop, cv2.COLOR_BGR2RGB).astype(np.float32).transpose(2, 0, 1) / 255

        # Coordinate encoding for this frame.
        all_coord.append(self.encode_coord(None, scale, flow_bbox))

        all_location[i] = location
        all_velocity[i] = velocity
        all_img[i] = crop
        all_stamps[i] = 0.001 * float(stamp)
        all_DR[i] = dr
        all_DR_vel[i] = dr_vel
        all_r_vel[i] = r_vel
        all_DR_pos[i] = dr_pos
        # NOTE(review): crop_image can return None for the ROI on the
        # 128x128 path - confirm how such rows should be treated.
        all_2D_bbox[i] = rect_2d
        if not self.test_mode:
            all_sp_vel[i] = anno['sp_vel']
            all_sp_acc[i] = anno['sp_acc']

    all_location = torch.from_numpy(all_location)
    all_velocity = torch.from_numpy(all_velocity)
    all_img = torch.from_numpy(all_img)
    # encode_coord yields tensors; stack them directly, as the other loaders do.
    all_coord = torch.stack(all_coord, dim=0)
    all_stamps = torch.from_numpy(all_stamps)
    all_weight = torch.from_numpy(all_weight)
    all_DR = torch.from_numpy(all_DR)
    all_DR_vel = torch.from_numpy(all_DR_vel)
    all_r_vel = torch.from_numpy(all_r_vel)
    all_DR_pos = torch.from_numpy(all_DR_pos)
    all_2D_bbox = torch.from_numpy(all_2D_bbox)
    all_sp_vel = torch.from_numpy(all_sp_vel)
    all_sp_acc = torch.from_numpy(all_sp_acc)
    all_type = torch.from_numpy(all_type) * info['type']

    # With feat_out set the DR translation is kept absolute; otherwise it is
    # centred on the sequence mean.
    if self.feat_out:
        mean_dr_x = 0
        mean_dr_y = 0
    else:
        mean_dr_x = all_DR[..., 0].mean()
        mean_dr_y = all_DR[..., 1].mean()

    # Per-frame 3x3 transform: rotation by the last DR component plus the
    # (centred) DR translation, a cam2car/avm2car offset, and a fixed axis swap.
    rot_mat = torch.eye(3).unsqueeze(dim=0).repeat(nframe, 1, 1).float()
    tns_mat = torch.eye(3).unsqueeze(dim=0).repeat(nframe, 1, 1).float()
    chg_mat = torch.tensor([[0, -1, 0], [1, 0, 0], [0, 0, 1]]).unsqueeze(dim=0).repeat(nframe, 1, 1).float()
    rot_mat[:, 0, 0] = torch.cos(all_DR[..., -1])
    rot_mat[:, 0, 1] = -torch.sin(all_DR[..., -1])
    rot_mat[:, 0, 2] = all_DR[..., 0] - mean_dr_x
    rot_mat[:, 1, 0] = torch.sin(all_DR[..., -1])
    rot_mat[:, 1, 1] = torch.cos(all_DR[..., -1])
    rot_mat[:, 1, 2] = all_DR[..., 1] - mean_dr_y
    tns_mat[:, 0, 2] = -cam2car[0] + avm2car[0]
    tns_mat[:, 1, 2] = -cam2car[1] + avm2car[1]
    all_dr_mat = torch.matmul(torch.matmul(rot_mat, tns_mat), chg_mat)

    # Centre the DR positions in place; r_pos below depends on the centred values.
    all_DR_pos[..., 0] = all_DR_pos[..., 0] - mean_dr_x
    all_DR_pos[..., 2] = all_DR_pos[..., 2] - mean_dr_y
    t = all_stamps - all_stamps.mean()

    theta = all_DR[..., -1]
    r_mat = torch.stack((torch.cos(theta), torch.sin(theta), -torch.sin(theta), torch.cos(theta)), dim=-1).reshape(len(theta), 2, 2)
    r_pos = torch.matmul(r_mat, all_DR_pos[..., [0, 2]].unsqueeze(dim=-1)).squeeze(dim=-1)

    if self.random_erase > 0:
        a = torch.rand(nframe)
        # Clamp the quantile index: with random_erase >= 1 the raw index
        # equals nframe and would run past the end of the sorted tensor.
        k = min(int(self.random_erase * nframe), nframe - 1)
        v = torch.sort(a)[0][k] * random.random()
        # NOTE(review): this makes all_weight a bool mask, whereas the default
        # above is float32 - confirm consumers accept both dtypes.
        all_weight = ~(a < v)

    sample = {
        'img': all_img,
        'rois': all_2D_bbox,
        'location': all_location,
        'velocity': all_velocity,
        'coord': all_coord,
        't': t,
        'all_weight': all_weight,
        'all_DR': all_DR,
        'all_DR_vel': all_DR_vel,
        'all_r_vel': all_r_vel,
        'all_r_pos': r_pos,
        'all_DR_pos': all_DR_pos,
        'all_dr_mat': all_dr_mat,
        'cam2bev': torch.inverse(torch.tensor(bev2cam).float().reshape(3, 3)),
        'all_sp_vel': all_sp_vel,
        'all_sp_acc': all_sp_acc,
        'all_type': all_type.long(),
    }
    if (not self.test_mode) or self.feat_out:
        sample = self.augment(sample)

    # Randomly drop the dead-reckoning input: zero it and fall back to labels.
    if random.random() < self.drop_dr_ratio:
        sample['all_DR'] *= 0
        sample['all_DR_vel'] = copy.deepcopy(sample['velocity'])
        sample['all_r_vel'] = copy.deepcopy(sample['velocity'])
        sample['all_DR_pos'] = copy.deepcopy(sample['location'])
        sample['all_dr_mat'] = torch.eye(3).unsqueeze(dim=0).repeat(nframe, 1, 1).float()
    return sample
def ScaleOffsetFlip(self, image, bbox):
    """Apply one random scale-and-shift warp to an image and its box.

    A shift of up to +/-20% of the image size per axis and a uniform scale in
    [0.8, 1.2] are drawn, the image is warped with reflected borders, and the
    two box corners are mapped through the same affine matrix and clamped to
    the image.

    Args:
        image: image array whose first two dimensions are height and width.
        bbox: indexable ``(x1, y1, x2, y2)``.

    Returns:
        ``(img, roi)``: the warped image as a float tensor (colour channels
        swapped BGR->RGB, channel-first, divided by 255) and the transformed
        box as a float tensor of four values.
    """
    img_h, img_w = image.shape[:2]

    # Image transform. Draw order (x shift, y shift, scale) is kept so the
    # random stream is consumed identically.
    shift_x = img_w * random.uniform(-0.2, 0.2)
    shift_y = img_h * random.uniform(-0.2, 0.2)
    zoom = random.uniform(0.8, 1.2)
    affine = np.float32([[zoom, 0, shift_x], [0, zoom, shift_y]])
    warped = cv2.warpAffine(
        image,
        affine,
        (img_w, img_h),
        flags=cv2.INTER_LINEAR,
        borderMode=cv2.BORDER_REFLECT,
    )

    # Coordinate transform: both corners in homogeneous form through the
    # same 2x3 matrix.
    corners = np.float32([[bbox[0], bbox[1], 1], [bbox[2], bbox[3], 1]])
    mapped = np.matmul(np.expand_dims(affine, axis=0), corners.reshape(2, 3, 1))
    x1, y1, x2, y2 = mapped.reshape(-1).tolist()

    # The first corner is clamped to size - 1, the second to size.
    x1 = min(max(x1, 0), img_w - 1)
    y1 = min(max(y1, 0), img_h - 1)
    x2 = min(max(x2, 0), img_w)
    y2 = min(max(y2, 0), img_h)

    rgb = cv2.cvtColor(warped, cv2.COLOR_BGR2RGB)
    img = torch.from_numpy(rgb.astype(np.float32).transpose(2, 0, 1) / 255)
    roi = torch.tensor([x1, y1, x2, y2]).float()
    return img, roi
def crop_image_bbox(self, img, bbox, scale):
    """Adapt a 224-wide crop, its box and its scale to ``self.img_rsz``.

    * 192x224: everything is returned untouched.
    * 128x128, 160x160, 192x192: columns 16..207 are kept, the box
      x-coordinates are shifted by -16, and for the two smaller sizes the
      image is resized and box/scale are multiplied by ``side / 192``.

    Args:
        img: image array indexed ``[row, column, channel]``.
        bbox: NumPy array ``(x1, y1, x2, y2)``. Float arrays are updated in
            place; integer arrays are converted to float64 before the
            fractional rescale, which would otherwise fail under NumPy's
            same-kind casting rule for in-place operations.
        scale: numeric scale factor of the crop.

    Returns:
        ``(img, bbox, scale)`` after adaptation.

    Raises:
        NotImplementedError: ``self.img_rsz`` is not one of the sizes above.
    """
    height, width = self.img_rsz[0], self.img_rsz[1]
    if height == 192 and width == 224:
        return img, bbox, scale
    if height != width or height not in (128, 160, 192):
        # Raising a bare string is itself a TypeError in Python 3.
        raise NotImplementedError('not inplement img size')

    # Drop 16 columns on each side (224 -> 192) and move the box accordingly.
    img = img[:, 16:208, :]
    bbox[0::2] -= 16

    if height != 192:
        ratio = height / 192
        img = cv2.resize(img, (height, height), interpolation=cv2.INTER_LINEAR)
        if not np.issubdtype(bbox.dtype, np.floating):
            bbox = bbox.astype(np.float64)
        bbox *= ratio
        scale *= ratio
    return img, bbox, scale
def get_data_info_bbox(self, ):
    """Assemble ``self.bbox_num`` auxiliary box-only samples.

    Records are taken round-robin from ``self.bbox_infos`` using the
    persistent cursor ``self.bbox_idx``. Each image is decoded, adapted with
    ``self.crop_image_bbox`` and randomly warped with ``self.ScaleOffsetFlip``.

    Returns:
        dict with ``img`` and ``rois``; ``coord`` is added only when
        ``self.pe_dim == 0``. The dict is passed through ``self.augment``
        when ``self.test_mode == False``.
    """
    n_box = self.bbox_num
    all_img = torch.zeros(n_box, 3, self.img_rsz[0], self.img_rsz[1]).float()
    all_2D_bbox = torch.zeros(n_box, 4).float()
    coords = []

    for slot in range(n_box):
        # The cursor is advanced before the record is loaded.
        cursor = self.bbox_idx % len(self.bbox_infos)
        self.bbox_idx += 1
        record = copy.deepcopy(self.bbox_infos[cursor])

        raw_bytes = np.fromfile(record['img_path'], dtype=np.uint8)
        picture = cv2.imdecode(raw_bytes, 1)
        box = np.array(copy.deepcopy(record['bbox']))

        picture, box, _ = self.crop_image_bbox(picture, box, 1)
        all_img[slot], all_2D_bbox[slot] = self.ScaleOffsetFlip(picture, box)

        # The encoding is requested for every slot, even when unused below.
        coords.append(self.encode_coord(None, 0, None))

    stacked_coord = torch.stack(coords, dim=0)

    sample = {'img': all_img, 'rois': all_2D_bbox}
    if self.pe_dim == 0:
        sample['coord'] = stacked_coord
    if self.test_mode == False:  # noqa: E712 - comparison kept as in the original
        sample = self.augment(sample)
    return sample
def __getitem__(self, idx):
    """Return the sample at ``idx``, retrying with a random index on failure.

    Records whose ``batch`` name contains ``'radar'`` are built by
    ``get_data_info_radar``, all others by ``get_data_info``. Outside test
    mode the auxiliary box batch from ``get_data_info_bbox`` is concatenated
    along dimension 0 onto the entries of the same name.

    Any ``Exception`` while building a sample is reported and another index
    is drawn at random; there is no retry limit. ``KeyboardInterrupt`` and
    ``SystemExit`` are no longer swallowed, so the loop can be interrupted.

    Args:
        idx: index into ``self.infos``.

    Returns:
        The sample dict.
    """
    while True:
        try:
            if 'radar' in self.infos[idx]['batch']:
                sample = self.get_data_info_radar(idx)
            else:
                sample = self.get_data_info(idx)
            if not self.test_mode:
                aux_sample = self.get_data_info_bbox()
                for key in aux_sample:
                    sample[key] = torch.cat((sample[key], aux_sample[key]), dim=0)
            return sample
        except Exception as err:
            # Report the cause as well; the index alone hides what went wrong.
            print('>>>error occured:', idx, repr(err))
            idx = random.randint(0, len(self.infos) - 1)
def format_results(self, outputs, **kwargs):
    """Group per-sample outputs by track and dump them with ``mmcv.dump``.

    Each output dict is paired positionally with ``self.infos``. Tensor
    values are converted to NumPy arrays, ``img_folder`` is copied from the
    info record, and the result is stored under
    ``all_results['<batch>+<video>+<gid>'][<last stamp>]``. The last stamp is
    taken from ``info['stamps']`` and, if that is unavailable, from
    ``info['sweeps']``.

    Args:
        outputs: iterable of result dicts, one per entry of ``self.infos``.
        **kwargs: must contain ``pklfile_prefix``, the dump destination.
    """
    all_results = {}
    for info, res in zip(self.infos, outputs):
        res = {
            name: value.cpu().detach().numpy() if isinstance(value, torch.Tensor) else value
            for name, value in res.items()
        }
        res['img_folder'] = info['img_folder']
        try:
            stamp = info['stamps'][-1]
        except (KeyError, IndexError, TypeError):
            # Only lookup failures trigger the fallback; a bare except would
            # also hide interrupts and unrelated bugs.
            stamp = info['sweeps'][-1]
        key = '{}+{}+{}'.format(info['batch'], info['video'], info['gid'])
        all_results.setdefault(key, {})[stamp] = res
    mmcv.dump(all_results, kwargs['pklfile_prefix'])
# Training configuration for RadarVelocityBboxOnnxFuse3 with the
# SmokeCropLoaderRadarFuse3 dataset. One statement per line; every value is
# unchanged from the flattened original.
find_unused_parameters = True
file_client_args = dict(backend='disk')
camera_inner = [[1489.8095, 0.0, 961.03457], [0.0, 1489.8272, 532.078111], [0.0, 0.0, 1.0]]
data = dict(
    samples_per_gpu=16,
    workers_per_gpu=4,
    train=dict(
        type='SmokeCropLoaderRadarFuse3',
        pair_file=[
            '/share/wuziyang/DataSets/annos_info/all_sp_data_type.pkl',
            '/share/wuziyang/DataSets/annos_info/crop_info_batch6-acc-sp-type-bbox.pkl',
            '/share/wuziyang/DataSets/annos_info/crop_info_batch7-acc-sp-type-bbox.pkl',
            '/share/wuziyang/DataSets/annos_info/crop_info_batch8-acc-sp-type-bbox.pkl',
            '/share/wuziyang/DataSets/annos_info/crop_info_batch9-acc-sp-type-bbox.pkl',
            '/share/wuziyang/DataSets/annos_info/crop_info_batch10-acc-sp-type-bbox.pkl',
            '/share/wuziyang/DataSets/annos_info/crop_info_batch11-acc-sp-type-bbox.pkl',
            '/share/wuziyang/DataSets/annos_info/crop_info_batch0613-acc-sp-type-bbox.pkl',
        ],
        seq_root='/share/wuziyang/DataSets/annos_info/seq_files',
        img_root='/dataset/zhangjingwei/VelocityCropSet',
        pkl_root='/share/wuziyang/DataSets/annos_info/pkl_files',
        auxy_root='/share/wuziyang/DataSets/cropsets/bbox',
        custom_mtx=[[1489.8095, 0.0, 961.03457], [0.0, 1489.8272, 532.078111], [0.0, 0.0, 1.0]],
        img_rsz=[128, 128],
        nframe=30,
        pe_dim=0,
        skip=5,
        ref_coord=False,
        center_base=True,
        random_erase=0.0,
        crop_noise=True,
        drop_dr_ratio=0,
        random_choose=True,
        filter_ignore=False,
        main_type=['Truck', 'Bus', 'Car', '2wheel', '3wheel', 'Human', 'Fake'],
        test_mode=False,
    ),
    val=dict(
        type='SmokeCropLoaderRadarFuse3',
        # Disabled alternative:
        # '/share/wuziyang/mono_velocity-master/data/cipv_train_120_309_BY_OS_WP_XY_4_filter.pkl',
        pair_file=['/share/wuziyang/DataSets/annos_info/crop_info_batch_test-acc.pkl'],
        img_root='/dataset/zhangjingwei/VelocityCropSet',
        pkl_root='/share/wuziyang/DataSets/annos_info/pkl_files',
        custom_mtx=[[1489.8095, 0.0, 961.03457], [0.0, 1489.8272, 532.078111], [0.0, 0.0, 1.0]],
        pe_dim=0,
        skip=1,
        ref_coord=False,
        img_rsz=[128, 128],
        nframe=30,
        center_base=True,
        crop_noise=False,
        test_mode=True,
    ),
    test=dict(
        type='SmokeCropLoaderRadarFuse3',
        pair_file=['/share/wuziyang/DataSets/annos_info/crop_info_batch_test-acc.pkl'],
        img_root='/dataset/zhangjingwei/VelocityCropSet',
        pkl_root='/share/wuziyang/DataSets/annos_info/pkl_files',
        custom_mtx=[[1489.8095, 0.0, 961.03457], [0.0, 1489.8272, 532.078111], [0.0, 0.0, 1.0]],
        pe_dim=0,
        skip=1,
        ref_coord=False,
        img_rsz=[128, 128],
        nframe=30,
        center_base=True,
        crop_noise=False,
        main_type=['Truck', 'Bus', 'Car'],
        test_mode=True,
    ),
)
model = dict(
    type='RadarVelocityBboxOnnxFuse3',
    backbone=None,
    neck=None,
    head=None,
    bbox_type='x1y1x2y2',
    roi_scale=True,
    use_scale=False,
    train_cfg=None,
    test_cfg=None,
    img_sz=[128, 128],
    feat_dim=256,
    use_bn=False,
    use_weight=True,
    alpha_acc=10,
    classify=False,
    poly=3,
)
total_epochs = 6
runner = dict(type='EpochBasedRunner', max_epochs=6)
optimizer = dict(type='Adam', lr=0.0001)
optimizer_config = dict(grad_clip=None)
lr_config = dict(policy='step', warmup=None, step=[3, 4, 5])
evaluation = dict(interval=360, pipeline=None)
checkpoint_config = dict(interval=1)
log_config = dict(
    interval=50,
    hooks=[dict(type='TextLoggerHook'), dict(type='TensorboardLoggerHook')],
)
custom_hooks = [dict(type='Custom3DHook', save_iter=-1, save_path='')]
dist_params = dict(backend='nccl')
log_level = 'INFO'
workflow = [('train', 1)]
opencv_num_threads = 0
mp_start_method = 'fork'
# The next assignment is immediately overridden by the one after it, so the
# effective value of resume_from is None.
resume_from = '/share/wuziyang/mono_velocity-master/results/9fram-bbox-onnx-fuse-weight-sp-3/latest.pth'
resume_from = None
load_from = '/share/wuziyang/velocity_model/G3090/work_dir/from_wuziyang/v0509/epoch_6.pth'
work_dir = '/share/wuziyang/velocity_model/G3090/work_dir/from_wuziyang/v0524+truck+tunnel'
config = '/share/wuziyang/opensourcelib/G3090/projects/configs/velocity/radar_velocity_onnx_fuse_sp.py'
gpu_ids = range(0, 1)