Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Fun772283153/UnnamedProject

Repository files navigation

UnnamedProject

@DETECTORS.register_module()
class RadarVelocityBboxOnnxFuse3(nn.Module):
    """Velocity / bbox regression network registered in ``DETECTORS``.

    The constructor only stores hyper-parameters, then builds the layers
    (``init_layers``) and initialises their weights (``init_weights``).
    """

    def __init__(self, backbone=None, neck=None, head=None, bbox_type='sigmoid', use_scale=False,
                 roi_type='align', img_sz=[192, 224], poly=2, feat_dim=256, use_bn=True,
                 use_weight=False, classify=False, alpha_dist=0.1, alpha_regu=0.1, alpha_acc=10,
                 alpha_last=5, alpha_weight=0.01, alpha_bbox=1, **kwargs):
        """Store the configuration and build the network.

        Args:
            backbone, neck, head: accepted for config compatibility; not used here.
            bbox_type, use_scale, roi_type: stored as attributes only.
            img_sz: ``[height, width]`` used to derive the RoI grid size.
            poly: number of polynomial coefficients fitted over time (2, 3 or 4).
            feat_dim: channel count of the last backbone stage.
            use_bn: build the backbone with ``conv_bn`` instead of ``conv``.
            use_weight: predict a per-frame weight with ``wt_fc``.
            classify: add the classification branch and its loss.
            alpha_*: multipliers applied to the individual loss terms.
            **kwargs: ignored.
        """
        # The dunder names matter: with a plain ``init`` the constructor is never
        # run by Python and ``nn.Module`` is never initialised.
        super(RadarVelocityBboxOnnxFuse3, self).__init__()
        self.classify = classify
        self.alpha_acc = alpha_acc
        self.alpha_bbox = alpha_bbox
        self.alpha_dist = alpha_dist
        self.alpha_regu = alpha_regu
        self.alpha_last = alpha_last
        self.alpha_weight = alpha_weight
        self.use_weight = use_weight
        self.use_bn = use_bn
        self.feat_dim = feat_dim
        self.img_sz = img_sz
        self.bbox_type = bbox_type
        self.use_scale = use_scale
        self.roi_type = roi_type
        self.poly = poly
        self.init_layers()
        self.init_weights()

def init_weights(self):
    """Re-initialise the parameters of every conv, batch-norm and linear sub-module.

    * ``Conv2d`` / ``ConvTranspose2d``: Kaiming-normal weights (``fan_in``), zero bias.
    * ``BatchNorm2d``: weights ~ N(1, 0.03), zero bias.
    * ``Linear``: weights ~ N(0, 0.03), zero bias.
    """
    conv_types = (nn.Conv2d, nn.ConvTranspose2d)
    for module in self.modules():
        if isinstance(module, conv_types):
            nn.init.kaiming_normal_(module.weight.data, mode='fan_in')
            if module.bias is not None:
                module.bias.data.zero_()
            continue
        if isinstance(module, nn.BatchNorm2d):
            torch.nn.init.normal_(module.weight.data, 1.0, 0.03)
            torch.nn.init.constant_(module.bias.data, 0.0)
            continue
        if isinstance(module, nn.Linear):
            torch.nn.init.normal_(module.weight.data, 0.0, 0.03)
            torch.nn.init.constant_(module.bias.data, 0.0)
def init_layers(self,):
    """Create every sub-module, the RoI helper and the loss objects.

    The backbone is fifteen 3x3 blocks in five stages (the first block of each
    stage has stride 2), built with ``conv_bn`` when ``self.use_bn`` is set and
    with ``conv`` otherwise.  The RoI grid size is derived from ``self.img_sz``.
    """
    ht_size = max(self.img_sz[0] // 32, self.img_sz[1] // 32)
    grid_sz = 2 * (ht_size // 2) + 1  # always odd

    # (attribute name, in channels, out channels, stride) in registration order.
    feat = self.feat_dim
    stage_specs = (
        ('conv2a', 7, 32, 2), ('conv2aa', 32, 32, 1), ('conv2b', 32, 32, 1),
        ('conv3a', 32, 64, 2), ('conv3aa', 64, 64, 1), ('conv3b', 64, 64, 1),
        ('conv4a', 64, 96, 2), ('conv4aa', 96, 96, 1), ('conv4b', 96, 96, 1),
        ('conv5a', 96, 128, 2), ('conv5aa', 128, 128, 1), ('conv5b', 128, 128, 1),
        ('conv6aa', 128, feat, 2), ('conv6a', feat, feat, 1), ('conv6b', feat, feat, 1),
    )
    make_block = conv_bn if self.use_bn else conv
    for attr_name, c_in, c_out, stride in stage_specs:
        setattr(self, attr_name, make_block(c_in, c_out, kernel_size=3, stride=stride))

    self.relu = nn.ReLU(inplace=False)

    # Head applied to the RoI-pooled features.
    self.roi_conv1 = nn.Conv2d(feat, 256, grid_sz, stride=1, padding=0)
    self.roi_conv2 = nn.Conv2d(256, 256, 1, stride=1, padding=0)
    self.wt_fc = nn.Linear(256, 1)
    self.mv_fc1 = nn.Linear(256, 128)
    self.mv_fc4 = nn.Linear(128, 64)
    self.mv_fc5 = nn.Linear(64, 3)
    self.dis_X_encoding = nn.Linear(256, 64)
    self.dis_Y_encoding = nn.Linear(256, 64)
    self.dis_Z_encoding = nn.Linear(256, 64)

    # 7-channel box regressor; no activation here (forward() applies a sigmoid).
    self.bbox_fc = nn.Sequential(
        nn.Conv2d(feat, 64, 3, stride=1, padding=0),
        nn.ReLU(inplace=False),
        nn.AdaptiveAvgPool2d(1),
        nn.Conv2d(64, 7, 1, stride=1, padding=0),
    )
    if self.use_weight:
        # Replaces the linear ``wt_fc`` created above with a pooled conv head.
        self.wt_fc = nn.Sequential(
            nn.AdaptiveAvgPool2d(1),
            nn.Conv2d(feat, 1, 1, stride=1, padding=0),
            nn.Sigmoid(),
        )

    self.dis_fc = nn.Sequential(
        nn.Linear(128, 64),
        nn.ReLU(inplace=True),
        nn.Linear(64, 2),
    )
    self.vel_fc = nn.Sequential(
        nn.Linear(128, 64),
        nn.ReLU(inplace=True),
        nn.Linear(64, 2),
    )
    self.car_roi = RoIAlign((grid_sz, grid_sz), aligned=False)
    if self.classify:
        self.class_fc = nn.Sequential(nn.Linear(256, 9))

    # Per-channel scale applied to the sigmoid output of ``bbox_fc``.
    self.roi_scale = torch.tensor([[0, 0, ht_size, ht_size, ht_size, ht_size, 0]]).float()
    self.grid_sz = grid_sz

    # Loss objects built through the registry.
    self.conf_loss = build_loss(dict(type='UncertainL2Loss'))
    self.iou_loss = build_loss(dict(type='GIoULoss'))
    self.bbox_loss = build_loss(dict(type='L1Loss'))
    self.cls_loss = build_loss(dict(type='CrossEntropyLoss'))
def loss(self, output, kwargs,):
    """Build the dictionary of training losses.

    Args:
        output: dict produced by ``forward``/``post_process``; reads ``dis``,
            ``s_dis``, ``vel``, ``rotate_vel``, ``rotate_pos``, ``m_vel``,
            ``rois``, ``acc_p``, ``extra`` and, depending on the configuration,
            ``rotate_acc``, ``cls`` and ``weight``.
        kwargs: batch dict; reads ``all_DR_vel``, ``all_DR_pos``, ``all_sp_vel``,
            ``all_r_pos``, ``rois`` and, depending on the configuration,
            ``all_sp_acc``, ``t`` and ``all_type``.

    Returns:
        dict mapping loss names to scalar tensors.
    """
    # world-frame predictions / targets (channels 0 and 2 of the targets are used)
    position_pred = output['dis'][..., [0, 1]]
    smooth_pred = output['s_dis'][..., [0, 1]]
    velocity_pred = output['vel']
    velocity_true = kwargs['all_DR_vel'][..., [0, 2]]
    position_true = kwargs['all_DR_pos'][..., [0, 2]]
    # rotated-frame quantities: only the last channel is supervised
    velocity_pred_r = output['rotate_vel'][..., [-1]]
    velocity_true_r = kwargs['all_sp_vel'].unsqueeze(dim=-1)
    position_pred_r = output['rotate_pos'][..., [-1]]
    position_true_r = kwargs['all_r_pos'][..., [-1]]

    # An acceleration target only exists for poly 3 and 4.  For any other order
    # (including the default poly=2) the term used to hit an undefined
    # ``acc_true``; it is now skipped and reported as zero.
    if self.poly == 3:
        acc_true = kwargs['all_sp_acc'] * kwargs['t']
    elif self.poly == 4:
        acc_true = kwargs['all_sp_acc']
    else:
        acc_true = None

    # distance
    loss_dist = self.alpha_dist * torch.norm(position_true - position_pred, p=2, dim=-1).mean()
    loss_smoo = self.alpha_dist * torch.norm(position_true - smooth_pred, p=2, dim=-1).mean()
    loss_disr_r = self.alpha_dist * F.mse_loss(position_true_r, position_pred_r)
    # velocity
    loss_velc = torch.norm(velocity_true - velocity_pred, p=2, dim=-1).mean()
    loss_rotate = F.mse_loss(velocity_true_r, velocity_pred_r)
    loss_last = self.alpha_last * F.mse_loss(velocity_true_r[:, -1], velocity_pred_r[:, -1])
    loss_mv = F.mse_loss(velocity_true_r.mean(dim=1), output['m_vel'][..., [-1]])
    # bbox: rows whose ground-truth box is all zero are ignored
    pred_bbox = output['rois'].reshape(-1, 4)
    gt_bbox = kwargs['rois'].reshape(-1, 4)
    inds = gt_bbox.abs().sum(-1) > 0
    pred_bbox = pred_bbox[inds]
    gt_bbox = gt_bbox[inds]
    loss_bbox = self.alpha_bbox * self.bbox_loss(pred_bbox, gt_bbox)
    loss_giou = self.alpha_bbox * self.iou_loss(pred_bbox, gt_bbox)
    # regularisation on the acceleration feature and the acceleration regression
    l1_reg = self.alpha_regu * torch.norm(output['acc_p'], p=1, dim=-1).mean()
    if acc_true is None:
        l1_acc = torch.zeros_like(loss_velc)
    else:
        acc_pred = output['rotate_acc'][..., -1]
        l1_acc = self.alpha_acc * F.mse_loss(acc_pred, acc_true)
    # variance of the residual over the frame axis
    vel_std = (velocity_true_r - velocity_pred_r).var(dim=1).sum(-1).mean()
    pos_std = self.alpha_dist * (position_true_r - position_pred_r).var(dim=1).sum(-1).mean()
    # penalise boxes whose first two coordinates exceed the last two
    extra = output['extra']
    if extra is not None:
        loss_extra = (torch.exp(torch.clamp_min(extra[..., :2] - extra[..., 2:], 0)) - 1).sum()
    else:
        loss_extra = torch.zeros_like(loss_giou)

    loss = {
        'loss_dist': loss_dist, 'loss_smooth': loss_smoo, 'loss_disr_r': loss_disr_r,
        'loss_velc': loss_velc, 'loss_rotate': loss_rotate, 'loss_last': loss_last,
        'loss_bbox': loss_bbox, 'loss_giou': loss_giou, 'loss_extra': loss_extra,
        'loss_reg': l1_reg, 'loss_vel_var': vel_std, 'loss_pos_var': pos_std, 'loss_acc': l1_acc,
        'loss_mv': loss_mv,
    }
    if self.classify:
        pred = output['cls']
        true = kwargs['all_type'].view(-1)
        loss['loss_cls'] = self.cls_loss(pred, true)
    if self.use_weight:
        # pull the predicted frame weights towards one
        pred_weight = output['weight']
        gt_weight = torch.ones_like(output['weight'])
        loss['loss_weight'] = self.alpha_weight * torch.norm(pred_weight - gt_weight, p=2).mean()
    return loss
def get_predictions(self, res, kwargs):
    """Pick the values reported at inference time for the first sample of the batch.

    Returns a dict with ``pred_r`` / ``pred_z`` / ``pred_t`` taken from ``res``
    and ``gt_t`` / ``gt_z`` / ``gt_r`` taken from ``kwargs`` when it contains a
    ``'location'`` entry (``None`` otherwise).
    """
    # NOTE(review): 'pred_z' reads frame index 1 while the other entries read the
    # last frame (-1); kept as in the original, confirm this is intended.
    out = {
        'pred_r': res['rotate_pos'][0, -1, [1]],
        'pred_z': res['s_dis'][0, 1, [1]],
        'pred_t': res['rotate_vel'][0, -1],
    }
    has_gt = 'location' in kwargs
    out['gt_t'] = kwargs['all_r_vel'][0, -1, [0, 2]] if has_gt else None
    out['gt_z'] = kwargs['location'][0, -1, [2]] if has_gt else None
    out['gt_r'] = kwargs['all_r_pos'][0, -1, [1]] if has_gt else None

    # Disabled debug dump of the predicted boxes drawn on the input crops.
    if 0:
        frames = kwargs['img'][0]
        boxes = res['rois']
        for idx in range(frames.size(0)):
            image = transforms.ToPILImage()(frames[idx]).convert('RGB')
            draw = ImageDraw.Draw(image)
            draw.rectangle([boxes[idx, 0], boxes[idx, 1], boxes[idx, 2], boxes[idx, 3]], outline='red')
            image.save("/data1/mono_velocity-master/crop/%03d_b.png" % (self.save_idx))
            self.save_idx += 1
    return out
def get_transf_mat(self, t, w, y):
    """Weighted least-squares fit of a polynomial in ``t`` to ``y``.

    Solves the normal equations ``(sum_i w_i t_i^(r+c)) p = sum_i w_i t_i^r y_i``
    for ``self.poly`` coefficients per sample, using a pseudo-inverse.

    Args:
        t: time stamps, indexed ``[sample, frame]``.
        w: per-frame weights, same layout as ``t``.
        y: observations, indexed ``[sample, frame, feature]``.

    Returns:
        Tensor indexed ``[sample, coefficient, feature]`` (constant term first).

    Raises:
        NotImplementedError: if ``self.poly`` is not 2, 3 or 4.
    """
    order = self.poly
    if order not in (2, 3, 4):
        # The original raised a plain string, which is itself a TypeError in Python 3.
        raise NotImplementedError('poly={} is not implemented'.format(order))

    # Weighted moments sum_i w_i * t_i^k for k = 0 .. 2*order-2.
    moments = []
    weighted = w
    for _ in range(2 * order - 1):
        moments.append(weighted.sum(-1))
        weighted = weighted * t
    # Hankel-structured normal matrix: entry (r, c) is moment r + c.
    mat = torch.stack(
        [moments[r + c] for r in range(order) for c in range(order)], dim=-1
    ).reshape(t.size(0), order, order)

    try:
        t_mat = torch.pinverse(mat)
    except RuntimeError:
        # As before, only the 3-coefficient fit has an identity fallback.
        if order != 3:
            raise
        print('error cooured:', mat)
        print(t)
        # repeat(B, 1, 1) keeps the (B, 3, 3) shape; repeat(B, 3, 3) produced (B, 9, 9).
        t_mat = torch.eye(3).float().to(mat.device).unsqueeze(dim=0).repeat(t.size(0), 1, 1)

    # Right-hand side: sum_i w_i * t_i^k * y_i for k = 0 .. order-1.
    t_col = t.unsqueeze(dim=-1)
    weighted_y = w.unsqueeze(dim=-1) * y
    rhs = []
    for _ in range(order):
        rhs.append(weighted_y.sum(dim=1))
        weighted_y = weighted_y * t_col
    obser = torch.stack(rhs, dim=1)

    return torch.matmul(t_mat, obser)
def post_process(self, x_embed, y_embed, z_embed, w, kwargs):
    """Turn the per-frame embeddings into position / velocity / acceleration outputs.

    The three embeddings are transformed with ``kwargs['cam2bev']`` and
    ``kwargs['all_dr_mat']``, a polynomial in ``kwargs['t']`` is fitted to the
    centred feature (``get_transf_mat``), and the fitted curve and its
    derivatives are decoded by ``mv_fc4``/``mv_fc5`` and ``vel_fc``.  Decoded
    vectors are additionally rotated by the last channel of ``kwargs['all_DR']``.

    Args:
        x_embed, y_embed, z_embed: per-frame embeddings from ``forward``.
        w: per-frame weights for the fit.
        kwargs: batch dict; reads ``cam2bev``, ``all_dr_mat``, ``t``, ``all_DR``.

    Returns:
        dict with ``dis``, ``vel``, ``s_dis``, ``rotate_vel``, ``rotate_pos``,
        ``acc_p``, ``rotate_acc`` (``None`` when ``poly == 2``) and ``m_vel``.

    Raises:
        NotImplementedError: if ``self.poly`` is not 2, 3 or 4.
    """
    if self.poly not in (2, 3, 4):
        # The original raised a plain string, which is itself a TypeError in Python 3.
        raise NotImplementedError('poly={} is not implemented'.format(self.poly))

    # to bev: rows 0 and 2 of the transformed stack are used further on
    cam2bev = kwargs['cam2bev'].unsqueeze(dim=1)
    coord = torch.stack((x_embed, y_embed, z_embed), dim=-2)
    xyz_feat = torch.matmul(cam2bev, coord)
    x_feat = xyz_feat[:, :, 0]
    y_feat = xyz_feat[:, :, 2]
    # to world: homogeneous 2-D transform per frame
    p = torch.ones_like(x_feat)
    embed = torch.stack((x_feat, y_feat, p), dim=-2)
    res = torch.matmul(kwargs['all_dr_mat'], embed)
    z = torch.cat((res[:, :, 0], res[:, :, 1]), dim=-1)

    # fit the polynomial to the feature centred over the frame axis
    avg = z.mean(dim=1).unsqueeze(dim=1)
    y = z - avg
    params = self.get_transf_mat(kwargs['t'], w, y)

    t = kwargs['t'].unsqueeze(dim=-1)
    dis = self.mv_fc5(self.relu(self.mv_fc4(z)))

    # params[:, [0]]: distance feature, [1]: velocity feature, [2]/[3]: higher orders.
    # ``m_v`` (the linear coefficient) was only assigned for poly == 3, so poly 2
    # and 4 crashed with a NameError below; it is now defined for every order.
    # NOTE(review): for poly == 4 confirm the linear coefficient is the intended
    # input of 'm_vel'.
    m_v = params[:, [1]]
    if self.poly == 2:
        s_z = avg + params[:, [0]] + params[:, [1]] * t
        s_v = params[:, [1]]
        s_a = torch.zeros_like(s_v)
        acc = None
    elif self.poly == 3:
        s_z = avg + params[:, [0]] + params[:, [1]] * t + params[:, [2]] * t * t
        s_v = params[:, [1]] + 2 * params[:, [2]] * t
        s_a = 2 * params[:, [2]]
        acc = self.vel_fc(s_a * t)
    else:  # self.poly == 4
        s_z = avg + params[:, [0]] + params[:, [1]] * t + params[:, [2]] * t * t + params[:, [3]] * t * t * t
        s_v = params[:, [1]] + 2 * params[:, [2]] * t + 3 * params[:, [3]] * t * t
        s_a = 2 * params[:, [2]] + 6 * params[:, [3]] * t
        acc = self.vel_fc(s_a)

    s_dis = self.mv_fc5(self.relu(self.mv_fc4(s_z)))
    vel = self.vel_fc(s_v)
    m_vel = self.vel_fc(m_v).squeeze(dim=1)

    # per-frame 2-D rotation built from the last channel of all_DR
    theta = kwargs['all_DR'][:, :, -1]
    r_mat = torch.stack(
        (torch.cos(theta), torch.sin(theta), -torch.sin(theta), torch.cos(theta)), dim=-1
    ).reshape(t.size(0), t.size(1), 2, 2)
    r_vel = torch.matmul(r_mat, vel.unsqueeze(dim=-1)).squeeze(dim=-1)
    r_pos = torch.matmul(r_mat, dis[..., :2].unsqueeze(dim=-1)).squeeze(dim=-1)
    # poly == 2 has no acceleration output; the original called .unsqueeze on None.
    r_acc = None if acc is None else torch.matmul(r_mat, acc.unsqueeze(dim=-1)).squeeze(dim=-1)

    # rotation by the mean angle for the single 'm_vel' vector
    m_theta = theta.mean(dim=1)
    m_mat = torch.stack(
        (torch.cos(m_theta), torch.sin(m_theta), -torch.sin(m_theta), torch.cos(m_theta)), dim=-1
    ).reshape(t.size(0), 2, 2)
    m_rv = torch.matmul(m_mat, m_vel.unsqueeze(dim=-1)).squeeze(dim=-1)

    res = {'dis': dis, 'vel': vel, 's_dis': s_dis, 'rotate_vel': r_vel, 'rotate_pos': r_pos,
           'acc_p': s_a, 'rotate_acc': r_acc, 'm_vel': m_rv}
    return res
def forward(self, return_loss=False, **kwargs):
    """Run the network on one batch.

    ``kwargs['img']`` is unpacked as a 5-D tensor (batch, frames, channels,
    height, width) and concatenated with ``kwargs['coord']`` along the channel
    axis.  Only the first ``kwargs['all_r_vel'].size(1)`` frames of each sample
    are RoI-pooled and decoded.

    Returns:
        In training mode the loss dict from ``self.loss``; otherwise a
        one-element list holding the dict from ``self.get_predictions``.
        ``return_loss`` is accepted but not read.
    """
    images = kwargs['img']
    coords = kwargs['coord']
    batch, frames, _, height, width = images.shape
    used = kwargs['all_r_vel'].size(1)

    # Fold the frame axis into the batch axis and run the backbone stages in order.
    stacked = torch.cat((images, coords), dim=2)
    feats = stacked.view(-1, images.size(2) + coords.size(2), height, width)
    backbone = (
        self.conv2a, self.conv2aa, self.conv2b,
        self.conv3a, self.conv3aa, self.conv3b,
        self.conv4a, self.conv4aa, self.conv4b,
        self.conv5a, self.conv5aa, self.conv5b,
        self.conv6aa, self.conv6a, self.conv6b,
    )
    for stage in backbone:
        feats = stage(feats)
    feat_h, feat_w = feats.size(2), feats.size(3)

    # Features of the frames that are actually used.
    roi_feat = feats.reshape(batch, -1, self.feat_dim, feat_h, feat_w)[:, :used]
    roi_feat = roi_feat.reshape(-1, self.feat_dim, feat_h, feat_w)

    # Box regression: sigmoid output scaled by roi_scale, channels 2..5 kept.
    box_raw = self.bbox_fc(feats).squeeze().sigmoid()
    rois = (self.roi_scale.to(images.device) * box_raw)[..., [2, 3, 4, 5]].reshape(-1, 1, 4)

    # One RoI per used frame, prefixed with its index into roi_feat.
    roi_ids = torch.arange(used * batch, dtype=torch.int32, device=rois.device).unsqueeze(-1)
    roi_boxes = rois.reshape(batch, -1, 4)[:, :used].reshape(-1, 4)
    pooled = self.car_roi(roi_feat, torch.cat((roi_ids, roi_boxes), dim=1))
    pooled = self.relu(self.roi_conv1(pooled))
    pooled = self.relu(self.roi_conv2(pooled))
    pooled = pooled.view(batch, used, 256)

    cls = self.class_fc(pooled).view(batch * used, -1) if self.classify else None

    # Frame weights: supplied ones, optionally modulated by the predicted ones.
    weights = kwargs['all_weight']
    if self.use_weight:
        weights = self.wt_fc(feats).reshape(batch, frames)[..., :used] * weights

    x_embed = self.dis_X_encoding(pooled)
    y_embed = self.dis_Y_encoding(pooled)
    z_embed = self.dis_Z_encoding(pooled)
    aux = self.post_process(x_embed, y_embed, z_embed, weights, kwargs)

    if not self.training:
        res = {'rois': rois * 32}
        res.update(aux)
        return [self.get_predictions(res, kwargs)]

    res = {'rois': rois * 32, 'extra': rois, 'weight': weights, 'cls': cls}
    res.update(aux)
    return self.loss(res, kwargs)
def train_step(self, data, optimizer, **kwargs):
    """Run one training iteration's forward pass and summarise its losses.

    Calls the model with ``**data``, reduces the result with ``_parse_losses``
    and returns ``loss``, ``log_vars`` and ``num_samples`` (the length of
    ``data['velocity']``).  ``optimizer`` and ``kwargs`` are not used here.
    """
    loss, log_vars = self._parse_losses(self(**data))
    return {
        'loss': loss,
        'log_vars': log_vars,
        'num_samples': len(data['velocity']),
    }
def val_step(self, data, optimizer=None, **kwargs):
    """Run one validation iteration's forward pass and summarise its losses.

    Same as ``train_step`` except that ``num_samples`` is the length of
    ``data['img_metas']``.  ``optimizer`` and ``kwargs`` are not used here.
    """
    loss, log_vars = self._parse_losses(self(**data))
    return {
        'loss': loss,
        'log_vars': log_vars,
        'num_samples': len(data['img_metas']),
    }
def _parse_losses(self, losses, **kwargs):
    """Reduce raw network outputs to a total loss and loggable scalars.

    Args:
        losses: dict whose values are tensors or lists of tensors.

    Returns:
        ``(loss, log_vars)``: ``loss`` is the sum of every reduced entry whose
        key contains ``'loss'``; ``log_vars`` is an ``OrderedDict`` of Python
        floats (including a ``'loss'`` entry), averaged across ranks when
        ``torch.distributed`` is initialised.

    Raises:
        TypeError: if a value is neither a tensor nor a list.
    """
    log_vars = OrderedDict()
    for name, value in losses.items():
        if isinstance(value, torch.Tensor):
            log_vars[name] = value.mean()
        elif isinstance(value, list):
            log_vars[name] = sum(part.mean() for part in value)
        else:
            raise TypeError(f'{name} is not a tensor or list of tensors')

    loss = sum(value for key, value in log_vars.items() if 'loss' in key)

    distributed = dist.is_available() and dist.is_initialized()
    if distributed:
        # If the ranks hold a different number of log vars, the all_reduce calls
        # below would wait on each other forever, so check the count first.
        log_var_length = torch.tensor(len(log_vars), device=loss.device)
        dist.all_reduce(log_var_length)
        message = (f'rank {dist.get_rank()}' +
                   f' len(log_vars): {len(log_vars)}' + ' keys: ' +
                   ','.join(log_vars.keys()))
        assert log_var_length == len(log_vars) * dist.get_world_size(), \
            'loss log variables are different across GPUs!\n' + message

    log_vars['loss'] = loss
    for name, value in log_vars.items():
        if distributed:
            # average a detached copy over all ranks for logging
            value = value.data.clone()
            dist.all_reduce(value.div_(dist.get_world_size()))
        log_vars[name] = value.item()
    return loss, log_vars

@DATASETS.register_module()
class SmokeCropLoaderRadarFuse3(Dataset):
    """Dataset of per-object image-crop sequences, registered in ``DATASETS``."""

    CLASSES = ['car']

    def __init__(self,
                 pair_file,
                 img_root,
                 pkl_root,
                 custom_mtx=None,
                 nframe=5,
                 img_rsz=[192, 224],
                 skip=5,
                 random_choose=True,
                 random_erase=False,
                 expand_ratio=2,
                 ref_coord=False,
                 crop_noise=True,
                 center_base=False,
                 auxy_root=None,
                 drop_dr_ratio=0,
                 pe_dim=0,
                 bbox_num=1,
                 feat_out=0,
                 aux_coord=True,
                 dist_range=None,
                 main_type=['Car', 'Truck', 'Bus'],
                 scale_range=[[-0.7, 0.7], [-0.4, 0.4], [0, 256], [0, 256]],
                 filter_ignore=False,
                 **kwards):
        """Store the configuration, index the samples and build the transforms.

        Args:
            pair_file: annotation file or list of files, read by ``load_infos``.
            img_root: stored as ``self.img_root``.
            pkl_root: root of the per-frame annotation pickles and of ``all_calib.pkl``.
            custom_mtx: optional camera matrix of a virtual camera; ``None``
                means the calibration's own ``P2`` is used where that is supported.
            nframe: number of frames per sample.
            img_rsz: ``[height, width]`` of the crops.
            skip: stride between consecutive samples of one object.
            random_choose: pick frames at random inside a sweep window (training).
            crop_noise: jitter the 3-D box before projecting it.
            center_base / ref_coord / aux_coord / pe_dim: coordinate-encoding options.
            dist_range: optional ``(min, max)`` filter on ``location[2]``.
            main_type: object types kept when type labels are present.
            filter_ignore: drop the objects listed in the ignore file.
            **kwards: only ``test_mode`` is read (default ``False``).

        The remaining arguments are stored as attributes of the same name.
        """
        self.test_mode = kwards.get('test_mode', False)
        self.filter_ignore = filter_ignore
        self.dist_range = dist_range
        self.main_type = main_type
        self.aux_coord = aux_coord
        self.feat_out = feat_out
        self.drop_dr_ratio = drop_dr_ratio
        self.pkl_root = pkl_root
        self.bbox_num = bbox_num
        self.pe_dim = pe_dim
        self.ref_coord = ref_coord
        self.auxy_root = auxy_root
        self.img_root = img_root
        self.crop_noise = crop_noise
        self.skip = skip
        self.expand_ratio = expand_ratio
        self.random_choose = random_choose
        self.random_erase = random_erase
        self.nframe = nframe
        self.center_base = center_base
        self.img_rsz = img_rsz
        self.scale_range = scale_range
        # np.array(None) is a 0-d object array and is never ``None``, which made the
        # ``self.custom_mtx is None`` checks in proj_bbox / get_cam_inner unreachable.
        self.custom_mtx = None if custom_mtx is None else np.array(custom_mtx)
        # load_infos reads several of the attributes above, so it must run after them.
        self.load_infos(pair_file, pkl_root)
        self.transform = transforms.Compose([transforms.Resize((img_rsz[0], img_rsz[1])), transforms.ToTensor()])
        self.augment = AugmentImagePair([0.8, 1.2, 0.5, 2.0, 0.8, 1.2], balance_scale=True)
        self.flag = np.zeros(len(self.infos), dtype=np.uint8)
def load_bbox_infos(self, root):
    """Index auxiliary images whose file name embeds a box as ``[a-b-c-...]``.

    Every file in ``root`` whose name contains a bracketed, dash-separated list
    of numbers is recorded as ``{'img_path': ..., 'bbox': [floats]}``; files
    whose name cannot be parsed are skipped.  The list is shuffled and stored in
    ``self.bbox_infos``; when ``root`` is ``None`` the attribute is ``None``.
    """
    if root is None:
        self.bbox_infos = None
        return

    infos = []
    for file in os.listdir(root):
        try:
            # A name without '[' used to raise IndexError outside the try block
            # and abort the scan; it is now skipped like any other bad name.
            items = file.split('[')[1].split(']')[0].split('-')
            bbox = [float(v) for v in items]
        except (IndexError, ValueError):
            continue
        infos.append({'img_path': os.path.join(root, file), 'bbox': bbox})
    self.bbox_infos = infos
    random.shuffle(self.bbox_infos)
def load_ignore(self, file):
    """Read the ignore list and return it as a dict keyed by ``batch+video+gid``.

    Each line is expected to hold at least three ``', '``-separated fields
    (batch, video, gid); further fields are ignored.  Lines with fewer fields
    (for example blank lines) are skipped.  All values in the returned dict are 0.
    """
    ignore = {}
    with open(file, 'r') as fp:
        for line in fp:
            # Strip the line end first so a three-field line does not leave '\n'
            # inside the gid, which would keep the key from ever matching.
            items = line.strip().split(', ')
            if len(items) < 3:
                continue
            batch, video, gid = items[0], items[1], items[2]
            ignore['{}+{}+{}'.format(batch, video, gid)] = 0
    return ignore
def load_infos(self, pair_file, pkl_root):
 """Build ``self.infos``, the list of samples served by the dataset.

 Each annotation file in ``pair_file`` (one path or a list of paths) is loaded
 and walked as ``data[batch][video]['data'][gid][stamp]``.  For every object a
 window of time stamps ending at ``stamps[i]`` is collected and, if it holds at
 least ``self.nframe`` stamps, appended to ``self.infos`` as a dict with the
 keys 'batch', 'video', 'gid', 'type', 'img_folder' and 'sweeps'.

 Also sets ``self.ignore``, ``self.data`` (test mode only in the reachable
 branch), ``self.bbox_idx``, ``self.bbox_infos`` (via ``load_bbox_infos``),
 ``self.all_calib`` and ``self.pkl_calib``.

 NOTE(review): the indentation of this copy of the file has been flattened, so
 the nesting described in the comments below is inferred from the statements.
 """
 if self.filter_ignore:
 self.ignore = self.load_ignore('/data1/mono_velocity-master/debug/err_infos/ignore')
 else:
 self.ignore = {}
 pair_file_list = pair_file if isinstance(pair_file, list) else [pair_file]
 # largest allowed gap between a stamp and the window end (same unit as the stamps)
 max_gap = self.nframe * 100 + 1000
 # NOTE(review): the trailing "| 1" makes this condition always truthy, so the
 # else branch further down (loading a pre-built info file) is never reached.
 if (self.test_mode == False) | self.feat_out | 1:
 self.infos = []
 if self.test_mode:
 self.data = {}
 for f in pair_file_list:
 # try mmcv first, fall back to a plain pickle on any failure
 try:
 data = mmcv.load(f)
 except:
 fp = open(f, 'rb')
 data = pickle.load(fp)
 fp.close()
 for batch in data:
 for video in data[batch]:
 if (video == None):
 continue
 for gid in data[batch][video]['data']:
 key = '{}+{}+{}'.format(batch, video, gid)
 if key in self.ignore:
 continue
 # when type labels exist, keep only objects whose type is in self.main_type
 if ('type' in data[batch][video]) and (data[batch][video]['type'][gid] not in self.main_type):
 continue
 else:
 # type_cvtor is defined outside this block; 'batch5' batches always get type 6
 if self.test_mode == False:
 max_type = 6 if ('batch5' in batch) else type_cvtor[data[batch][video]['type'][gid]]
 else:
 max_type=0
 stamps = sorted([int(d) for d in list(data[batch][video]['data'][gid].keys())])
 # optional filter on the third component of 'location'
 if self.dist_range is not None:
 try:
 stamps = [d for d in stamps if (data[batch][video]['data'][gid][str(d)]['location'][2] > self.dist_range[0]) & (data[batch][video]['data'][gid][str(d)]['location'][2] < self.dist_range[1])]
 except:
 print(batch, video, gid, self.test_mode)
 continue
 # disabled variant: fixed stride of self.skip between window ends
 if 0:
 for i in range(self.nframe - 1, len(stamps), self.skip):
 sweeps = [d for d in stamps if (stamps[i] - d < max_gap) & (stamps[i] - d >= 0)]
 if len(sweeps) < self.nframe:
 continue
 self.infos.append({'batch': batch,
 'video': video,
 'gid': gid,
 'type': max_type,
 'img_folder': data[batch][video]['img_folder'] if 'img_folder' in data[batch][video] else None,
 'sweeps': [str(s) for s in sweeps]})
 else:
 # active variant: the stride depends on the batch name and the 'state' labels
 i = self.nframe - 1
 while i < len(stamps):
 sweeps = [d for d in stamps if (stamps[i] - d < max_gap) & (stamps[i] - d >= 0)]
 if len(sweeps) < self.nframe:
 i += self.skip
 continue
 self.infos.append({'batch': batch,
 'video': video,
 'gid': gid,
 'type': max_type,
 'img_folder': data[batch][video]['img_folder'] if 'img_folder' in data[batch][video] else None,
 'sweeps': [str(s) for s in sweeps]})
 if ('radar' in batch) & (False == self.test_mode):
 all_state = [data[batch][video]['data'][gid][str(s)]['state'] for s in stamps[i:i + self.skip]]
 else:
 all_state = None
 # The string literals are Chinese state labels stored in the annotations
 # (roughly: turning point, decelerating, starting off, moving slowly,
 # accelerating, stationary).  Windows containing any of them advance by 1
 # instead of self.skip, i.e. they are sampled more densely.
 if (all_state != None) and (('拐点' in all_state) | ('减速' in all_state) | ('起步' in all_state) | ('缓行' in all_state) | ('加速' in all_state) | ('静止' in all_state)):
 i += 1
 elif 'batch5' in batch:
 i += 1
 else:
 i += self.skip
 if self.test_mode:
 self.data.update(data)
 else:
 # NOTE: change this path
 self.infos = mmcv.load("/data1/velocity_model/G3090/annotaions/merge_crop_info_valid_test-acc-20frame.pkl")
 self.data = {}
 for f in pair_file_list:
 try:
 data = mmcv.load(f)
 except:
 fp = open(f, 'rb')
 data = pickle.load(fp)
 fp.close()
 self.data.update(data)
 print('num samples:', len(self.infos))
 # bbox
 self.bbox_idx = 0
 self.load_bbox_infos(self.auxy_root)
 # calib data
 # NOTE(review): all_calib is left as None here, yet get_data_info indexes
 # self.all_calib[...]; it has to be populated elsewhere before samples are read.
 self.all_calib = None #mmcv.load('/dataset/zhangjingwei/label_pkl3/aeb_2M_1080/train_l3d_120d_2023_02/120_train__pkl_1117/calib.pkl')
 # pkl
 self.pkl_calib = mmcv.load(os.path.join(pkl_root, 'all_calib.pkl'))
def __len__(self):
    """Return the number of samples indexed in ``self.infos``."""
    sample_count = len(self.infos)
    return sample_count
def load_img_decry(self, image_path):
 """Read an image, using the Auth* decryption library when plain decoding fails.

 The file is first decoded directly with OpenCV.  If that yields ``None`` a
 decryption handle is created once (cached on ``self.handle_dec``), the file is
 decrypted into a buffer and decoded from there, with ``cv2.imread`` as the
 last resort.  Images taller than 1080 rows are cropped to rows 100..1179.

 NOTE(review, security): the user name and password below are hard-coded in
 source control; they should be moved to configuration / a secret store and
 the exposed credentials rotated.
 NOTE(review): the return codes ``ret`` of both Auth* calls are never checked,
 and ``img_decode.shape`` is read even if every decoding attempt returned None.
 NOTE(review): indentation is flattened in this copy, so it cannot be told from
 the text whether the ``cv2.imread`` fallback sits inside the decryption branch.
 """
 img_decode = cv2.imdecode(np.fromfile(image_path, dtype=np.uint8), 1)
 if img_decode is None:
 # create the decryption handle only once per dataset instance
 if not hasattr(self, 'handle_dec'):
 kms_port = 443
 kms_ip = "encryptplatxn.hikvision.com"
 ca_path = "/data1/opensourcelib/G3090/deal_with_dataset/decrypt/misc/encryptplatxn.hikvision.com.pem"
 id_path = "/data1/opensourcelib/G3090/deal_with_dataset/decrypt/misc/id.txt"
 Dec_user_name = "zhengxiuzhi"
 Dec_user_password = "Hik12345"
 dec_flag = 0
 # 0.init enc lib
 # AuthEncInit()
 self.handle_dec = ctypes.c_void_p()
 ret = AuthEncryptCreateHandle(ctypes.pointer(self.handle_dec), kms_ip, kms_port, ca_path, Dec_user_name, Dec_user_password, dec_flag, id_path)
 # output buffer sized like the encrypted file
 fsize = os.path.getsize(image_path)
 pbuff = ctypes.create_string_buffer(fsize)
 dtat_len = ctypes.c_longlong(fsize) # the allocated output-buffer length must be passed to the API for validation; it reports an error if the buffer is too small, and on success writes the actual length of the encrypted/decrypted data back into dst_len
 ret = AuthDecryptFileToBuf(image_path, pbuff, ctypes.pointer(dtat_len), self.handle_dec)
 # AuthEncDeInit()
 # NOTE(review): the whole buffer is decoded, not just the first dtat_len.value bytes
 img_arr = np.frombuffer(pbuff, np.uint8)
 img_decode = cv2.imdecode(img_arr, cv2.IMREAD_COLOR + cv2.IMREAD_IGNORE_ORIENTATION)
 if img_decode is None:
 img_decode = cv2.imread(image_path)
 if img_decode.shape[0] > 1080:
 img_decode = img_decode[100:1180, :]
 return img_decode
def load_img_decry2(self, image_path):
 """Variant of ``load_img_decry`` with different paths, credentials and handle.

 The file is first decoded directly with OpenCV.  If that yields ``None`` a
 decryption handle is created once (cached on ``self.handle_dec2``), the file
 is decrypted into a buffer and decoded from there, with ``cv2.imread`` as the
 last resort.  Images taller than 1080 rows are cropped to rows 100..1179.

 NOTE(review, security): the user name and password below are hard-coded in
 source control; they should be moved to configuration / a secret store and
 the exposed credentials rotated.
 NOTE(review): the return codes ``ret`` of both Auth* calls are never checked,
 and ``img_decode.shape`` is read even if every decoding attempt returned None.
 NOTE(review): indentation is flattened in this copy, so it cannot be told from
 the text whether the ``cv2.imread`` fallback sits inside the decryption branch.
 """
 img_decode = cv2.imdecode(np.fromfile(image_path, dtype=np.uint8), 1)
 if img_decode is None:
 # create the decryption handle only once per dataset instance
 if not hasattr(self, 'handle_dec2'):
 kms_port = 443
 kms_ip = "encryptplatxn.hikvision.com"
 # NOTE: replace with the local path of the decryption files
 ca_path = "/data1/velocity_model/G3090/mmdetection3d/mmdet3d/datasets/decrypt/misc/encryptplatxn.hikvision.com.pem"
 id_path = "/data1/velocity_model/G3090/mmdetection3d/mmdet3d/datasets/decrypt/misc/id.txt"
 Dec_user_name = "wuziyang"
 Dec_user_password = "Wzy123456"
 dec_flag = 0
 # 0.init enc lib
 # AuthEncInit()
 self.handle_dec2 = ctypes.c_void_p()
 ret = AuthEncryptCreateHandle(ctypes.pointer(self.handle_dec2), kms_ip, kms_port, ca_path, Dec_user_name, Dec_user_password, dec_flag, id_path)
 # output buffer sized like the encrypted file
 fsize = os.path.getsize(image_path)
 pbuff = ctypes.create_string_buffer(fsize)
 dtat_len = ctypes.c_longlong(fsize) # the allocated output-buffer length must be passed to the API for validation; it reports an error if the buffer is too small, and on success writes the actual length of the encrypted/decrypted data back into dst_len
 ret = AuthDecryptFileToBuf(image_path, pbuff, ctypes.pointer(dtat_len), self.handle_dec2)
 # AuthEncDeInit()
 # NOTE(review): the whole buffer is decoded, not just the first dtat_len.value bytes
 img_arr = np.frombuffer(pbuff, np.uint8)
 img_decode = cv2.imdecode(img_arr, cv2.IMREAD_COLOR + cv2.IMREAD_IGNORE_ORIENTATION)
 if img_decode is None:
 img_decode = cv2.imread(image_path)
 if img_decode.shape[0] > 1080:
 img_decode = img_decode[100:1180, :]
 return img_decode
def distort_point(self, uv, mtx, distort_K):
    """Apply a rational radial + tangential distortion to pixel coordinates.

    Args:
        uv: array whose last axis starts with the pixel ``u`` and ``v`` values.
        mtx: camera matrix; ``[0, 0]``, ``[1, 1]``, ``[0, 2]`` and ``[1, 2]`` are
            read as focal lengths and principal point.
        distort_K: eight coefficients in the order k1, k2, p1, p2, k3, k4, k5, k6.

    Returns:
        Array of distorted ``(u, v)`` pairs stacked on a new last axis.
    """
    fx, fy = mtx[0, 0], mtx[1, 1]
    cx, cy = mtx[0, 2], mtx[1, 2]

    # normalised image coordinates
    xn = (uv[..., 0] - cx) / fx
    yn = (uv[..., 1] - cy) / fy
    r2 = xn * xn + yn * yn
    r4 = r2 * r2
    r6 = r4 * r2

    k_1, k_2, p_1, p_2, k_3, k_4, k_5, k_6 = (distort_K[i] for i in range(8))
    numer = 1 + k_1 * r2 + k_2 * r4 + k_3 * r6
    denom = 1 + k_4 * r2 + k_5 * r4 + k_6 * r6

    xd = xn * numer / denom + 2 * p_1 * xn * yn + p_2 * (r2 + 2 * xn * xn)
    yd = yn * numer / denom + 2 * p_2 * xn * yn + p_1 * (r2 + 2 * yn * yn)

    # back to pixels
    return np.stack((xd * fx + cx, yd * fy + cy), axis=-1)
def load_image(self, calib, info, bbox, stamp):
 """Crop ``bbox`` from one frame and resample it to ``self.img_rsz``.

 The crop is defined in the virtual camera ``self.custom_mtx``; each output
 pixel is mapped to the real camera (``calib['P2']``), distorted with
 ``calib['distCoeffs']`` and filled with the nearest source pixel.

 Args:
 calib: dict providing 'P2' and 'distCoeffs'.
 info: sample dict providing 'img_folder'.
 bbox: box with entries [0..3] read as x1, y1, x2, y2.
 stamp: frame name (the image is ``<img_folder>/<stamp>.jpg``).

 Returns:
 ``(img, scale)``: ``img`` is a float tensor built from the RGB crop divided
 by 255 with the channel axis first, ``scale`` is ``self.img_rsz[0] / bbox
 height``.  On any exception ``(None, 1)`` is returned after a message is
 printed.
 """
 try:
 mtx = calib['P2'][:, :3]
 distort = calib['distCoeffs'][:8]
 # load image
 img_path = '{}/{}.jpg'.format(info['img_folder'], stamp)
 image = self.load_img_decry(img_path)
 img_w = image.shape[1]
 img_h = image.shape[0]
 # flat list of source pixels, addressed by x + y * img_w below
 source = image.reshape(-1, 3)
 # resize
 w = bbox[2] - bbox[0]
 h = bbox[3] - bbox[1]
 # the scale is chosen so that the box height maps to the output height
 scale =self.img_rsz[0] / h
 dw = int(scale * w)
 dh = int(scale * h)
 dst = np.zeros((self.img_rsz[0], self.img_rsz[1], 3), dtype=np.uint8).reshape(-1, 3)
 if self.center_base:
 # sample the full output grid, centred on the box centre
 u = np.arange(self.img_rsz[1])
 v = np.arange(self.img_rsz[0])
 uu, vv = np.meshgrid(u, v)
 suu = (uu + 0.5 - 0.5 * self.img_rsz[1]) / scale + 0.5 * (bbox[0] + bbox[2])
 svv = (vv + 0.5 - 0.5 * self.img_rsz[0]) / scale + 0.5 * (bbox[1] + bbox[3])
 else:
 # sample a dh x dw grid anchored at the box's top-left corner
 # NOTE(review): if dw exceeds self.img_rsz[1] the destination index below
 # runs into the next row; presumably the caller limits the aspect ratio - confirm
 u = np.arange(dw)
 v = np.arange(dh)
 uu, vv = np.meshgrid(u, v)
 suu = (uu + 0.5) / scale + bbox[0]
 svv = (vv + 0.5) / scale + bbox[1]
 spp = np.ones_like(suu)
 # virtual camera to real camera
 uvz = np.matmul(
 np.expand_dims(np.expand_dims(np.matmul(mtx, np.linalg.inv(self.custom_mtx)), axis=0), axis=0),
 np.expand_dims(np.stack((suu, svv, spp), axis=-1), axis=-1)).squeeze(axis=-1)
 # distort
 dist_uv = self.distort_point(uvz, mtx, distort)
 # nearest-pixel lookup (truncation to int), out-of-image samples stay black
 x1y1 = dist_uv.astype(np.int64).reshape(-1, 2)
 mask = (x1y1[..., 0] >= 0) & (x1y1[..., 0] < img_w) & (x1y1[..., 1] >= 0) & (x1y1[..., 1] < img_h)
 src_ind = x1y1[:, 0] + x1y1[:, 1] * img_w
 dst_ind = (uu + vv * self.img_rsz[1]).reshape(-1)
 dst[dst_ind[mask]] = source[src_ind[mask]]
 dst = dst.reshape(self.img_rsz[0], self.img_rsz[1], 3)
 img = torch.from_numpy(cv2.cvtColor(dst, cv2.COLOR_BGR2RGB).astype(np.float32).transpose(2, 0, 1) / 255)
 except:
 # NOTE(review): the bare except also hides programming errors, not only I/O failures
 print('load {}/{}.jpg failed'.format(info['img_folder'], stamp))
 img = None
 scale = 1
 return img, scale
def proj_bbox(self, calib, anno):
    """Project the annotated 3-D box into the image and return its 2-D bounds.

    Args:
        calib: dict providing 'Tr_velo_to_cam' (and 'P2' when no custom matrix is set).
        anno: dict providing the lists 'position' and 'dimension' and the scalar 'theta'.

    Returns:
        ``np.array([xmin, ymin, xmax, ymax])`` clamped to 0..1920 and 0..1080.

    When ``self.crop_noise`` is set, position and dimension are scaled by random
    factors and the angle is shifted by up to +/-10 degrees before projecting.
    """
    # 3D box as one row: position (3), dimension (3), angle
    cube = anno['position'] + anno['dimension'] + [anno['theta']]
    cube = np.expand_dims(np.array([float(d) for d in cube]), axis=0)
    if self.crop_noise:
        var = np.array([0.05, 0.05, 0.1, 0.05, 0.05, 0.05])
        prob = 2 * (np.random.rand(6) - 0.5)
        cube[0, :6] *= (1 + prob * var)
        cube[0, 6] += 2 * (random.random() - 0.5) * math.pi * 10 / 180

    # Take the angle as a Python float: passing the 1-element array to math.cos
    # relies on an implicit array-to-scalar conversion that NumPy has deprecated.
    rot = float(cube[0, 6])
    cos_r, sin_r = math.cos(rot), math.sin(rot)
    R = np.expand_dims(np.eye(3), axis=0)
    R[:, 0, 0] = cos_r
    R[:, 0, 2] = sin_r
    R[:, 2, 0] = -sin_r
    R[:, 2, 2] = cos_r
    R = np.expand_dims(R, axis=1)

    # unit corners: the second axis runs from -1 to 0, the other two from -0.5 to 0.5
    corners = np.array([[0.5, -1, 0.5],
                        [0.5, -1, -0.5],
                        [-0.5, -1, -0.5],
                        [-0.5, -1, 0.5],
                        [0.5, 0, 0.5],
                        [0.5, 0, -0.5],
                        [-0.5, 0, -0.5],
                        [-0.5, 0, 0.5]]).reshape(1, 8, 3)
    corners = (corners * np.expand_dims(cube[:, 3:6], axis=1)).reshape(1, 8, 3, 1)
    rot_corners = np.matmul(R, corners)
    rot_corners += cube[:, :3].reshape(-1, 1, 3, 1)
    rot_corners = np.concatenate((rot_corners, np.ones((1, 8, 1, 1))), axis=2)

    # to the camera frame
    velo2cam = np.eye(4)
    velo2cam[:3] = calib['Tr_velo_to_cam']
    velo2cam = velo2cam.reshape(1, 1, 4, 4)
    xyzc = np.matmul(velo2cam, rot_corners)[:, :, :3, :]

    # to the image
    if self.custom_mtx is not None:
        mtx = self.custom_mtx
    else:
        mtx = calib['P2']
    cam2img = mtx[:, :3].reshape(1, 1, 3, 3)
    uvs = np.matmul(cam2img, xyzc).squeeze(axis=-1)
    uvs[..., 0] /= uvs[..., 2]
    uvs[..., 1] /= uvs[..., 2]

    # bounds of the eight projected corners, clamped to a 1920 x 1080 image
    xmin = max(0, min(uvs[..., 0].min(axis=1)[0], 1920))
    xmax = max(0, min(uvs[..., 0].max(axis=1)[0], 1920))
    ymin = max(0, min(uvs[..., 1].min(axis=1)[0], 1080))
    ymax = max(0, min(uvs[..., 1].max(axis=1)[0], 1080))
    bbox = np.array([xmin, ymin, xmax, ymax])
    return bbox
def decode_annos(self, calib, anno):
    """Transform the annotated centre point and velocity into the camera frame.

    Args:
        calib: dict providing 'Tr_velo_to_cam' (rows 0..2 of a 4x4 transform).
        anno: dict providing 'position', 'dimension' and 'velocity'.

    Returns:
        ``(xyzc, velc)``: the centre point (position with half of
        ``dimension[1]`` subtracted from its second component) and the velocity,
        both as length-3 arrays; the velocity is only rotated, not translated.
    """
    extrinsic = np.eye(4)
    extrinsic[:3] = calib['Tr_velo_to_cam']

    # centre point in homogeneous coordinates
    centre = np.array([float(d) for d in anno['position']] + [1])
    centre[1] -= 0.5 * float(anno['dimension'][1])
    xyzc = np.matmul(extrinsic, centre.reshape(4, 1)).squeeze(axis=-1)[:3]

    # velocity: zero the translation column so only the rotation is applied
    extrinsic[:3, 3] *= 0
    velocity = np.array([float(d) for d in anno['velocity']] + [1]).reshape(4, 1)
    velc = np.matmul(extrinsic, velocity).squeeze(axis=-1)[:3]
    return xyzc, velc
def get_cam_inner(self, calib):
    """Return ``(fx, fy, cx, cy)`` of the camera used for coordinate encoding.

    The values are read from ``self.custom_mtx`` when it is set and from
    ``calib['P2']`` otherwise.
    """
    mtx = calib['P2'] if self.custom_mtx is None else self.custom_mtx
    return mtx[0, 0], mtx[1, 1], mtx[0, 2], mtx[1, 2]
def get_stamps(self, info):
    """Choose the ``self.nframe`` time stamps used for one sample.

    With ``self.random_choose`` set and outside test mode, ``info['sweeps']`` is
    shuffled in place and the first ``nframe`` entries are returned in
    chronological order; otherwise the last ``nframe`` entries are returned as
    stored.

    Args:
        info: sample dict whose 'sweeps' entry is a list of numeric strings.

    Returns:
        List of time-stamp strings.
    """
    sweeps = info['sweeps']
    if self.random_choose and not self.test_mode:
        random.shuffle(sweeps)
        # The stamps are numbers stored as strings; a plain string sort would put
        # '1000' before '900', so order them by numeric value.
        return sorted(sweeps[:self.nframe], key=float)
    return sweeps[-self.nframe:]
def encode_coord(self, calib, scale, flow_bbox):
    """Build the coordinate channels that accompany one image crop.

    Args:
        calib: passed on to ``get_cam_inner``.
        scale: crop scale (output pixels per source pixel).
        flow_bbox: array whose row 0 holds the crop box, or ``None``.

    Returns:
        A float tensor.  With ``pe_dim == 0`` it has 2 channels (4 with
        ``aux_coord``) at the crop resolution; with ``pe_dim > 0`` it is a
        sin/cos positional encoding with ``2 * pe_dim`` channels at 1/32 of the
        crop resolution.  When ``flow_bbox`` is ``None`` a random tensor of the
        matching shape is returned instead.
    """
    out_h, out_w = self.img_rsz[0], self.img_rsz[1]

    # No box: random placeholder of the shape the active mode would produce.
    if flow_bbox is None:
        if self.pe_dim > 0:
            return torch.rand(self.pe_dim * 2, out_h // 32, out_w // 32).float()
        # two extra channels when aux_coord is on, to stay consistent with the earlier layout
        channels = 4 if self.aux_coord else 2
        return torch.rand(channels, out_h, out_w).float()

    cam_fx, cam_fy, cam_cx, cam_cy = self.get_cam_inner(calib)
    box = flow_bbox[0]

    if self.pe_dim == 0:
        u = np.arange(out_w)
        v = np.arange(out_h)
        if self.center_base:
            # pixel centres measured from the centre of the box
            x = ((u + 0.5 - 0.5 * out_w) / scale + 0.5 * (box[0] + box[2]) - cam_cx) / cam_fx
            y = (v + 0.5 - 0.5 * out_h) / scale
            if not self.ref_coord:
                y = (y + 0.5 * (box[1] + box[3]) - cam_cy) / cam_fy
        else:
            # pixels measured from the top-left corner of the box
            x = (u / scale + box[0] - cam_cx) / cam_fx
            y = v / scale
            if not self.ref_coord:
                y = (y + box[1] - cam_cy) / cam_fy

        # broadcast the two 1-D ramps to (1, H, W) planes
        x_plane = np.repeat(x.reshape(1, 1, -1), out_h, axis=1)
        y_plane = np.repeat(y.reshape(1, -1, 1), out_w, axis=2)
        planes = [x_plane, y_plane]
        if self.aux_coord:
            # two extra channels, to stay consistent with the earlier layout
            uu, vv = np.meshgrid(u, v)
            x_prime = uu * cam_fx / (box[2] - box[0] + 1e-8) / out_w
            y_prime = vv * cam_fy / (box[3] - box[1] + 1e-8) / out_h
            planes.append(np.expand_dims(x_prime, axis=0))
            planes.append(np.expand_dims(y_prime, axis=0))
        return torch.from_numpy(np.concatenate(planes)).float()

    # Sin/cos positional encoding on the 1/32 feature grid.
    temperature = 10000
    dim_t = torch.arange(self.pe_dim, dtype=torch.float32)
    dim_t = temperature ** (2 * (dim_t // 2) / self.pe_dim)
    grid_w = out_w // 32
    grid_h = out_h // 32
    u = torch.arange(grid_w)
    v = torch.arange(grid_h)
    map_scale = scale / 32
    if self.center_base:
        uu = ((u + 0.5 - 0.5 * grid_w) / map_scale + 0.5 * (box[0] + box[2]) - cam_cx) / cam_fx
        if self.ref_coord:
            vv = (v + 0.5 - 0.5 * grid_h) / map_scale
        else:
            vv = ((v + 0.5 - 0.5 * grid_h) / map_scale + 0.5 * (box[1] + box[3]) - cam_cy) / cam_fy
    else:
        uu = (u / map_scale + box[0] - cam_cx) / cam_fx
        if self.ref_coord:
            vv = v / map_scale
        else:
            vv = (v / map_scale + box[1] - cam_cy) / cam_fy
    pos_y, pos_x = torch.meshgrid(vv, uu)
    pos_x = pos_x[:, :, None] / dim_t
    pos_y = pos_y[:, :, None] / dim_t
    # interleave sin of the even channels with cos of the odd channels
    pos_x = torch.stack((pos_x[:, :, 0::2].sin(), pos_x[:, :, 1::2].cos()), dim=-1).flatten(-2)
    pos_y = torch.stack((pos_y[:, :, 0::2].sin(), pos_y[:, :, 1::2].cos()), dim=-1).flatten(-2)
    return torch.cat((pos_y, pos_x), dim=-1).permute(2, 0, 1)
def get_data_info(self, idx):
    """Build the sample dict for one clip whose annotations live in pickles.

    For every timestamp returned by ``self.get_stamps`` the per-frame pickle
    is loaded with ``mmcv.load``, the box is projected (``self.proj_bbox``),
    location/velocity are decoded (``self.decode_annos``), a crop window is
    derived with ``generate_flow_rois`` and the image crop and coordinate
    encoding are produced by ``self.load_image`` / ``self.encode_coord``.

    Args:
        idx: Index into ``self.infos``.

    Returns:
        dict: Per-frame tensors. ``rois`` and ``all_DR`` stay all-zero here,
        the ``all_DR_vel`` / ``all_r_vel`` / ``all_DR_pos`` entries are filled
        with the decoded velocity / location, ``all_dr_mat`` is a constant
        matrix repeated per frame and ``cam2bev`` is the 3x3 identity.
        ``self.augment`` is applied when ``self.test_mode == False``.
    """
    info = copy.deepcopy(self.infos[idx])
    calib = self.all_calib[self.pkl_calib[info['batch']][info['video']]['calib']]['2M']
    stamps = self.get_stamps(info)

    n = self.nframe

    def frame_buffer(*tail):
        # One float32 zero buffer with a leading per-frame axis.
        return np.zeros((n,) + tail, dtype=np.float32)

    locations = frame_buffer(3)
    velocities = frame_buffer(3)
    images = frame_buffer(3, self.img_rsz[0], self.img_rsz[1])
    times = frame_buffer()
    dr_velocities = frame_buffer(3)
    radar_velocities = frame_buffer(3)
    dr_positions = frame_buffer(3)
    coords = []

    for frame, stamp in enumerate(stamps):
        anno_path = '{}/{}/{}/{}/{}.pkl'.format(
            self.pkl_root, info['batch'], info['video'], info['gid'], stamp)
        anno = mmcv.load(anno_path)
        bbox = self.proj_bbox(calib, anno)
        location, velocity = self.decode_annos(calib, anno)
        # 1920x1080 is the frame size handed to the ROI generator.
        flow_bbox, _ = generate_flow_rois(
            np.expand_dims(bbox, axis=0), self.expand_ratio, 1920, 1080, pad=5)
        crop, scale = self.load_image(calib, info, flow_bbox[0], stamp)
        coord = self.encode_coord(calib, scale, flow_bbox)

        locations[frame] = location
        velocities[frame] = velocity
        images[frame] = crop
        times[frame] = 0.001 * float(stamp)
        # No separate dead-reckoning / radar measurements on this path:
        # those slots are filled with the decoded annotation values.
        dr_velocities[frame] = velocity
        radar_velocities[frame] = velocity
        dr_positions[frame] = location
        coords.append(coord)

    all_location = torch.from_numpy(locations)
    all_stamps = torch.from_numpy(times)
    all_weight = torch.from_numpy(np.ones(n, dtype=np.float32))

    if self.random_erase > 0:
        # Draw one random value per frame and mask out the frames whose
        # draw falls below a randomly shrunk order statistic.
        draws = torch.rand(n)
        cutoff = torch.sort(draws)[0][int(self.random_erase * n)] * random.random()
        all_weight = torch.logical_not(draws < cutoff)

    axis_swap = torch.tensor([[0, -1, 0], [1, 0, 0], [0, 0, 1]])
    sample = {
        'img': torch.from_numpy(images),
        'rois': torch.from_numpy(frame_buffer(4)),
        'location': all_location,
        'velocity': torch.from_numpy(velocities),
        'coord': torch.stack(coords, dim=0),
        't': all_stamps - all_stamps.mean(),
        'all_weight': all_weight,
        'all_DR': torch.from_numpy(frame_buffer(3)),
        'all_DR_vel': torch.from_numpy(dr_velocities),
        'all_r_vel': torch.from_numpy(radar_velocities),
        'all_r_pos': all_location[..., [0, 2]],
        'all_DR_pos': torch.from_numpy(dr_positions),
        'all_dr_mat': axis_swap.unsqueeze(dim=0).repeat(n, 1, 1).float(),
        'cam2bev': torch.eye(3).float(),
    }
    if self.test_mode == False:
        sample = self.augment(sample)
    return sample
def crop_image(self, info, anno, stamp):
    """Load the pre-cropped image of one frame with its ROI and scale.

    Only the crop sizes 192x224 and 128x128 (``self.img_rsz``) are supported.

    Args:
        info: Clip record; ``video``, ``gid`` and ``batch`` are read.
        anno: Per-frame annotation mapping. ``scale`` / ``roi`` (and ``flow``
            when re-centring) are read for 192x224, ``scale_128`` /
            ``roi_128`` for 128x128.
        stamp: Frame timestamp used as the image file stem.

    Returns:
        tuple: ``(crop, bbox, scale)``. ``bbox`` is ``None`` for 128x128
        crops whose annotation carries no ``roi_128``.

    Raises:
        KeyError: 128x128 crop without ``scale_128`` outside test mode.
        NotImplementedError: ``self.img_rsz`` is not a supported size.
    """
    rows, cols = self.img_rsz[0], self.img_rsz[1]
    if rows == 192 and cols == 224:
        img_path = '{}/{}/{}/{}.bmp'.format(self.img_root, info['video'], info['gid'], stamp)
        crop = self.load_img_decry2(img_path)
        scale = anno['scale']
        bbox = np.array(anno['roi'])
        # Batches 1-2 were cropped anchored at the top-left corner; shift the
        # content so that the object sits in the middle of the canvas.
        if self.center_base and (info['batch'] == 'radar_crop_info_batch12'):
            flow = anno['flow']
            cx = 0.5 * (flow[2] - flow[0]) * scale
            cy = 0.5 * (flow[3] - flow[1]) * scale
            dx = int(crop.shape[1] / 2 - cx)
            dy = int(crop.shape[0] / 2 - cy)
            # NOTE(review): `bbox` is not shifted by (dx, dy) here, and a
            # negative dx/dy would make the slices below mismatch - confirm.
            tsf = np.zeros_like(crop)
            tsf[dy:crop.shape[0] - dy, dx:crop.shape[1] - dx] = crop[:crop.shape[0] - 2 * dy, :crop.shape[1] - 2 * dx]
            crop = tsf
    elif rows == 128 and cols == 128:
        try:
            img_path = '{}/{}/{}/{}-128x128.bmp'.format(self.img_root, info['video'], info['gid'], stamp)
            crop = self.load_img_decry2(img_path)
        except Exception:
            # Best-effort fallback to the second storage location. Only
            # ordinary errors are retried so Ctrl-C still interrupts.
            img_path = '{}/{}/{}/{}-128x128.bmp'.format('/data1/DataSets/cropsets/imgs', info['video'], info['gid'], stamp)
            crop = self.load_img_decry2(img_path)
        if 'scale_128' in anno:
            scale = anno['scale_128']
            bbox = np.array(anno['roi_128']) if 'roi_128' in anno else None
        elif self.test_mode:
            # Test annotations may lack the 128x128 fields; use the generic
            # scale and report that no ROI is available.
            scale = anno['scale']
            bbox = None
        else:
            raise KeyError('no scale info')
    else:
        raise NotImplementedError(
            'not implemented img size: {}x{}'.format(rows, cols))
    return crop, bbox, scale
def get_data_info_radar(self, idx):
    """Build the sample dict for one clip of a radar batch.

    Calibration vectors and per-frame annotations come from ``self.data`` in
    test mode and from ``self.pkl_calib`` / per-frame pickles otherwise. Every
    frame contributes the image crop, coordinate encoding, location, velocity
    and the ``dr`` / ``dr_vel`` / ``r_vel`` / ``dr_pos`` annotation fields.

    Args:
        idx: Index into ``self.infos``.

    Returns:
        dict: Per-frame tensors plus ``all_dr_mat`` (one 3x3 matrix per frame
        built from ``all_DR``), ``all_r_pos`` (x/z of ``all_DR_pos`` rotated
        by the transpose of the per-frame rotation), ``cam2bev`` (inverse of
        ``bev2cam``) and ``all_type`` (``info['type']`` repeated per frame).
    """
    info = copy.deepcopy(self.infos[idx])
    stamps = self.get_stamps(info)
    # Test mode reads calibration from the in-memory table, training from
    # the pickled calibration table. The key 'came2car' is spelled as stored.
    if self.test_mode:
        avm2car = self.data[info['batch']][info['video']]['avm2car']
        cam2car = self.data[info['batch']][info['video']]['came2car']
        bev2cam = self.data[info['batch']][info['video']]['bev2cam']
    else:
        avm2car = self.pkl_calib[info['batch']][info['video']]['avm2car']
        cam2car = self.pkl_calib[info['batch']][info['video']]['came2car']
        bev2cam = self.pkl_calib[info['batch']][info['video']]['bev2cam']
    # Per-frame float32 buffers, filled frame by frame in the loop below.
    all_type = np.ones((self.nframe), dtype=np.float32)
    all_location = np.zeros((self.nframe, 3), dtype=np.float32)
    all_velocity = np.zeros((self.nframe, 3), dtype=np.float32)
    all_img = np.zeros((self.nframe, 3, self.img_rsz[0], self.img_rsz[1]), dtype=np.float32)
    all_weight = np.ones((self.nframe), dtype=np.float32)
    all_stamps = np.zeros((self.nframe), dtype=np.float32)
    all_DR = np.zeros((self.nframe, 3), dtype=np.float32)
    all_DR_vel = np.zeros((self.nframe, 3), dtype=np.float32)
    all_r_vel = np.zeros((self.nframe, 3), dtype=np.float32)
    all_DR_pos = np.zeros((self.nframe, 3), dtype=np.float32)
    all_2D_bbox = np.zeros((self.nframe, 4), dtype=np.float32)
    all_sp_vel = np.zeros((self.nframe), dtype=np.float32)
    all_sp_acc = np.zeros((self.nframe), dtype=np.float32)
    all_coord = []
    for i, stamp in enumerate(stamps):
        if self.test_mode:
            anno = copy.deepcopy(self.data[info['batch']][info['video']]['data'][info['gid']][stamp])
        else:
            anno = mmcv.load('{}/{}/{}/{}/{}.pkl'.format(self.pkl_root, info['batch'], info['video'], info['gid'], stamp))
        location = anno['location']
        velocity = anno['velocity']
        # Each field is wrapped in a list so the arrays get a leading axis of 1.
        flow_bbox = np.array([anno['flow']])
        dr = np.array([anno['dr']])
        dr_vel = np.array([anno['dr_vel']])
        r_vel = np.array([anno['r_vel']])
        dr_pos = np.array([anno['dr_pos']])
        # load image
        crop, rect_2d, scale = self.crop_image(info, anno, stamp)
        # BGR -> RGB, HWC -> CHW, scaled by 1/255.
        crop = torch.from_numpy(cv2.cvtColor(crop, cv2.COLOR_BGR2RGB).astype(np.float32).transpose(2, 0, 1) / 255)
        # coord
        coord = self.encode_coord(None, scale, flow_bbox)
        all_coord.append(coord)
        all_location[i] = location
        all_velocity[i] = velocity
        all_img[i] = crop
        all_stamps[i] = 0.001 * float(stamp)
        all_DR[i] = dr
        all_DR_vel[i] = dr_vel
        all_r_vel[i] = r_vel
        all_DR_pos[i] = dr_pos
        # NOTE(review): crop_image can return None for the ROI (128x128 crop
        # without 'roi_128'); confirm what this assignment stores in that case.
        all_2D_bbox[i] = rect_2d
        # The 'sp_vel' / 'sp_acc' fields are only read outside test mode.
        if self.test_mode == False:
            all_sp_vel[i] = anno['sp_vel']
            all_sp_acc[i] = anno['sp_acc']
    all_location = torch.from_numpy(all_location)
    all_velocity = torch.from_numpy(all_velocity)
    all_img = torch.from_numpy(all_img)
    all_coord = torch.from_numpy(np.stack(all_coord, axis=0))
    all_stamps = torch.from_numpy(all_stamps)
    all_weight = torch.from_numpy(all_weight)
    all_DR = torch.from_numpy(all_DR)
    all_DR_vel = torch.from_numpy(all_DR_vel)
    all_r_vel = torch.from_numpy(all_r_vel)
    all_DR_pos = torch.from_numpy(all_DR_pos)
    all_2D_bbox = torch.from_numpy(all_2D_bbox)
    all_sp_vel = torch.from_numpy(all_sp_vel)
    all_sp_acc = torch.from_numpy(all_sp_acc)
    all_type = torch.from_numpy(all_type) * info['type']

    # With feat_out set the DR values are used as-is; otherwise the first two
    # DR components are centred on their mean over the clip.
    if self.feat_out:
        mean_dr_x = 0
        mean_dr_y = 0
    else:
        mean_dr_x = all_DR[..., 0].mean()
        mean_dr_y = all_DR[..., 1].mean()
    rot_mat = torch.eye(3).unsqueeze(dim=0).repeat(self.nframe, 1, 1).float()
    tns_mat = torch.eye(3).unsqueeze(dim=0).repeat(self.nframe, 1, 1).float()
    chg_mat = torch.tensor([[0, -1, 0], [1, 0, 0], [0, 0, 1]]).unsqueeze(dim=0).repeat(self.nframe, 1, 1).float()
    # rot_mat: 2D rotation by the last DR component combined with the
    # (centred) translation taken from the first two DR components.
    rot_mat[:, 0, 0] = torch.cos(all_DR[..., -1])
    rot_mat[:, 0, 1] = -torch.sin(all_DR[..., -1])
    rot_mat[:, 0, 2] = all_DR[..., 0] - mean_dr_x
    rot_mat[:, 1, 0] = torch.sin(all_DR[..., -1])
    rot_mat[:, 1, 1] = torch.cos(all_DR[..., -1])
    rot_mat[:, 1, 2] = all_DR[..., 1] - mean_dr_y
    # tns_mat: translation by (avm2car - cam2car) in the first two components.
    tns_mat[:, 0, 2] = -cam2car[0] + avm2car[0]
    tns_mat[:, 1, 2] = -cam2car[1] + avm2car[1]
    all_dr_mat = torch.matmul(torch.matmul(rot_mat, tns_mat), chg_mat)
    # In-place centring of components 0 and 2 of the DR positions; this must
    # stay before r_pos is computed because r_pos reads the centred values.
    all_DR_pos[..., 0] = all_DR_pos[..., 0] - mean_dr_x
    all_DR_pos[..., 2] = all_DR_pos[..., 2] - mean_dr_y
    t = all_stamps - all_stamps.mean()
    theta = all_DR[..., -1]
    # r_mat is [[cos, sin], [-sin, cos]], i.e. the transpose of the rotation
    # block written into rot_mat above.
    r_mat = torch.stack((torch.cos(theta), torch.sin(theta), -torch.sin(theta), torch.cos(theta)), dim=-1).reshape(len(theta), 2, 2)
    r_pos = torch.matmul(r_mat, all_DR_pos[..., [0, 2]].unsqueeze(dim=-1)).squeeze(dim=-1)
    if self.random_erase > 0:
        # Replaces the float weights with a boolean keep-mask. The index below
        # is out of range when random_erase * nframe >= nframe.
        a = torch.rand(self.nframe)
        v = torch.sort(a)[0][int(self.random_erase * self.nframe)] * random.random()
        all_weight = ~(a < v)
    sample = {'img': all_img,
              'rois': all_2D_bbox,
              'location': all_location,
              'velocity': all_velocity,
              'coord': all_coord,
              't': t,
              'all_weight': all_weight,
              'all_DR': all_DR,
              'all_DR_vel': all_DR_vel,
              'all_r_vel': all_r_vel,
              'all_r_pos': r_pos,
              'all_DR_pos': all_DR_pos,
              'all_dr_mat': all_dr_mat,
              'cam2bev': torch.inverse(torch.tensor(bev2cam).float().reshape(3, 3)),
              'all_sp_vel': all_sp_vel,
              'all_sp_acc': all_sp_acc,
              'all_type': all_type.long(),
              }
    if (self.test_mode == False) | self.feat_out:
        sample = self.augment(sample)
    # NOTE(review): the original nesting of this block was lost; it is placed
    # at function level here - confirm it was not meant to sit inside the
    # augmentation branch above.
    if random.random() < self.drop_dr_ratio:
        # Drop the dead-reckoning input: zero it and fall back to the
        # annotated velocity / location and identity matrices.
        sample['all_DR'] *= 0
        sample['all_DR_vel'] = copy.deepcopy(sample['velocity'])
        sample['all_r_vel'] = copy.deepcopy(sample['velocity'])
        sample['all_DR_pos'] = copy.deepcopy(sample['location'])
        sample['all_dr_mat'] = torch.eye(3).unsqueeze(dim=0).repeat(self.nframe, 1, 1).float()
    return sample
def ScaleOffsetFlip(self, image, bbox):
    """Randomly scale and translate an image together with its box.

    One affine map (uniform scale in [0.8, 1.2], shift of up to 20% of the
    width / height) is applied to the image with reflected borders and to
    the two box corners. Despite the name, no flip is performed.

    Args:
        image: HxWx3 BGR image as accepted by ``cv2.warpAffine``.
        bbox: Box as ``[x1, y1, x2, y2]`` in pixel coordinates of ``image``.

    Returns:
        tuple: ``(img, roi)`` - the warped image as a float RGB CHW tensor
        scaled by 1/255, and the transformed, clamped box as a float tensor.
    """
    rows, cols = image.shape[:2]

    # Image transform: draw order is x-shift, y-shift, then scale.
    shift_x = cols * random.uniform(-0.2, 0.2)
    shift_y = rows * random.uniform(-0.2, 0.2)
    zoom = random.uniform(0.8, 1.2)
    affine = np.float32([[zoom, 0, shift_x], [0, zoom, shift_y]])
    warped = cv2.warpAffine(image, affine, (cols, rows), flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_REFLECT)

    # Coordinate transform: push both corners (homogeneous form) through
    # the same affine matrix.
    corners = np.float32([[bbox[0], bbox[1], 1], [bbox[2], bbox[3], 1]])
    moved = np.matmul(np.expand_dims(affine, axis=0), corners.reshape(2, 3, 1))
    x1, y1, x2, y2 = moved.reshape(-1).tolist()

    # First corner is clamped to the last pixel index, second corner to the
    # image extent.
    clamped = [
        min(max(x1, 0), cols - 1),
        min(max(y1, 0), rows - 1),
        min(max(x2, 0), cols),
        min(max(y2, 0), rows),
    ]

    rgb = cv2.cvtColor(warped, cv2.COLOR_BGR2RGB)
    img = torch.from_numpy(rgb.astype(np.float32).transpose(2, 0, 1) / 255)
    roi = torch.tensor(clamped).float()
    return img, roi
def crop_image_bbox(self, img, bbox, scale):
    """Adapt a 192x224 crop, its box and its scale to ``self.img_rsz``.

    For 192x224 the inputs are returned untouched. For the square targets
    128, 160 and 192 the columns 16..207 are kept (giving 192x192), the box
    x-coordinates are shifted by 16, and image, box and scale are resized by
    ``side / 192`` when the target is smaller than 192.

    Args:
        img: HxWx3 image; the fixed column window assumes a width of 224.
        bbox: Array-like ``[x1, y1, x2, y2]`` supporting in-place slicing.
            It is shifted in place; integer NumPy arrays are promoted to
            float before scaling so the in-place multiply cannot fail.
        scale: Scalar scale factor belonging to ``img``.

    Returns:
        tuple: ``(img, bbox, scale)`` adapted to the target size.

    Raises:
        NotImplementedError: ``self.img_rsz`` is not a supported size.
    """
    target = (self.img_rsz[0], self.img_rsz[1])
    if target == (192, 224):
        return img, bbox, scale
    if target not in ((128, 128), (160, 160), (192, 192)):
        raise NotImplementedError(
            'not implemented img size: {}x{}'.format(target[0], target[1]))

    side = target[0]
    # Central 192 columns of the 224-wide crop.
    img = img[:, 16:208, :]
    bbox[0::2] -= 16
    if side != 192:
        ratio = side / 192
        img = cv2.resize(img, (side, side), interpolation=cv2.INTER_LINEAR)
        if isinstance(bbox, np.ndarray) and not np.issubdtype(bbox.dtype, np.floating):
            # In-place float multiplication of an integer array is rejected
            # by NumPy's casting rules.
            bbox = bbox.astype(np.float64)
        bbox *= ratio
        scale *= ratio
    return img, bbox, scale
def get_data_info_bbox(self, ):
    """Draw the next ``self.bbox_num`` auxiliary box-only samples.

    Records are taken round-robin from ``self.bbox_infos`` through the
    running counter ``self.bbox_idx``. Each image is decoded from disk,
    adapted by ``self.crop_image_bbox`` and jittered by
    ``self.ScaleOffsetFlip``.

    Returns:
        dict: ``img`` and ``rois`` tensors, plus ``coord`` when
        ``self.pe_dim == 0``. ``self.augment`` is applied when
        ``self.test_mode == False``.
    """
    count = self.bbox_num
    images = torch.zeros(count, 3, self.img_rsz[0], self.img_rsz[1]).float()
    rois = torch.zeros(count, 4).float()
    coords = []
    for slot in range(count):
        # Advance the round-robin cursor before reading the record.
        record_idx = self.bbox_idx % len(self.bbox_infos)
        self.bbox_idx += 1
        record = copy.deepcopy(self.bbox_infos[record_idx])

        raw_bytes = np.fromfile(record['img_path'], dtype=np.uint8)
        picture = cv2.imdecode(raw_bytes, 1)
        box = np.array(copy.deepcopy(record['bbox']))
        picture, box, _ = self.crop_image_bbox(picture, box, 1)
        images[slot], rois[slot] = self.ScaleOffsetFlip(picture, box)

        # No calibration or flow box exists for these samples.
        coords.append(self.encode_coord(None, 0, None))

    stacked_coords = torch.stack(coords, dim=0)
    sample = {'img': images, 'rois': rois}
    if self.pe_dim == 0:
        sample['coord'] = stacked_coords
    if self.test_mode == False:
        sample = self.augment(sample)
    return sample
def __getitem__(self, idx):
    """Return the sample at ``idx``, retrying on a random index on failure.

    Clips whose ``batch`` name contains ``'radar'`` go through
    ``self.get_data_info_radar``, all others through ``self.get_data_info``.
    Outside test mode the auxiliary box samples from
    ``self.get_data_info_bbox`` are concatenated along dim 0 onto the
    matching keys.

    Loading is best-effort: an ordinary error is reported and another random
    index is tried. Only ``Exception`` subclasses are retried, so
    ``KeyboardInterrupt`` / ``SystemExit`` still stop the process.

    Args:
        idx: Index into ``self.infos``.

    Returns:
        dict: The assembled sample, possibly from a substituted index.
    """
    while True:
        try:
            if 'radar' in self.infos[idx]['batch']:
                sample = self.get_data_info_radar(idx)
            else:
                sample = self.get_data_info(idx)
            if self.test_mode == False:
                aux_sample = self.get_data_info_bbox()
                for key in aux_sample:
                    sample[key] = torch.cat((sample[key], aux_sample[key]), dim=0)
        except Exception as err:
            # Report the cause as well, otherwise broken samples are
            # impossible to diagnose.
            print('>>>error occured:', idx, repr(err))
            idx = random.randint(0, len(self.infos) - 1)
            continue
        return sample
def format_results(self, outputs, **kwargs):
    """Group per-sample outputs by track and dump them to one file.

    Each output is paired with the entry of ``self.infos`` at the same
    position. Tensors are converted to NumPy arrays, the sample's
    ``img_folder`` is attached, and the result is stored under the key
    ``'<batch>+<video>+<gid>'`` and the clip's last timestamp.

    Args:
        outputs: Iterable of result dicts, aligned with ``self.infos``.
        **kwargs: Must contain ``pklfile_prefix``, the path handed to
            ``mmcv.dump``.
    """
    all_results = {}
    for info, res in zip(self.infos, outputs):
        res = {
            name: value.cpu().detach().numpy() if isinstance(value, torch.Tensor) else value
            for name, value in res.items()
        }
        res['img_folder'] = info['img_folder']
        try:
            stamp = info['stamps'][-1]
        except (KeyError, IndexError, TypeError):
            # Records without usable 'stamps' keep their timestamps under
            # 'sweeps'. Only lookup failures trigger the fallback.
            stamp = info['sweeps'][-1]
        key = '{}+{}+{}'.format(info['batch'], info['video'], info['gid'])
        all_results.setdefault(key, {})[stamp] = res
    mmcv.dump(all_results, kwargs['pklfile_prefix'])

# Training configuration (mmcv-style Python config). Statement boundaries
# were restored; all values are unchanged.
custom_imports = dict(
    imports=['projects.mmdet3d_plugin'],
    allow_failed_imports=False)

find_unused_parameters = True
file_client_args = dict(backend='disk')
camera_inner = [
    [1489.8095, 0.0, 961.03457],
    [0.0, 1489.8272, 532.078111],
    [0.0, 0.0, 1.0],
]
data = dict(
    samples_per_gpu=16,
    workers_per_gpu=4,
    train=dict(
        type='SmokeCropLoaderRadarFuse3',
        pair_file=[
            '/share/wuziyang/DataSets/annos_info/all_sp_data_type.pkl',
            '/share/wuziyang/DataSets/annos_info/crop_info_batch6-acc-sp-type-bbox.pkl',
            '/share/wuziyang/DataSets/annos_info/crop_info_batch7-acc-sp-type-bbox.pkl',
            '/share/wuziyang/DataSets/annos_info/crop_info_batch8-acc-sp-type-bbox.pkl',
            '/share/wuziyang/DataSets/annos_info/crop_info_batch9-acc-sp-type-bbox.pkl',
            '/share/wuziyang/DataSets/annos_info/crop_info_batch10-acc-sp-type-bbox.pkl',
            '/share/wuziyang/DataSets/annos_info/crop_info_batch11-acc-sp-type-bbox.pkl',
            '/share/wuziyang/DataSets/annos_info/crop_info_batch0613-acc-sp-type-bbox.pkl',
        ],
        seq_root='/share/wuziyang/DataSets/annos_info/seq_files',
        img_root='/dataset/zhangjingwei/VelocityCropSet',
        pkl_root='/share/wuziyang/DataSets/annos_info/pkl_files',
        auxy_root='/share/wuziyang/DataSets/cropsets/bbox',
        custom_mtx=[
            [1489.8095, 0.0, 961.03457],
            [0.0, 1489.8272, 532.078111],
            [0.0, 0.0, 1.0],
        ],
        img_rsz=[128, 128],
        nframe=30,
        pe_dim=0,
        skip=5,
        ref_coord=False,
        center_base=True,
        random_erase=0.0,
        crop_noise=True,
        drop_dr_ratio=0,
        random_choose=True,
        filter_ignore=False,
        main_type=['Truck', 'Bus', 'Car', '2wheel', '3wheel', 'Human', 'Fake'],
        test_mode=False),
    val=dict(
        type='SmokeCropLoaderRadarFuse3',
        pair_file=
        # '/share/wuziyang/mono_velocity-master/data/cipv_train_120_309_BY_OS_WP_XY_4_filter.pkl',
        ['/share/wuziyang/DataSets/annos_info/crop_info_batch_test-acc.pkl'],
        img_root='/dataset/zhangjingwei/VelocityCropSet',
        pkl_root='/share/wuziyang/DataSets/annos_info/pkl_files',
        custom_mtx=[
            [1489.8095, 0.0, 961.03457],
            [0.0, 1489.8272, 532.078111],
            [0.0, 0.0, 1.0],
        ],
        pe_dim=0,
        skip=1,
        ref_coord=False,
        img_rsz=[128, 128],
        nframe=30,
        center_base=True,
        crop_noise=False,
        test_mode=True),
    test=dict(
        type='SmokeCropLoaderRadarFuse3',
        pair_file=['/share/wuziyang/DataSets/annos_info/crop_info_batch_test-acc.pkl'],
        img_root='/dataset/zhangjingwei/VelocityCropSet',
        pkl_root='/share/wuziyang/DataSets/annos_info/pkl_files',
        custom_mtx=[
            [1489.8095, 0.0, 961.03457],
            [0.0, 1489.8272, 532.078111],
            [0.0, 0.0, 1.0],
        ],
        pe_dim=0,
        skip=1,
        ref_coord=False,
        img_rsz=[128, 128],
        nframe=30,
        center_base=True,
        crop_noise=False,
        main_type=['Truck', 'Bus', 'Car'],
        test_mode=True))
model = dict(
    type='RadarVelocityBboxOnnxFuse3',
    backbone=None,
    neck=None,
    head=None,
    bbox_type='x1y1x2y2',
    roi_scale=True,
    use_scale=False,
    train_cfg=None,
    test_cfg=None,
    img_sz=[128, 128],
    feat_dim=256,
    use_bn=False,
    use_weight=True,
    alpha_acc=10,
    classify=False,
    poly=3)
total_epochs = 6
runner = dict(type='EpochBasedRunner', max_epochs=6)
optimizer = dict(type='Adam', lr=0.0001)
optimizer_config = dict(grad_clip=None)
lr_config = dict(policy='step', warmup=None, step=[3, 4, 5])
evaluation = dict(interval=360, pipeline=None)
checkpoint_config = dict(interval=1)
log_config = dict(
    interval=50,
    hooks=[dict(type='TextLoggerHook'), dict(type='TensorboardLoggerHook')])
custom_hooks = [dict(type='Custom3DHook', save_iter=-1, save_path='')]
dist_params = dict(backend='nccl')
log_level = 'INFO'
workflow = [('train', 1)]
opencv_num_threads = 0
mp_start_method = 'fork'

# NOTE(review): `resume_from` and `load_from` are each assigned twice; the
# later assignment wins. The earlier lines may have been comments in the
# original file - confirm.
resume_from = '/share/wuziyang/mono_velocity-master/results/9fram-bbox-onnx-fuse-weight-sp-3/latest.pth'

resume_from = None

load_from = '/share/wuziyang/result_from_wuziyang/9fram-bbox-onnx-fuse-weight-sp-3/epoch_5.pth'

load_from = '/share/wuziyang/velocity_model/G3090/work_dir/from_wuziyang/v0509/epoch_6.pth'
work_dir = '/share/wuziyang/velocity_model/G3090/work_dir/from_wuziyang/v0524+truck+tunnel'

config = '/share/wuziyang/opensourcelib/G3090/projects/configs/velocity/radar_velocity_onnx_fuse_sp.py'

gpu_ids = range(0, 1)

About

No description, website, or topics provided.

Resources

License

Contributing

Stars

Watchers

Forks

Releases

No releases published

Packages

Contributors

Languages

AltStyle によって変換されたページ (->オリジナル) /