@laobadao
Description
weight:
    bit: 8
    symmetric: True
    granularity: per_channel
    group_size: -1
    calib_algo: mse
act:
    bit: 8
    symmetric: True
    granularity: per_token
    calib_algo: minmax
special:
    actorder: True
    static_groups: False
    percdamp: 0.01
    blocksize: 128
    chunk_num: 4
    true_sequential: True
    online_rotate: False
    fp32_had: True
With GPTQ configured in the yaml as above, granularity is per_channel and group_size is -1, yet blocksize is 128. Is the optimization then actually carried out in blocks of 128 columns? Or would it be more reasonable to also change blocksize to -1 and adjust the related code accordingly? The change I have in mind would look something like this:
def weight_transform(self, W, Hinv, Losses, tmp):
    # NOTE:
    curr_blocksize = self.blocksize
    if curr_blocksize <= 0:
        logger.info('Blocksize < 0 , using perchannel GPTQ')
        curr_blocksize = W.shape[1]
    print(f'=== W.shape:{W.shape}')
    for i1 in range(0, self.n_nonout, curr_blocksize):
        # for i1 in range(0, self.n_nonout, self.blocksize):
        i2 = min(i1 + curr_blocksize, self.n_nonout)
        count = i2 - i1
        W1, Hinv1 = W[:, i1:i2].clone(), Hinv[i1:i2, i1:i2]
        tmp1, Err1, Losses1 = (
            torch.zeros_like(W1),
            torch.zeros_like(W1),
            torch.zeros_like(W1),
        )
        for i in range(count):
            w, d = W1[:, i], Hinv1[i, i]
            if self.wquantizer.granularity == 'per_group':
                idx = i1 + i
                if not self.static_groups:
                    if (i1 + i) % self.wquantizer.group_size == 0:
                        column_tensors = W[
                            :,
                            (i1 + i): min(
                                (i1 + i + self.wquantizer.group_size),
                                (self.columns - self.n_out),
                            ),
                        ]
                        self.search_column_qparams(column_tensors, idx)
                else:
                    if self.actorder:
                        idx = self.perm[idx]
                    self.qparams = self.groups[idx // self.wquantizer.group_size]
            q = self.wquantizer.quant_dequant(
                w.unsqueeze(1),
                self.qparams['scale'],
                self.qparams['zero'],
                self.qparams['qmax'],
                self.qparams['qmin'],
            ).squeeze(1)
            tmp1[:, i] = w
            Losses1[:, i] = ((w - q) ** 2) / (2 * d**2)
            err1 = (w - q) / d
            W1[:, i:] -= err1.unsqueeze(1).matmul(Hinv1[i, i:].unsqueeze(0))
            Err1[:, i] = err1
        tmp[:, i1:i2], Losses[:, i1:i2] = tmp1, Losses1
        W[:, i2:] -= Err1.matmul(Hinv[i1:i2, i2:])
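My understanding of the granularity settings, as a standalone sketch (not llmc's code; the helper names below are hypothetical): granularity: per_channel with group_size: -1 should mean one scale per output channel computed over the entire input dimension, while per_group would compute one scale per group_size-column slice.

import torch

def per_channel_scale(W: torch.Tensor, n_bits: int = 8) -> torch.Tensor:
    # group_size == -1: one symmetric scale per output channel over the whole row
    qmax = 2 ** (n_bits - 1) - 1
    return W.abs().amax(dim=1, keepdim=True) / qmax          # [out_ch, 1]

def per_group_scale(W: torch.Tensor, group_size: int, n_bits: int = 8) -> torch.Tensor:
    # one symmetric scale per (output channel, column group) pair
    qmax = 2 ** (n_bits - 1) - 1
    out_ch, in_ch = W.shape
    Wg = W.reshape(out_ch, in_ch // group_size, group_size)
    return Wg.abs().amax(dim=-1, keepdim=True) / qmax        # [out_ch, n_groups, 1]

W = torch.randn(4096, 11008)
print(per_channel_scale(W).shape)      # torch.Size([4096, 1])
print(per_group_scale(W, 128).shape)   # torch.Size([4096, 86, 1])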
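And here is a self-contained sketch of the blocked GPTQ column loop (simplified; again not llmc's implementation) to illustrate what I think blocksize does: it only sets the lazy-batch window for the error-compensation updates, so with per-channel qparams fixed up front the result should be the same for blocksize: 128 and for a single block spanning all columns, up to floating-point error.

import torch

def quant_dequant(w, scale, qmax=127):
    # symmetric 8-bit quant-dequant with a fixed per-channel scale
    return torch.clamp(torch.round(w / scale), -qmax - 1, qmax) * scale

def gptq_per_channel(W, Hinv, blocksize):
    # Hinv: upper Cholesky factor of the inverse Hessian, as in GPTQ
    W = W.clone()
    rows, cols = W.shape
    scale = W.abs().amax(dim=1, keepdim=True) / 127.0   # per-channel qparams, fixed up front
    Q = torch.zeros_like(W)
    for i1 in range(0, cols, blocksize):
        i2 = min(i1 + blocksize, cols)
        W1 = W[:, i1:i2].clone()
        Hinv1 = Hinv[i1:i2, i1:i2]
        Err1 = torch.zeros_like(W1)
        for i in range(i2 - i1):
            w, d = W1[:, i], Hinv1[i, i]
            q = quant_dequant(w.unsqueeze(1), scale).squeeze(1)
            Q[:, i1 + i] = q
            err = (w - q) / d
            W1[:, i:] -= err.unsqueeze(1) * Hinv1[i, i:].unsqueeze(0)   # in-block update
            Err1[:, i] = err
        W[:, i2:] -= Err1.matmul(Hinv[i1:i2, i2:])                      # deferred (lazy-batch) update
    return Q

torch.manual_seed(0)
cols = 512
W = torch.randn(16, cols, dtype=torch.float64)
X = torch.randn(2048, cols, dtype=torch.float64)
H = X.T @ X / X.shape[0] + 0.01 * torch.eye(cols, dtype=torch.float64)
L = torch.linalg.cholesky(H)
Hinv = torch.linalg.cholesky(torch.cholesky_inverse(L), upper=True)

Q_block = gptq_per_channel(W, Hinv, blocksize=128)
Q_full = gptq_per_channel(W, Hinv, blocksize=cols)   # the "blocksize = -1" behaviour
print((Q_block - Q_full).abs().max())                 # expected ~0

If that equivalence also holds in llmc, then blocksize: 128 would only be a speed/memory trade-off and would not change the per_channel result, but please correct me if the repo's update path differs.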