Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit d55df18

Browse files
author
Alexandre Marquet
committed
Use volk for ACS in viterbi_volk_state.
1 parent 921cbb3 commit d55df18

File tree

2 files changed

+99
-67
lines changed

2 files changed

+99
-67
lines changed

‎lib/viterbi_volk_state_impl.cc‎

Lines changed: 84 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -81,13 +81,43 @@ namespace gr {
8181
*(ordered_OS_it++) = OS[PS[s][i]*I + PI[s][i]];
8282
*(ordered_PS_it++) = PS[s][i];
8383
}
84+
else {
85+
*(ordered_OS_it++) = -1;
86+
*(ordered_PS_it++) = -1;
87+
}
8488
}
8589
}
8690

91+
//Memory reservations
92+
d_zeros = (float*)volk_malloc(S * sizeof(float), volk_get_alignment());
93+
std::fill(d_zeros, d_zeros + S, 0.0);
94+
95+
d_alpha_curr = (float*)volk_malloc(S*sizeof(float), volk_get_alignment());
96+
97+
d_alpha_prev = (float*)volk_malloc(S*sizeof(float), volk_get_alignment());
98+
99+
d_can_metrics = (float*)volk_malloc(S*d_max_size_PS_s*sizeof(float),
100+
volk_get_alignment());
101+
102+
d_ordered_in_k = (float*)volk_malloc(d_max_size_PS_s * S * sizeof(float),
103+
volk_get_alignment());
104+
105+
d_trace = (int*)malloc(K*S*sizeof(int));
106+
87107
set_relative_rate(1.0 / ((double)d_FSM.O()));
88108
set_output_multiple(d_K);
89109
}
90110

111+
viterbi_volk_state_impl::~viterbi_volk_state_impl()
112+
{
113+
volk_free(d_zeros);
114+
volk_free(d_alpha_prev);
115+
volk_free(d_alpha_curr);
116+
volk_free(d_can_metrics);
117+
volk_free(d_ordered_in_k);
118+
free(d_trace);
119+
}
120+
91121
void
92122
viterbi_volk_state_impl::set_FSM(const gr::trellis::fsm &FSM)
93123
{
@@ -157,20 +187,28 @@ namespace gr {
157187
viterbi_volk_state_impl::compute_all_metrics(const float *alpha_prev,
158188
const float *in_k, float *can_metrics)
159189
{
190+
size_t n_pts = d_max_size_PS_s * d_FSM.S();
191+
160192
std::vector<int>::const_iterator ordered_OS_it = d_ordered_OS.begin();
161193
std::vector<int>::const_iterator ordered_PS_it = d_ordered_PS.begin();
162194

163-
for(size_t i=0 ; i < d_max_size_PS_s ; ++i) {
164-
for(int s=0 ; s < d_FSM.S() ; ++s) {
165-
if (i < d_FSM.PS()[s].size()) {
166-
*(can_metrics++) = alpha_prev[*(ordered_PS_it++)] + \
167-
in_k[*(ordered_OS_it++)];
168-
}
169-
else {
170-
*(can_metrics++) = std::numeric_limits<float>::max();
171-
}
195+
float *ordered_in_k_it = d_ordered_in_k;
196+
float *can_metrics_it = can_metrics;
197+
198+
for(size_t i=0 ; i < n_pts ; ++i) {
199+
if (!(*ordered_PS_it < 0)) {
200+
*(can_metrics_it++) = alpha_prev[*(ordered_PS_it++)];
201+
*(ordered_in_k_it++) = in_k[*(ordered_OS_it++)];
202+
}
203+
else {
204+
*(can_metrics_it++) = std::numeric_limits<float>::max();
205+
*(ordered_in_k_it++) = std::numeric_limits<float>::max();
206+
ordered_PS_it++;
207+
ordered_OS_it++;
172208
}
173209
}
210+
211+
volk_32f_x2_add_32f(can_metrics, can_metrics, d_ordered_in_k, n_pts);
174212
}
175213

176214
//Volk optimized implementation adapted when the number of branches between
@@ -183,74 +221,61 @@ namespace gr {
183221
const float *in, unsigned char *out)
184222
{
185223
int tb_state, pidx;
186-
float min_metric = std::numeric_limits<float>::max();
224+
float *min_metric_ptr;
187225

188-
//TODO: reserve once at construction, use iterators here
189-
float *alpha_curr = (float*)malloc(S*sizeof(float));
190-
float *alpha_prev = (float*)malloc(S*sizeof(float));
191-
int *trace = (int*)malloc(K*S*sizeof(int));
192-
std::fill(trace, trace + K*S, 0);
226+
//Iterators
227+
int *trace_it = d_trace;
228+
float *can_metrics_it = d_can_metrics;
193229

194-
//Variables to be allocated by volk (for best alignment)
195-
float *can_metrics = (float*)volk_malloc(S*d_max_size_PS_s*sizeof(float),
196-
volk_get_alignment());
230+
//Initialize traceback vector
231+
std::fill(trace_it, trace_it + K*S, 0);
197232

198233
//If initial state was specified
199234
if(S0 != -1) {
200-
std::fill(alpha_prev, alpha_prev + S, std::numeric_limits<float>::max());
201-
alpha_prev[S0] = 0.0;
235+
std::fill(d_alpha_prev, d_alpha_prev + S,
236+
std::numeric_limits<float>::max());
237+
d_alpha_prev[S0] = 0.0;
202238
}
203239
else {
204-
std::fill(alpha_prev, alpha_prev + S, 0.0);
240+
std::fill(d_alpha_prev, d_alpha_prev + S, 0.0);
205241
}
206-
207242
for(float* in_k=(float*)in ; in_k < (float*)in + K*O ; in_k += O) {
208243
//ADD
209-
compute_all_metrics(alpha_prev, in_k, can_metrics);
244+
compute_all_metrics(d_alpha_prev, in_k, d_can_metrics);
210245

211246
//Pre-loop
212-
for(int s=0 ; s < S ; ++s) {
213-
*(alpha_curr++) = *(can_metrics++);
214-
}
215-
alpha_curr -= S;
216-
min_metric = *std::min_element(alpha_curr, alpha_curr+S);
247+
std::copy(d_can_metrics, d_can_metrics + S, d_alpha_curr);
248+
can_metrics_it += S;
217249

218250
//Loop
219251
for(size_t i=1 ; i < d_max_size_PS_s ; ++i) {
220-
//Todo try to volkize using (a-b)>0
252+
//d_can_metrics[s] = fmax(0.0, d_alpha_curr[s] - d_can_metrics[s])
253+
volk_32f_x2_subtract_32f(can_metrics_it, d_alpha_curr, can_metrics_it, S);
254+
volk_32f_x2_max_32f(can_metrics_it, d_zeros, can_metrics_it, S);
255+
256+
//d_alpha_curr[s] -= d_can_metrics[s]
257+
volk_32f_x2_subtract_32f(d_alpha_curr, d_alpha_curr, can_metrics_it, S);
258+
221259
for(int s=0 ; s < S ; ++s) {
222-
//COMPARE
223-
if(*can_metrics < *alpha_curr) {
224-
//SELECT
225-
*alpha_curr = *can_metrics;
226-
if(*alpha_curr < min_metric) {
227-
min_metric = *alpha_curr;
228-
}
229-
*trace = i;
230-
}
231-
232-
//Update iterators
233-
++can_metrics;
234-
++alpha_curr;
235-
++trace;
260+
// COMPARE // SELECT
261+
*(trace_it++) = (*(can_metrics_it++) > 0.0)?i:*trace_it;
236262
}
237-
238-
//Reset iterators
239-
alpha_curr -= S;
240-
trace -= S;
263+
264+
//Update iterators
265+
trace_it -= S;
241266
}
242-
//Reset iterators
243-
can_metrics -= S*d_max_size_PS_s;
244-
245-
//Metrics normalization
246-
std::transform(alpha_curr, alpha_curr + S, alpha_curr,
247-
std::bind2nd(std::minus<double>(), min_metric));
248267

249268
//At this point, current path metrics become previous path metrics
250-
std::swap(alpha_prev, alpha_curr);
269+
std::swap(d_alpha_prev, d_alpha_curr);
270+
271+
//Metrics normalization
272+
min_metric_ptr = std::min_element(d_alpha_prev, d_alpha_prev + S);
273+
std::transform(d_alpha_prev, d_alpha_prev + S, d_alpha_prev,
274+
std::bind2nd(std::minus<float>(), *min_metric_ptr));
251275

252276
//Update iterators
253-
trace += S;
277+
trace_it += S;
278+
can_metrics_it = d_can_metrics;
254279
}
255280

256281
//If final state was specified
@@ -259,30 +284,24 @@ namespace gr {
259284
}
260285
else{
261286
//at this point, d_alpha_prev contains the path metrics of states after time K
262-
tb_state = (int)(std::min_element(alpha_prev, alpha_prev + S) - alpha_prev);
287+
tb_state = (int)(min_metric_ptr - d_alpha_prev);
263288
}
264289

265290
//Traceback
266-
trace -= S; //place trace at the last time index
291+
trace_it -= S; //place trace at the last time index
267292

268293
for(unsigned char* out_k = out+K-1 ; out_k >= out ; --out_k) {
269294
//Retrieve previous input index from trace
270-
pidx=*(trace + tb_state);
295+
pidx=*(trace_it + tb_state);
271296
//Update trace for next output symbol
272-
trace -= S;
297+
trace_it -= S;
273298

274299
//Output previous input
275300
*out_k = (unsigned char) PI[tb_state][pidx];
276301

277302
//Update tb_state with the previous state on the shortest path
278303
tb_state = PS[tb_state][pidx];
279304
}
280-
281-
trace += S; //place trace at the first time index
282-
283-
free(alpha_prev);
284-
free(alpha_curr);
285-
volk_free(can_metrics);
286305
}
287306

288307

‎lib/viterbi_volk_state_impl.h‎

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,17 +41,30 @@ namespace gr {
4141
//Same as d_FSM.PS(), but flattened:
4242
//d_ordered_PS[i*S+s] = d_FSM.PS()[s][i]
4343
std::vector<int> d_ordered_PS;
44+
//Input metrics, ordered as d_ordered_in_k[i] = in_k[d_ordered_OS[i]]
45+
float *d_ordered_in_k;
4446
//Max size of PS[s]
4547
size_t d_max_size_PS_s;
4648

49+
//A vector of S zeros
50+
float *d_zeros;
51+
52+
//Store current state metrics
53+
float *d_alpha_curr;
54+
//Store previous state metrics
55+
float *d_alpha_prev;
56+
//Store next state candidate metrics
57+
float *d_can_metrics;
58+
//Traceback vector
59+
int *d_trace;
60+
4761
protected:
48-
//void order_alpha_prev_in(int i, const float *alpha_prev, const float *in_k,
49-
// float *alpha_prev_ord, float *in_k_ord);
5062
void compute_all_metrics(const float *alpha_prev, const float *in_k,
5163
float *can_metrics);
5264

5365
public:
5466
viterbi_volk_state_impl(const gr::trellis::fsm &FSM, int K, int S0, int SK);
67+
~viterbi_volk_state_impl();
5568

5669
gr::trellis::fsm FSM() const { return d_FSM; }
5770
int K() const { return d_K; }

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /