@@ -81,13 +81,43 @@ namespace gr {
8181 *(ordered_OS_it++) = OS[PS[s][i]*I + PI[s][i]];
8282 *(ordered_PS_it++) = PS[s][i];
8383 }
84+  else  {
85+  *(ordered_OS_it++) = -1 ;
86+  *(ordered_PS_it++) = -1 ;
87+  }
8488 }
8589 }
8690
91+  // Memory reservations
92+  d_zeros = (float *)volk_malloc (S * sizeof (float ), volk_get_alignment ());
93+  std::fill (d_zeros, d_zeros + S, 0.0 );
94+ 95+  d_alpha_curr = (float *)volk_malloc (S*sizeof (float ), volk_get_alignment ());
96+ 97+  d_alpha_prev = (float *)volk_malloc (S*sizeof (float ), volk_get_alignment ());
98+ 99+  d_can_metrics = (float *)volk_malloc (S*d_max_size_PS_s*sizeof (float ),
100+  volk_get_alignment ());
101+ 102+  d_ordered_in_k = (float *)volk_malloc (d_max_size_PS_s * S * sizeof (float ),
103+  volk_get_alignment ());
104+ 105+  d_trace = (int *)malloc (K*S*sizeof (int ));
106+ 87107 set_relative_rate (1.0  / ((double )d_FSM.O ()));
88108 set_output_multiple (d_K);
89109 }
90110
111+  viterbi_volk_state_impl::~viterbi_volk_state_impl ()
112+  {
113+  volk_free (d_zeros);
114+  volk_free (d_alpha_prev);
115+  volk_free (d_alpha_curr);
116+  volk_free (d_can_metrics);
117+  volk_free (d_ordered_in_k);
118+  free (d_trace);
119+  }
120+ 91121 void 
92122 viterbi_volk_state_impl::set_FSM (const  gr::trellis::fsm &FSM)
93123 {
@@ -157,20 +187,28 @@ namespace gr {
157187 viterbi_volk_state_impl::compute_all_metrics (const  float  *alpha_prev,
158188 const  float  *in_k, float  *can_metrics)
159189 {
190+  size_t  n_pts = d_max_size_PS_s * d_FSM.S ();
191+ 160192 std::vector<int >::const_iterator ordered_OS_it = d_ordered_OS.begin ();
161193 std::vector<int >::const_iterator ordered_PS_it = d_ordered_PS.begin ();
162194
163-  for (size_t  i=0  ; i < d_max_size_PS_s ; ++i) {
164-  for (int  s=0  ; s < d_FSM.S () ; ++s) {
165-  if  (i < d_FSM.PS ()[s].size ()) {
166-  *(can_metrics++) = alpha_prev[*(ordered_PS_it++)] + \
167-  in_k[*(ordered_OS_it++)];
168-  }
169-  else  {
170-  *(can_metrics++) = std::numeric_limits<float >::max ();
171-  }
195+  float  *ordered_in_k_it = d_ordered_in_k;
196+  float  *can_metrics_it = can_metrics;
197+ 198+  for (size_t  i=0  ; i < n_pts ; ++i) {
199+  if  (!(*ordered_PS_it < 0 )) {
200+  *(can_metrics_it++) = alpha_prev[*(ordered_PS_it++)];
201+  *(ordered_in_k_it++) = in_k[*(ordered_OS_it++)];
202+  }
203+  else  {
204+  *(can_metrics_it++) = std::numeric_limits<float >::max ();
205+  *(ordered_in_k_it++) = std::numeric_limits<float >::max ();
206+  ordered_PS_it++;
207+  ordered_OS_it++;
172208 }
173209 }
210+ 211+  volk_32f_x2_add_32f (can_metrics, can_metrics, d_ordered_in_k, n_pts);
174212 }
175213
176214 // Volk optimized implementation adapted when the number of branch between
@@ -183,74 +221,61 @@ namespace gr {
183221 const  float  *in, unsigned  char  *out)
184222 {
185223 int  tb_state, pidx;
186-  float  min_metric = std::numeric_limits< float >:: max () ;
224+  float  *min_metric_ptr ;
187225
188-  // TODO: reserve once at construction, use iterators here
189-  float  *alpha_curr = (float *)malloc (S*sizeof (float ));
190-  float  *alpha_prev = (float *)malloc (S*sizeof (float ));
191-  int  *trace = (int *)malloc (K*S*sizeof (int ));
192-  std::fill (trace, trace + K*S, 0 );
226+  // Iterators
227+  int  *trace_it = d_trace;
228+  float  *can_metrics_it = d_can_metrics;
193229
194-  // Variables to be allocated by volk (for best alignment)
195-  float  *can_metrics = (float *)volk_malloc (S*d_max_size_PS_s*sizeof (float ),
196-  volk_get_alignment ());
230+  // Initialize traceback vector
231+  std::fill (trace_it, trace_it + K*S, 0 );
197232
198233 // If initial state was specified
199234 if (S0 != -1 ) {
200-  std::fill (alpha_prev, alpha_prev + S, std::numeric_limits<float >::max ());
201-  alpha_prev[S0] = 0.0 ;
235+  std::fill (d_alpha_prev, d_alpha_prev + S,
236+  std::numeric_limits<float >::max ());
237+  d_alpha_prev[S0] = 0.0 ;
202238 }
203239 else  {
204-  std::fill (alpha_prev, alpha_prev  + S, 0.0 );
240+  std::fill (d_alpha_prev, d_alpha_prev  + S, 0.0 );
205241 }
206- 207242 for (float * in_k=(float *)in ; in_k < (float *)in + K*O ; in_k += O) {
208243 // ADD
209-  compute_all_metrics (alpha_prev , in_k, can_metrics );
244+  compute_all_metrics (d_alpha_prev , in_k, d_can_metrics );
210245
211246 // Pre-loop
212-  for (int  s=0  ; s < S ; ++s) {
213-  *(alpha_curr++) = *(can_metrics++);
214-  }
215-  alpha_curr -= S;
216-  min_metric = *std::min_element (alpha_curr, alpha_curr+S);
247+  std::copy (d_can_metrics, d_can_metrics + S, d_alpha_curr);
248+  can_metrics_it += S;
217249
218250 // Loop
219251 for (size_t  i=1  ; i < d_max_size_PS_s ; ++i) {
220-  // Todo try to volkize using (a-b)>0
252+  // d_can_metrics[s] = fmax(0.0, d_alpha_curr[s] - d_can_metrics[s])
253+  volk_32f_x2_subtract_32f (can_metrics_it, d_alpha_curr, can_metrics_it, S);
254+  volk_32f_x2_max_32f (can_metrics_it, d_zeros, can_metrics_it, S);
255+ 256+  // d_alpha_curr[s] -= d_can_metrics[s]
257+  volk_32f_x2_subtract_32f (d_alpha_curr, d_alpha_curr, can_metrics_it, S);
258+ 221259 for (int  s=0  ; s < S ; ++s) {
222-  // COMPARE
223-  if (*can_metrics < *alpha_curr) {
224-  // SELECT
225-  *alpha_curr = *can_metrics;
226-  if (*alpha_curr < min_metric) {
227-  min_metric = *alpha_curr;
228-  }
229-  *trace = i;
230-  }
231- 232-  // Update iterators
233-  ++can_metrics;
234-  ++alpha_curr;
235-  ++trace;
260+  //  COMPARE // SELECT
261+  *(trace_it++) = (*(can_metrics_it++) > 0.0 )?i:*trace_it;
236262 }
237- 238-  // Reset iterators
239-  alpha_curr -= S;
240-  trace -= S;
263+ 264+  // Update iterators
265+  trace_it -= S;
241266 }
242-  // Reset iterators
243-  can_metrics -= S*d_max_size_PS_s;
244- 245-  // Metrics normalization
246-  std::transform (alpha_curr, alpha_curr + S, alpha_curr,
247-  std::bind2nd (std::minus<double >(), min_metric));
248267
249268 // At this point, current path metrics becomes previous path metrics
250-  std::swap (alpha_prev, alpha_curr);
269+  std::swap (d_alpha_prev, d_alpha_curr);
270+ 271+  // Metrics normalization
272+  min_metric_ptr = std::min_element (d_alpha_prev, d_alpha_prev + S);
273+  std::transform (d_alpha_prev, d_alpha_prev + S, d_alpha_prev,
274+  std::bind2nd (std::minus<float >(), *min_metric_ptr));
251275
252276 // Update iterators
253-  trace += S;
277+  trace_it += S;
278+  can_metrics_it = d_can_metrics;
254279 }
255280
256281 // If final state was specified
@@ -259,30 +284,24 @@ namespace gr {
259284 }
260285 else {
261286 // at this point, alpha_prev contains the path metrics of states after time K
262-  tb_state = (int )(std::min_element (alpha_prev, alpha_prev + S) - alpha_prev );
287+  tb_state = (int )(min_metric_ptr - d_alpha_prev );
263288 }
264289
265290 // Traceback
266-  trace  -= S; // place trace at the last time index
291+  trace_it  -= S; // place trace at the last time index
267292
268293 for (unsigned  char * out_k = out+K-1  ; out_k >= out ; --out_k) {
269294 // Retrieve previous input index from trace
270-  pidx=*(trace  + tb_state);
295+  pidx=*(trace_it  + tb_state);
271296 // Update trace for next output symbol
272-  trace  -= S;
297+  trace_it  -= S;
273298
274299 // Output previous input
275300 *out_k = (unsigned  char ) PI[tb_state][pidx];
276301
277302 // Update tb_state with the previous state on the shortest path
278303 tb_state = PS[tb_state][pidx];
279304 }
280- 281-  trace += S; // place trace at the first time index
282- 283-  free (alpha_prev);
284-  free (alpha_curr);
285-  volk_free (can_metrics);
286305 }
287306
288307
0 commit comments