1/*******************************************************************\
3Module: Java local variable table processing
5Author: Chris Smowton, chris.smowton@diffblue.com
7\*******************************************************************/
23// Specialise the CFG representation to work over Java instead of GOTO programs.
24// This must be done at global scope due to template resolution rules.
32 cfg_base_nodet<T, java_bytecode_convert_methodt::method_offsett>>
53 // Map instruction PCs onto node indices:
56 (*this)[entry_map[
inst.first]].PC=
inst.first;
58 // Add edges declared in the address map:
61 for(
auto succ :
inst.second.successors)
64 // Next, add edges declared in the exception table, which
65 // don't figure in the address map successors/predecessors as yet:
66 for(
const auto &
table_entry : method.exception_table)
71 "Exception table entry doesn't point to an instruction?");
74 // For now just assume any non-branch
75 // instruction could potentially throw.
85 entry_map.at(
findit->first),
127// Grab some class typedefs for brevity:
139// Comparators for local variables:
145 return a.var.index<
b.var.index;
151 return a->var.start_pc<
b->var.start_pc;
154// The predecessor map, and a top-sorting comparator:
158 std::set<local_variable_with_holest *> >
174 return findit->second.count(
b)>0;
178// Helper routines for the find-initializers code below:
188 std::set<local_variable_with_holest*> &result)
190 if(!result.insert(start).second)
214 if(
inst.args.size()==1)
216 // Store with an argument:
217 const auto &arg=
inst.args[0];
222 // Store shorthands, like "store_0", "store_1"
225 "expected store instruction looks like store_0, store_1...");
229 "store_? instructions should end in a digit");
259 local_variable_table_with_holest::iterator
firstvar,
260 local_variable_table_with_holest::iterator
varlimit,
268 for(
auto idx = it->var.start_pc,
idxlim = it->var.start_pc + it->var.length;
304 local_variable_table_with_holest::iterator
firstvar,
305 local_variable_table_with_holest::iterator
varlimit,
314 // All entries of the "local_variable_table_with_holest" processed in this
315 // function concern the same Java Local Variable Table slot/register. This
316 // is because "find_initializers()" has already sorted them.
319 "all entries are for the same local variable slot");
321 // Parameters are irrelevant to us and shouldn't be changed. This is because
322 // they cannot have live predecessor ranges: they are initialized by the JVM
323 // and no other live variable range can flow into them.
328 msg.debug() <<
"jcm: ppm: processing var idx " << it->var.index
329 <<
" name '" << it->var.name <<
"' start-pc "
330 << it->var.start_pc <<
" len " << it->var.length
334 // Find the last instruction within the live range:
335 const auto end_pc = it->var.start_pc + it->var.length;
339 "current bytecode shall not be the first");
346 "Instruction live range doesn't align to instruction boundary?");
349 // Find vartable entries that flow into this one. For unknown reasons this
350 // loop iterates backwards, walking back from the last bytecode in the live
351 // range of variable it to the first one. For each value of the iterator
352 // "amapit" we search for instructions that jump into amapit's address
359 // pred is the address (byecode offset) of a instruction that jumps into
360 // amapit. Compute now a pointer to the variable-with-holes whose index
361 // equals that of amapit and which was alive on instruction pred, or a
362 // null pointer if no such variable exists (e.g., because no live range
363 // covers that instruction)
369 // Three cases are now possible:
370 // 1. The predecessor instruction is in the same live range: nothing to
376 // 2. The predecessor instruction is in no live range among those for
377 // variable slot it->var.index
380 // Check if this is an initializer, and if so expand the live range
381 // to include it, but don't check its predecessors:
385 "we shall not be on the first bytecode of the method");
389 // These sorts of infeasible edges can occur because jsr
390 // handling is presently vague (any subroutine is assumed to
391 // be able to return to any callsite)
392 msg.warning() <<
"Local variable table: ignoring flow from "
393 <<
"out of range for " << it->var.name <<
' '
402 msg.warning() <<
"Local variable table: didn't find initializing "
403 <<
"store for predecessor of bytecode at address "
405 <<
amapit->second.predecessors.size()
406 <<
" predecessors)" <<
msg.eom;
407 throw "local variable table: unexpected live ranges";
411 // 3. Predecessor instruction is a different range associated to the
412 // same variable slot
415 if(
pred_var->var.name!=it->var.name ||
416 pred_var->var.descriptor!=it->var.descriptor)
418 // These sorts of infeasible edges can occur because
419 // jsr handling is presently vague (any subroutine is
420 // assumed to be able to return to any callsite)
421 msg.warning() <<
"Local variable table: ignoring flow from "
422 <<
"clashing variable for "
423 << it->var.name <<
' ' <<
pred <<
" -> "
427 // OK, this is a flow from a similar but
428 // distinct entry in the local var table.
434 // If a simple pre-block initializer was found,
435 // add it to the live range now:
449 const std::set<local_variable_with_holest *> &
merge_vars,
455 std::numeric_limits<java_bytecode_convert_methodt::method_offsett>::max();
462 std::vector<java_bytecode_convert_methodt::method_offsett>
476 // Working from the back, simply find the first PC
477 // that occurs merge_vars.size() times and therefore
478 // dominates all vars we seek to merge:
482 /* Don't increment here */)
496 throw "variable live ranges with no common dominator?";
509 const std::set<local_variable_with_holest *> &
merge_vars,
541 const std::set<local_variable_with_holest *> &
merge_vars,
545 // Because we need a lexically-scoped declaration,
546 // we must have the merged variable
547 // enter scope both in a block that dominates all entries, and which
548 // precedes them in program order.
552 // Populate the holes in the live range
553 // (addresses where the new variable will be in scope,
554 // but references to this stack slot should not resolve to it
555 // as it was not visible in the original local variable table)
561 if(v->var.start_pc+v->var.length>
last_pc)
562 last_pc=v->var.start_pc+v->var.length;
565 // Apply the changes:
578 // Nuke the now-subsumed var-table entries:
598 local_variable_table_with_holest::iterator
firstvar,
599 local_variable_table_with_holest::iterator
varlimit,
603 // Build a simple map from instruction PC to the variable
604 // live in this slot at that PC, and a map from each variable
605 // to variables that flow into it:
609 // Now find variables that flow together by
610 // walking backwards to find initializers
611 // or branches from other live ranges:
621 // OK, we've established the flows all seem sensible.
622 // Now merge vartable entries according to the predecessor_map:
624 // Take the transitive closure of the predecessor map:
632 // Top-sort so that we get the bottom variables first:
640 // Now merge the entries:
643 // Already merged into another variable?
668 local_variable_table_with_holest::iterator &
it1,
669 local_variable_table_with_holest::iterator &
it2,
670 local_variable_table_with_holest::iterator
itend)
679 auto index=
it2->var.index;
696 // Sort table entries by local slot index:
699 // For each block of entries using a particular index,
700 // try to combine them:
721 // Move to end; consider the new element we've swapped in:
725 --i;
// May make i (size_t)-1, but it will immediately be
726 // re-incremented as the loop iterates.
730 // Remove un-needed entries.
744 // Compute CFG dominator tree
754 log.
debug() <<
"jcm: setup-local-vars: lvt size "
758 // Find out which local variable table entries should be merged:
759 // Wrap each entry so we have a data structure to work during function calls,
760 // where we record live ranges with holes:
765 // Merge variable records. See documentation of in
766 // `find_initializers_for_slot` for more details. If the strategy employed
767 // there fails with an exception, we just ignore the LVT for this method, no
768 // variable is generated in `this->variables[]` (because we return here and
769 // dont let the for loop below to execute), and as a result the method
770 // this->variable() will be forced to create new `anonlocal::` variables, as
771 // the only source of variable names for that method is `this->variables[]`.
776 catch(
const char *message)
778 log.warning() <<
"Bytecode -> codet translation error: " << message
780 <<
"This is probably due to an unexpected LVT, "
781 <<
"falling back to translation without LVT" <<
messaget::eom;
785 // Clean up removed records from the variable table:
788 // Do the locals and parameters in the variable table, which is available when
789 // compiled with -g or for methods with many local variables in the latter
790 // case, different variables can have the same index, depending on the
793 // to calculate which variable to use, one uses the address of the instruction
794 // that uses the variable, the size of the instruction and the start_pc /
795 // length values in the local variable table
802 log.debug() <<
"jcm: setup-local-vars: merged variable: idx " << v.var.index
803 <<
" name " << v.var.name <<
" v.var.descriptor '"
804 << v.var.descriptor <<
"' holes " << v.holes.size()
814 "A method name has the format class `.` method `:(`signature`)`.");
817 const typet t = v.var.signature.has_value()
819 v.var.descriptor, v.var.signature, class_name)
822 std::ostringstream
id_oss;
823 id_oss << method_id <<
"::" << v.var.start_pc <<
"::" << v.var.name;
828 // Create a new local variable in the variables[] array, the result of
829 // merging multiple local variables with equal name (parameters excluded)
830 // into a single local variable with holes
831 variables[v.var.index].emplace_back(
832 result, v.var.start_pc, v.var.length,
false, std::move(v.holes));
834 // Register the local variable in the symbol table
836 new_symbol.base_name=v.var.name;
837 new_symbol.pretty_name=
id2string(identifier).substr(6, std::string::npos);
838 new_symbol.is_file_local=
true;
839 new_symbol.is_thread_local=
true;
840 new_symbol.is_lvalue=
true;
841 symbol_table.add(new_symbol);
858 size_t start_pc=var.start_pc;
859 size_t length=var.length;
860 if(address>=start_pc && address<(start_pc+length))
863 for(
auto &
hole : var.holes)
864 if(address>=
hole.start_pc && address<(
hole.start_pc+
hole.length))
874 // add unnamed local variable to end of list at this index
875 // with scope from 0 to SIZE_T_MAX
876 // as it is at the end of the vector, it will only be taken into account
877 // if no other variable is valid
struct bytecode_infot const bytecode_info[]
ait supplies three of the four components needed: an abstract interpreter (in this case handling func...
entryt & at(const goto_programt::const_targett &t)
entryt get_node_index(const goto_programt::const_targett &program_point) const
Get the graph node index for program_point.
dstringt has one field, an unsigned integer no which is an index into a static table of strings.
A generic directed graph with a parametric node type.
node_indext add_node(arguments &&... values)
void add_edge(node_indext a, node_indext b)
method_offsett slots_for_parameters
Number of local variable slots used by the JVM to pass parameters upon invocation of the method under...
std::vector< variablet > variablest
void setup_local_variables(const methodt &m, const address_mapt &amap)
See find_initializers_for_slot above for more detail.
const variablet & find_variable_for_slot(size_t address, variablest &var_list)
See above.
void find_initializers(local_variable_table_with_holest &vars, const address_mapt &amap, const java_cfg_dominatorst &doms)
See find_initializers_for_slot above for more detail.
std::vector< local_variable_with_holest > local_variable_table_with_holest
void find_initializers_for_slot(local_variable_table_with_holest::iterator firstvar, local_variable_table_with_holest::iterator varlimit, const address_mapt &amap, const java_cfg_dominatorst &doms)
Given a sequence of users of the same local variable slot, this figures out which ones are related by...
bool is_parameter(const local_variablet &v)
Returns true iff the slot index of the local variable of a method (coming from the LVT) is a paramete...
std::map< method_offsett, converted_instructiont > address_mapt
Class that provides messages with a built-in verbosity 'level'.
message_handlert & get_message_handler()
mstreamt & status() const
Expression to hold a symbol (variable)
The type of an expression, extends irept.
const std::string & id2string(const irep_idt &d)
JAVA Bytecode Language Conversion.
static void maybe_add_hole(local_variable_with_holest &var, java_bytecode_convert_methodt::method_offsett from, java_bytecode_convert_methodt::method_offsett to)
See above.
std::map< local_variable_with_holest *, std::set< local_variable_with_holest * > > predecessor_mapt
static bool lt_index(const local_variable_with_holest &a, const local_variable_with_holest &b)
java_bytecode_convert_methodt::java_cfg_dominatorst java_cfg_dominatorst
static void populate_variable_address_map(local_variable_table_with_holest::iterator firstvar, local_variable_table_with_holest::iterator varlimit, std::vector< local_variable_with_holest * > &live_variable_at_address)
See above.
static bool is_store_to_slot(const java_bytecode_convert_methodt::instructiont &inst, unsigned slotidx)
See above.
static void populate_predecessor_map(local_variable_table_with_holest::iterator firstvar, local_variable_table_with_holest::iterator varlimit, const std::vector< local_variable_with_holest * > &live_variable_at_address, const address_mapt &amap, predecessor_mapt &predecessor_map, message_handlert &msg_handler)
Populates the predecessor_map with a graph from local variable table entries to their predecessors (t...
static void populate_live_range_holes(local_variable_with_holest &merge_into, const std::set< local_variable_with_holest * > &merge_vars, java_bytecode_convert_methodt::method_offsett expanded_live_range_start)
See above.
static bool lt_startpc(const local_variable_with_holest *a, const local_variable_with_holest *b)
java_bytecode_convert_methodt::address_mapt address_mapt
java_bytecode_convert_methodt::holet holet
static void cleanup_var_table(std::vector< local_variable_with_holest > &vars_with_holes)
See above.
static java_bytecode_convert_methodt::method_offsett get_common_dominator(const std::set< local_variable_with_holest * > &merge_vars, const java_cfg_dominatorst &dominator_analysis)
Used to find out where to put a variable declaration that subsumes several variable live ranges.
java_bytecode_convert_methodt::local_variable_with_holest local_variable_with_holest
static void walk_to_next_index(local_variable_table_with_holest::iterator &it1, local_variable_table_with_holest::iterator &it2, local_variable_table_with_holest::iterator itend)
Walk a vector, a contiguous block of entries with equal slot index at a time.
static void merge_variable_table_entries(local_variable_with_holest &merge_into, const std::set< local_variable_with_holest * > &merge_vars, const java_cfg_dominatorst &dominator_analysis, std::ostream &debug_out)
See above.
java_bytecode_convert_methodt::local_variable_table_with_holest local_variable_table_with_holest
static void gather_transitive_predecessors(local_variable_with_holest *start, const predecessor_mapt &predecessor_map, std::set< local_variable_with_holest * > &result)
See above.
std::optional< typet > java_type_from_string(const std::string &src, const std::string &class_name_prefix)
Transforms a string representation of a Java type into an internal type representation thereof.
std::optional< typet > java_type_from_string_with_exception(const std::string &descriptor, const std::optional< std::string > &signature, const std::string &class_name)
#define PRECONDITION(CONDITION)
#define INVARIANT(CONDITION, REASON)
This macro uses the wrapper function 'invariant_violated_string'.
const constant_exprt & to_constant_expr(const exprt &expr)
Cast an exprt to a constant_exprt.
unsigned safe_string2unsigned(std::string_view str, int base)
bool operator()(local_variable_with_holest *a, local_variable_with_holest *b) const
is_predecessor_oft(const predecessor_mapt &_order)
const predecessor_mapt & order
std::vector< holet > holes
std::pair< const methodt &, const address_mapt & > method_with_amap
local_variable_tablet local_variable_table
nodet & get_node(const java_bytecode_convert_methodt::method_offsett &instruction)
procedure_local_cfg_baset()
java_bytecode_convert_methodt::method_with_amapt method_with_amapt
java_bytecode_convert_methodt::method_offsett get_node_index(const java_bytecode_convert_methodt::method_offsett &instruction) const
const nodet & get_node(const java_bytecode_convert_methodt::method_offsett &instruction) const
std::map< java_bytecode_convert_methodt::method_offsett, java_bytecode_convert_methodt::method_offsett > entry_mapt
static bool nodes_empty(const method_with_amapt &args)
static java_bytecode_convert_methodt::method_offsett get_first_node(const method_with_amapt &args)
grapht< cfg_base_nodet< T, java_bytecode_convert_methodt::method_offsett > > base_grapht
void operator()(const method_with_amapt &args)
static java_bytecode_convert_methodt::method_offsett get_last_node(const method_with_amapt &args)