1/*******************************************************************\
3Module: Preprocess a goto-programs so that calls to the java Character
4 library are replaced by simple expressions.
6Author: Romain Brenguier
10\*******************************************************************/
34 const exprt &result=function_call.
lhs();
61 const exprt &
chr,
const std::list<mp_integer> &list)
64 for(
const auto &i : list)
119 const exprt &result=function_call.
lhs();
145 // If there is no radix argument we set it to 36 which is the maximum possible
148 const exprt &result=function_call.
lhs();
151 // TODO: If the radix is not in the range MIN_RADIX <= radix <= MAX_RADIX or
152 // if the value of ch is not a valid digit in the specified radix,
155 // Case 1: The method isDigit is true of the character and the Unicode
156 // decimal digit value of the character (or its single-character
157 // decomposition) is less than the specified radix.
162 // TODO: this is only valid for latin digits
166 // Case 2: The character is one of the uppercase Latin letters 'A'
167 // through 'Z' and its code is less than radix + 'A' - 10,
168 // then ch - 'A' + 10 is returned.
176 // The character is one of the lowercase Latin letters 'a' through 'z' and
177 // its code is less than radix + 'a' - 10, then ch - 'a' + 10 is returned.
185 // The character is one of the fullwidth uppercase Latin letters A ('\uFF21')
186 // through Z ('\uFF3A') and its code is less than radix + '\uFF21' - 10.
187 // In this case, ch - '\uFF21' + 10 is returned.
194 // The character is one of the fullwidth lowercase Latin letters a ('\uFF41')
195 // through z ('\uFF5A') and its code is less than radix + '\uFF41' - 10.
196 // In this case, ch - '\uFF41' + 10 is returned.
230 const exprt &result=function_call.
lhs();
247 // TODO: This is unimplemented for now as it requires analyzing
248 // the UnicodeData file to find characters directionality.
288 // TODO: This is unimplemented for now as it requires analyzing
289 // the UnicodeData file to categorize characters.
334 (
void)type;
// unused parameter
345 (
void)type;
// unused parameter
400 (
void)type;
// unused parameter
423 (
void)type;
// unused parameter
424 // The following intervals are undefined in unicode, according to
425 // the Unicode Character Database: http://www.unicode.org/Public/UCD/latest/
488 (
void)type;
// unused parameter
497 return std::move(
digit);
527 (
void)type;
// unused parameter
552 (
void)type;
// unused parameter
593 const exprt &result=function_call.
lhs();
607 const exprt &result=function_call.
lhs();
765 const exprt &result=function_call.
lhs();
780 (
void)type;
// unused parameter
781 return in_list_expr(
chr, {0x28, 0x29, 0x3C, 0x3E, 0x5B, 0x5D, 0x7B, 0x7D});
823 (
void)type;
// unused parameter
825 {0x20, 0x00A0, 0x1680, 0x202F, 0x205F, 0x3000, 0x2028, 0x2029};
858 (
void)type;
// unused parameter
879 (
void)type;
// unused parameter
903 const exprt &result=function_call.
lhs();
916 (
void)type;
// unused parameter
918 {0x01C5, 0x01C8, 0x01CB, 0x01F2, 0x1FBC, 0x1FCC, 0x1FFC};
954 (
void)type;
// unused parameter
955 // The following set of characters is the general category "Nl" in the
956 // Unicode specification.
1068 (
void)type;
// unused parameter
1093 (
void)type;
// unused parameter
1096 {0x20, 0x1680, 0x205F, 0x3000, 0x2028, 0x2029};
1204 std::list<mp_integer>
increment_list={0x01C4, 0x01C7, 0x01CA, 0x01F1};
1205 std::list<mp_integer>
decrement_list={0x01C6, 0x01C9, 0x01CC, 0x01F3};
1209 std::list<mp_integer>
plus_9_list={0x1FB3, 0x1FC3, 0x1FF3};
1298 return (it->second)(code);
1307 // All methods are listed here in alphabetic order
1308 // The ones that are not supported by this module (though they may be
1309 // supported by the string solver) have no entry in the conversion
1310 // table and are marked in this way:
1311 // Not supported "java::java.lang.Character.<init>()"
1318 // Not supported "java::java.lang.Character.codePointAt:([CI)I
1319 // Not supported "java::java.lang.Character.codePointAt:([CII)I"
1320 // Not supported "java::java.lang.Character.codePointAt:"
1321 // "(Ljava.lang.CharSequence;I)I"
1322 // Not supported "java::java.lang.Character.codePointBefore:([CI)I"
1323 // Not supported "java::java.lang.Character.codePointBefore:([CII)I"
1324 // Not supported "java::java.lang.Character.codePointBefore:"
1325 // "(Ljava.lang.CharSequence;I)I"
1326 // Not supported "java::java.lang.Character.codePointCount:([CII)I"
1327 // Not supported "java::java.lang.Character.codePointCount:"
1328 // "(Ljava.lang.CharSequence;II)I"
1329 // Not supported "java::java.lang.Character.compareTo:"
1330 // "(Ljava.lang.Character;)I"
1339 // Not supported "java::java.lang.Character.equals:(Ljava.lang.Object;)Z"
1348 // Not supported "java::java.lang.Character.getName:(I)Ljava.lang.String;"
1420 conversion_table[
"java::java.lang.Character.isSupplementaryCodePoint:(I)Z"]=
1430 conversion_table[
"java::java.lang.Character.isUnicodeIdentifierPart:(C)Z"]=
1432 conversion_table[
"java::java.lang.Character.isUnicodeIdentifierPart:(I)Z"]=
1434 conversion_table[
"java::java.lang.Character.isUnicodeIdentifierStart:(C)Z"]=
1436 conversion_table[
"java::java.lang.Character.isUnicodeIdentifierStart:(I)Z"]=
1449 // Not supported "java::java.lang.Character.offsetByCodePoints:([CIIII)I"
1450 // Not supported "java::java.lang.Character.offsetByCodePoints:"
1451 // "(Ljava.lang.CharacterSequence;II)I"
1456 // Not supported "java::java.lang.Character.toChars:(I[CI)I"
1463 // Not supported "java::java.lang.Character.toString:()Ljava.lang.String;"
1464 // Not supported "java::java.lang.Character.toString:(C)Ljava.lang.String;"
1475 // Not supported "java::java.lang.Character.valueOf:(C)Ljava.lang.Character;"
API to expression classes for bitvectors.
Preprocess a goto-programs so that calls to the java Character library are replaced by simple express...
ait supplies three of the four components needed: an abstract interpreter (in this case handling func...
Boolean AND All operands must be boolean, and the result is always boolean.
A base class for relations, i.e., binary predicates whose two operands have the same type.
static codet convert_is_upper_case_char(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_digit_int(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_is_lower_case_int(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_is_high_surrogate(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_char_function(exprt(*expr_function)(const exprt &chr, const typet &type), conversion_inputt &target)
converts based on a function on expressions
static codet convert_is_whitespace_int(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static exprt expr_of_is_letter_number(const exprt &chr, const typet &type)
Determines if the specified character is in the LETTER_NUMBER category of Unicode.
static exprt expr_of_is_alphabetic(const exprt &chr, const typet &type)
Determines if the specified character (Unicode code point) is alphabetic.
static codet convert_is_ISO_control_int(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_is_lower_case_char(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_get_numeric_value_char(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static exprt expr_of_is_identifier_ignorable(const exprt &chr, const typet &type)
Determines if the character is one of ignorable in a Java identifier, that is, it is in one of these ...
static codet convert_is_title_case_char(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_is_unicode_identifier_part_int(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static exprt expr_of_is_ascii_upper_case(const exprt &chr, const typet &type)
Determines if the specified character is an ASCII uppercase character.
static exprt expr_of_is_defined(const exprt &chr, const typet &type)
Determines if a character is defined in Unicode.
static exprt expr_of_is_unicode_identifier_start(const exprt &chr, const typet &type)
Determines if the specified character is permissible as the first character in a Unicode identifier.
std::unordered_map< irep_idt, conversion_functiont > conversion_table
static codet convert_is_surrogate_pair(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_to_title_case_int(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_compare(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_is_space_char_int(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_char_count(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static exprt expr_of_char_value(const exprt &chr, const typet &type)
Casts the given expression to the given type.
static exprt expr_of_is_high_surrogate(const exprt &chr, const typet &type)
Determines if the given char value is a Unicode high-surrogate code unit (also known as leading-surro...
static codet convert_is_space(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_is_space_char(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static exprt expr_of_is_valid_code_point(const exprt &chr, const typet &type)
Determines whether the specified code point is a valid Unicode code point value.
static exprt in_interval_expr(const exprt &chr, const mp_integer &lower_bound, const mp_integer &upper_bound)
The returned expression is true when the first argument is in the interval defined by the lower and u...
void initialize_conversion_table()
fill maps with correspondance from java method names to conversion functions
static exprt expr_of_is_letter_or_digit(const exprt &chr, const typet &type)
Determines if the specified character is a letter or digit.
static codet convert_is_unicode_identifier_part_char(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static exprt expr_of_is_mirrored(const exprt &chr, const typet &type)
Determines whether the character is mirrored according to the Unicode specification.
static codet convert_to_upper_case_int(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_is_mirrored_char(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_is_java_letter(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_is_defined_int(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_is_identifier_ignorable_int(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static exprt expr_of_is_whitespace(const exprt &chr, const typet &type)
Determines if the specified character is white space according to Java.
codet replace_character_call(const code_function_callt &call) const
replace function calls to functions of the Character by an affectation if possible,...
static codet convert_is_letter_or_digit_char(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static exprt expr_of_is_unicode_identifier_part(const exprt &chr, const typet &type)
Determines if the character may be part of a Unicode identifier.
static codet convert_is_ideographic(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_get_type_int(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static exprt expr_of_to_lower_case(const exprt &chr, const typet &type)
Converts the character argument to lowercase.
static codet convert_is_bmp_code_point(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static exprt expr_of_is_supplementary_code_point(const exprt &chr, const typet &type)
Determines whether the specified character (Unicode code point) is in the supplementary character ran...
static exprt expr_of_reverse_bytes(const exprt &chr, const typet &type)
Returns the value obtained by reversing the order of the bytes in the specified char value.
static codet convert_is_defined_char(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static exprt expr_of_to_title_case(const exprt &chr, const typet &type)
Converts the character argument to titlecase.
static codet convert_is_supplementary_code_point(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_is_surrogate(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_char_value(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_to_lower_case_int(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_is_mirrored_int(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_is_title_case_int(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_to_title_case_char(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_is_low_surrogate(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_get_directionality_int(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static exprt expr_of_is_ascii_lower_case(const exprt &chr, const typet &type)
Determines if the specified character is an ASCII lowercase character.
static codet convert_hash_code(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_is_unicode_identifier_start_int(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static exprt in_list_expr(const exprt &chr, const std::list< mp_integer > &list)
The returned expression is true when the given character is equal to one of the element in the list.
static codet convert_is_java_identifier_start_char(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method isJavaIdent...
static codet convert_for_digit(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_get_type_char(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static exprt expr_of_high_surrogate(const exprt &chr, const typet &type)
Returns the leading surrogate (a high surrogate code unit) of the surrogate pair representing the spe...
static codet convert_is_upper_case_int(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_is_digit_int(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_is_java_identifier_part_char(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_is_java_identifier_start_int(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method isJavaIdent...
static codet convert_is_identifier_ignorable_char(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_is_ISO_control_char(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_is_unicode_identifier_start_char(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static exprt expr_of_char_count(const exprt &chr, const typet &type)
Determines the number of char values needed to represent the specified character (Unicode code point)...
static codet convert_is_java_letter_or_digit(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method isJavaLette...
static exprt expr_of_is_space_char(const exprt &chr, const typet &type)
Determines if the specified character is white space according to Unicode (SPACE_SEPARATOR,...
static exprt expr_of_is_digit(const exprt &chr, const typet &type)
Determines if the specified character is a digit.
static exprt expr_of_is_bmp_code_point(const exprt &chr, const typet &type)
Determines whether the specified character (Unicode code point) is in the Basic Multilingual Plane (B...
static codet convert_reverse_bytes(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static exprt expr_of_is_surrogate(const exprt &chr, const typet &type)
Determines if the given char value is a Unicode surrogate code unit.
static codet convert_get_numeric_value_int(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_to_upper_case_char(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_is_letter_or_digit_int(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_is_letter_int(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_is_digit_char(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_get_directionality_char(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_to_lower_case_char(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_is_letter_char(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_is_whitespace_char(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static exprt expr_of_is_letter(const exprt &chr, const typet &type)
Determines if the specified character is a letter.
static exprt expr_of_is_title_case(const exprt &chr, const typet &type)
Determines if the specified character is a titlecase character.
static codet convert_digit_char(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static exprt expr_of_to_upper_case(const exprt &chr, const typet &type)
Converts the character argument to uppercase.
static codet convert_is_valid_code_point(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_is_alphabetic(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_is_java_identifier_part_int(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method isJavaIdent...
static exprt expr_of_low_surrogate(const exprt &chr, const typet &type)
Returns the trailing surrogate (a low surrogate code unit) of the surrogate pair representing the spe...
A goto_instruction_codet representing an assignment in the program.
goto_instruction_codet representation of a function call statement.
Data structure for representing an arbitrary statement in a program.
dstringt has one field, an unsigned integer no which is an index into a static table of strings.
Base class for all expressions.
std::vector< exprt > operandst
typet & type()
Return the type of the expression.
The trinary if-then-else operator.
const irep_idt & id() const
Modulo defined as lhs-(rhs * truncate(lhs/rhs)).
Boolean OR All operands must be boolean, and the result is always boolean.
The plus expression Associativity is not specified.
Semantic type conversion.
The type of an expression, extends irept.
#define PRECONDITION(CONDITION)
exprt disjunction(const exprt::operandst &op)
1) generates a disjunction for two or more operands 2) for one operand, returns the operand 3) return...
API to expression classes.
const symbol_exprt & to_symbol_expr(const exprt &expr)
Cast an exprt to a symbol_exprt.
static exprt is_low_surrogate(const exprt &chr)
the output is true when the character is a low surrogate for UTF-16 encoding, see https://en....
static exprt is_high_surrogate(const exprt &chr)
the output is true when the character is a high surrogate for UTF-16 encoding, see https://en....