Unicode mangling (was Re: [PATCH] Java: New C++ ABI compatibility changes.)

Alexandre Petit-Bianco apbianco@cygnus.com
Tue Jan 16 16:08:00 GMT 2001


Jason Merrill writes:
> I meant _NNNN and _LNNNNNNNN, actually, with a literal _ encoded as __.
> Only the last would actually require a change in the Java frontend.

OK. I have a patch for this. We now mangle something like `<clinit>'
to `_ZN1f16_003cclinit_003eEv'. A hypothetical `<cl_init>' would be
mangled `_ZN1f18_003ccl__init_003eEv'.

> What was that used for?

I honestly don't know. Maybe Per remembers.

>>> With these fixes, I think the current scheme is OK. But for targets
>>> with 8-bit clean binutils, I think it makes a lot of sense to just
>>> use the UTF8 encoding in the symbol.

>> That's fine too, but requires coordinated changes in binutils.

> Does it? Having output filters on nm and such to convert from UTF8
> to the current locale's encoding would be good, but not strictly
> necessary.

I'm not quite sure about that -- it's true we don't look at UTF8
encoded symbols that often (even though it depends on which foreign
language you code in), but I would hate having garbled output. Maybe
it's just me.

Anyways, I'll check this patch in soon. We can always modify mangling
later to address the case of targets with 8-bit clean binutils (how do
we configure for that?)
./A
2001-01-16  Alexandre Petit-Bianco  <apbianco@cygnus.com>
	* mangle.c (mangle_member_name): Return void.
	(mangle_field_decl): Don't append `U' in escaped names.
	(mangle_method_decl): Likewise.
	(mangle_member_name): Just use append_gpp_mangled_name.
	(unicode_mangling_length): Fixed leading comment. Consider
	mangling `$'. Account for possible mangling of `_'.
	(append_unicode_mangled_name): Consider mangling `$'. Account for
	possible mangling of `_'.
	(append_gpp_mangled_name): Fixed leading comment. Don't prefix
	length with `U' when escapes are necessary.
Index: mangle.c
===================================================================
RCS file: /cvs/gcc/egcs/gcc/java/mangle.c,v
retrieving revision 1.11
diff -u -p -r1.11 mangle.c
--- mangle.c	2001/01/15 08:01:22	1.11
+++ mangle.c	2001/01/17 00:04:57
@@ -59,7 +59,7 @@ static void compression_table_add PARAMS
 static void append_unicode_mangled_name PARAMS ((const char *, int));
 static void append_gpp_mangled_name PARAMS ((const char *, int));
 static int unicode_mangling_length PARAMS ((const char *, int));
-static int mangle_member_name PARAMS ((tree));
+static void mangle_member_name PARAMS ((tree));
 
 /* We use an incoming obstack, always to be provided to the interface
 functions. */
@@ -123,19 +123,14 @@ static void
 mangle_field_decl (decl)
 tree decl;
 {
- tree name = DECL_NAME (decl);
- int field_name_needs_escapes = 0;
-
 /* Mangle the name of the this the field belongs to */
 mangle_record_type (DECL_CONTEXT (decl), /* from_pointer = */ 0);
 
 /* Mangle the name of the field */
- field_name_needs_escapes = mangle_member_name (name);
+ mangle_member_name (DECL_NAME (decl));
 
 /* Terminate the mangled name */
 obstack_1grow (mangle_obstack, 'E');
- if (field_name_needs_escapes)
- obstack_1grow (mangle_obstack, 'U');
 }
 
 /* This mangles a method decl, first mangling its name and then all
@@ -147,7 +142,6 @@ mangle_method_decl (mdecl)
 {
 tree method_name = DECL_NAME (mdecl);
 tree arglist;
- int method_name_needs_escapes = 0;
 
 /* Mangle the name of the type that contains mdecl */
 mangle_record_type (DECL_CONTEXT (mdecl), /* from_pointer = */ 0);
@@ -172,7 +166,7 @@ mangle_method_decl (mdecl)
 	obstack_grow (mangle_obstack, "C1", 2);
 }
 else
- method_name_needs_escapes = mangle_member_name (method_name);
+ mangle_member_name (method_name);
 obstack_1grow (mangle_obstack, 'E');
 
 /* We mangled type.methodName. Now onto the arguments. */
@@ -189,31 +183,18 @@ mangle_method_decl (mdecl)
 for (arg = arglist; arg != end_params_node; arg = TREE_CHAIN (arg))
 	mangle_type (TREE_VALUE (arg));
 }
-
- /* Terminate the mangled name */
- if (method_name_needs_escapes)
- obstack_1grow (mangle_obstack, 'U');
 }
 
 /* This mangles a member name, like a function name or a field
 name. Handle cases were `name' is a C++ keyword. Return a non zero
 value if unicode encoding was required. */
 
-static int
+static void
 mangle_member_name (name)
 tree name;
 {
- const char * name_string = IDENTIFIER_POINTER (name);
- int len = IDENTIFIER_LENGTH (name);
- int to_return = 0;
-
- if (unicode_mangling_length (name_string, len) > 0)
- {
- append_unicode_mangled_name (name_string, len);
- to_return = 1;
- }
- else
- append_gpp_mangled_name (name_string, len);
+ append_gpp_mangled_name (IDENTIFIER_POINTER (name),
+			 IDENTIFIER_LENGTH (name));
 
 /* If NAME happens to be a C++ keyword, add `$' or `.' or `_'. */
 if (cxx_keyword_p (IDENTIFIER_POINTER (name), IDENTIFIER_LENGTH (name)))
@@ -228,13 +209,11 @@ mangle_member_name (name)
 #endif /* NO_DOT_IN_LABEL */
 #endif /* NO_DOLLAR_IN_LABEL */
 }
-
- return to_return;
 }
 
-/* Assuming (NAME, LEN) is a Utf8-encoding string, calculate
- the length of the string as mangled (a la g++) including Unicode escapes.
- If no escapes are needed, return 0. */
+/* Assuming (NAME, LEN) is a Utf8-encoding string, calculate the
+ length of the string as mangled (a la g++) including Unicode
+ escapes. If no escapes are needed, return 0. */
 
 static int
 unicode_mangling_length (name, len)
@@ -255,18 +234,29 @@ unicode_mangling_length (name, len)
 	need_escapes += num_chars == 0;
 else if (ch == '_')
 	underscores++;
- else if (ch != '$' && (ch < 'a' || ch > 'z') && (ch < 'A' || ch > 'Z'))
+ else if (
+#ifdef NO_DOLLAR_IN_LABEL
+	 ch != '$' &&
+#endif
+	 (ch < 'a' || ch > 'z') && (ch < 'A' || ch > 'Z'))
 	need_escapes++;
 num_chars++;
 }
+ /* If we need escapes, compute the length of the mangled symbol
+ string. Mangling a character that needs an escape adds 4 ASCII
+ characters to the string, and in this case, processing a `_' adds
+ one more. */
 if (need_escapes)
- return num_chars + 4 * (need_escapes + underscores);
+ return num_chars + 4 * need_escapes + underscores;
 else
 return 0;
 }
 
 /* Assuming (NAME, LEN) is a Utf8-encoding string, emit the string
- appropriately mangled (with Unicode escapes) to OBSTACK. */
+ appropriately mangled (with Unicode escapes) to MANGLE_OBSTACK.
+ Characters needing an escape are encoded _UNNNN, in which case `_'
+ will be mangled `__'. `$' is mangled `$' or _U0024 according to
+ NO_DOLLAR_IN_LABEL. */
 
 static void
 append_unicode_mangled_name (name, len)
@@ -287,22 +277,31 @@ append_unicode_mangled_name (name, len)
 if (ch >= '0' && ch <= '9')
 	emit_escape = (ptr == (const unsigned char *) name);
 else
-	emit_escape = (ch < 'a' || ch > 'z') && (ch < 'A' || ch > 'Z');
+	emit_escape = 
+	 (
+#ifdef NO_DOLLAR_IN_LABEL
+	 ch != '$' &&
+#endif
+	 (ch < 'a' || ch > 'z') && (ch < 'A' || ch > 'Z'));
 if (emit_escape)
 	{
-	 char buf[6];
-	 sprintf (buf, "_%04x", ch);
-	 obstack_grow (mangle_obstack, buf, 5);
+	 if (ch == '_')
+	 obstack_grow (mangle_obstack, "__", 2);
+	 else
+	 {
+	 char buf[6];
+	 sprintf (buf, "_%04x", ch);
+	 obstack_grow (mangle_obstack, buf, 5);
+	 }
 	}
 else
-	{
-	 obstack_1grow (mangle_obstack, ch);
-	}
+	obstack_1grow (mangle_obstack, ch);
 }
 }
 
 /* Assuming (NAME, LEN) is a Utf8-encoding string, emit the string
- appropriately mangled (with Unicode escapes if needed) to OBSTACK. */
+ appropriately mangled (with Unicode escapes if needed) to
+ MANGLE_OBSTACK. */
 
 static void
 append_gpp_mangled_name (name, len)
@@ -312,18 +311,14 @@ append_gpp_mangled_name (name, len)
 int encoded_len = unicode_mangling_length (name, len);
 int needs_escapes = encoded_len > 0;
 char buf[6];
+ 
+ sprintf (buf, "%d", (needs_escapes ? encoded_len : len));
+ obstack_grow (mangle_obstack, buf, strlen (buf));
+ 
 if (needs_escapes)
- {
- sprintf (buf, "U%d", encoded_len);
- obstack_grow (mangle_obstack, buf, strlen(buf));
- append_unicode_mangled_name (name, len);
- }
+ append_unicode_mangled_name (name, len);
 else
- {
- sprintf (buf, "%d", len);
- obstack_grow (mangle_obstack, buf, strlen(buf));
- obstack_grow (mangle_obstack, name, len);
- }
+ obstack_grow (mangle_obstack, name, len);
 }
 
 /* Append the mangled name of TYPE onto OBSTACK. */


More information about the Java mailing list

AltStyle によって変換されたページ (->オリジナル) /