[PATCH] Java: mangling of unicode characters.

Alexandre Petit-Bianco apbianco@cygnus.com
Thu Feb 8 16:29:00 GMT 2001


Jason Merrill writes:
> Yes, that's what I meant.

OK. So I'm checking this in. Java hackers will have to rebuild their
runtime entirely. Note that it's been tested on x86/alpha/PPC linux.
./A
2001-02-08  Alexandre Petit-Bianco  <apbianco@cygnus.com>
	* Make-lang.in (JAVA_OBJS): Added java/mangle_name.o
	(JVGENMAIN_OBJS): Likewise.
	* java-tree.h (append_gpp_mangled_name): New prototype.	
	* jcf-parse.c (ggc_mark_jcf): Argument now `void *.'
	Removed cast calling `ggc_add_root.'
	* jvgenmain.c (mangle_obstack): New global, initialized.
	(main): Use it.
	(do_mangle_class): Constify local `ptr.'
	Removed macro `MANGLE_NAME.' Removed cast in `for.' Call
	append_gpp_mangled_name and update `count' if necessary.
	Use `mangle_obstack.'
	* mangle.c (append_unicode_mangled_name): Removed.
	(append_gpp_mangled_name): Likewise.
	(unicode_mangling_length): Likewise.
	(mangle_member_name): Return type set to `void.'
	(mangle_field_decl): Don't append `U' in escaped names.
	(mangle_method_decl): Likewise.
	(mangle_member_name): Just use `append_gpp_mangled_name.'
	* mangle_name.c: New file.
Index: Make-lang.in
===================================================================
RCS file: /cvs/gcc/egcs/gcc/java/Make-lang.in,v
retrieving revision 1.50
diff -u -p -r1.50 Make-lang.in
--- Make-lang.in	2001/02/05 05:46:15	1.50
+++ Make-lang.in	2001/02/08 18:59:39
@@ -100,6 +100,7 @@ $(srcdir)/java/keyword.h: $(srcdir)/java
 JAVA_OBJS = java/parse.o java/class.o java/decl.o java/expr.o \
 java/constants.o java/lang.o java/typeck.o java/except.o java/verify.o \
 java/zextract.o java/jcf-io.o java/jcf-parse.o java/mangle.o \
+ java/mangle_name.o \
 java/jcf-write.o java/buffer.o java/check-init.o java/jcf-depend.o \
 java/jcf-path.o java/xref.o java/boehm.o mkdeps.o
 
@@ -111,7 +112,7 @@ JVSCAN_OBJS = java/parse-scan.o java/jv-
 JCFDUMP_OBJS = java/jcf-dump.o java/jcf-io.o java/jcf-depend.o java/jcf-path.o \
 		java/zextract.o errors.o version.o mkdeps.o
 
-JVGENMAIN_OBJS = java/jvgenmain.o
+JVGENMAIN_OBJS = java/jvgenmain.o java/mangle_name.o
 
 # Use loose warnings for this front end.
 java-warn =
Index: java-tree.h
===================================================================
RCS file: /cvs/gcc/egcs/gcc/java/java-tree.h,v
retrieving revision 1.96
diff -u -p -r1.96 java-tree.h
--- java-tree.h	2001/02/04 22:44:02	1.96
+++ java-tree.h	2001/02/08 18:59:44
@@ -1115,6 +1115,7 @@ extern tree java_mangle_class_field PARA
 extern tree java_mangle_class_field_from_string PARAMS ((struct obstack *, char *));
 extern tree java_mangle_vtable PARAMS ((struct obstack *, tree));
 extern const char *lang_printable_name_wls PARAMS ((tree, int));
+extern void append_gpp_mangled_name PARAMS ((const char *, int));
 
 /* We use ARGS_SIZE_RTX to indicate that gcc/expr.h has been included
 to declare `enum expand_modifier'. */
Index: jvgenmain.c
===================================================================
RCS file: /cvs/gcc/egcs/gcc/java/jvgenmain.c,v
retrieving revision 1.20
diff -u -p -r1.20 jvgenmain.c
--- jvgenmain.c	2001/02/02 00:15:43	1.20
+++ jvgenmain.c	2001/02/08 18:59:44
@@ -34,7 +34,8 @@ The Free Software Foundation is independ
 
 static char * do_mangle_classname PARAMS ((const char *string));
 
-struct obstack name_obstack;
+struct obstack name_obstack;
+struct obstack *mangle_obstack = &name_obstack;
 
 void
 gcc_obstack_init (obstack)
@@ -92,7 +93,7 @@ main (int argc, const char **argv)
 
 classname = argv[i];
 
- gcc_obstack_init (&name_obstack);
+ gcc_obstack_init (mangle_obstack);
 mangled_classname = do_mangle_classname (classname);
 
 if (i < argc - 1 && strcmp (argv[i + 1], "-") != 0)
@@ -150,30 +151,22 @@ static char *
 do_mangle_classname (string)
 const char *string;
 {
- char *ptr;
+ const char *ptr;
 int count = 0;
 
-#define MANGLE_NAME()						\
- {								\
- char buffer [128];						\
- sprintf (buffer, "%d", count);				\
- obstack_grow (&name_obstack, buffer, strlen (buffer));	\
- obstack_grow (&name_obstack, & ptr [-count], count);	\
- count = 0;							\
- }
-
 obstack_grow (&name_obstack, "_ZN", 3);
 
- for (ptr = (char *)string; *ptr; ptr++ )
+ for (ptr = string; *ptr; ptr++ )
 {
 if (ptr[0] == '.')
 	{
-	 MANGLE_NAME ();
+	 append_gpp_mangled_name (&ptr [-count], count);
+	 count = 0;
 	}
 else
 	count++;
 }
- MANGLE_NAME ();
- obstack_grow0 (&name_obstack, "6class$E", 8);
- return obstack_finish (&name_obstack);
+ append_gpp_mangled_name (&ptr [-count], count);
+ obstack_grow (mangle_obstack, "6class$E", 8);
+ return obstack_finish (mangle_obstack);
 }
Index: mangle.c
===================================================================
RCS file: /cvs/gcc/egcs/gcc/java/mangle.c,v
retrieving revision 1.13
diff -u -p -r1.13 mangle.c
--- mangle.c	2001/02/04 22:44:03	1.13
+++ mangle.c	2001/02/08 18:59:46
@@ -56,10 +56,7 @@ static void init_mangling PARAMS ((struc
 static tree finish_mangling PARAMS ((void));
 static void compression_table_add PARAMS ((tree));
 
-static void append_unicode_mangled_name PARAMS ((const char *, int));
-static void append_gpp_mangled_name PARAMS ((const char *, int));
-static int unicode_mangling_length PARAMS ((const char *, int));
-static int mangle_member_name PARAMS ((tree));
+static void mangle_member_name PARAMS ((tree));
 
 /* We use an incoming obstack, always to be provided to the interface
 functions. */
@@ -122,19 +119,14 @@ static void
 mangle_field_decl (decl)
 tree decl;
 {
- tree name = DECL_NAME (decl);
- int field_name_needs_escapes = 0;
-
 /* Mangle the name of the this the field belongs to */
 mangle_record_type (DECL_CONTEXT (decl), /* from_pointer = */ 0);
 
 /* Mangle the name of the field */
- field_name_needs_escapes = mangle_member_name (name);
+ mangle_member_name (DECL_NAME (decl));
 
 /* Terminate the mangled name */
 obstack_1grow (mangle_obstack, 'E');
- if (field_name_needs_escapes)
- obstack_1grow (mangle_obstack, 'U');
 }
 
 /* This mangles a method decl, first mangling its name and then all
@@ -146,7 +138,6 @@ mangle_method_decl (mdecl)
 {
 tree method_name = DECL_NAME (mdecl);
 tree arglist;
- int method_name_needs_escapes = 0;
 
 /* Mangle the name of the type that contains mdecl */
 mangle_record_type (DECL_CONTEXT (mdecl), /* from_pointer = */ 0);
@@ -166,7 +157,7 @@ mangle_method_decl (mdecl)
 	obstack_grow (mangle_obstack, "C1", 2);
 }
 else
- method_name_needs_escapes = mangle_member_name (method_name);
+ mangle_member_name (method_name);
 obstack_1grow (mangle_obstack, 'E');
 
 /* We mangled type.methodName. Now onto the arguments. */
@@ -183,32 +174,19 @@ mangle_method_decl (mdecl)
 for (arg = arglist; arg != end_params_node; arg = TREE_CHAIN (arg))
 	mangle_type (TREE_VALUE (arg));
 }
-
- /* Terminate the mangled name */
- if (method_name_needs_escapes)
- obstack_1grow (mangle_obstack, 'U');
 }
 
 /* This mangles a member name, like a function name or a field
 name. Handle cases were `name' is a C++ keyword. Return a non zero
 value if unicode encoding was required. */
 
-static int
+static void
 mangle_member_name (name)
 tree name;
 {
- const char * name_string = IDENTIFIER_POINTER (name);
- int len = IDENTIFIER_LENGTH (name);
- int to_return = 0;
+ append_gpp_mangled_name (IDENTIFIER_POINTER (name),
+			 IDENTIFIER_LENGTH (name));
 
- if (unicode_mangling_length (name_string, len) > 0)
- {
- append_unicode_mangled_name (name_string, len);
- to_return = 1;
- }
- else
- append_gpp_mangled_name (name_string, len);
-
 /* If NAME happens to be a C++ keyword, add `$' or `.' or `_'. */
 if (cxx_keyword_p (IDENTIFIER_POINTER (name), IDENTIFIER_LENGTH (name)))
 {
@@ -221,102 +199,6 @@ mangle_member_name (name)
 obstack_1grow (mangle_obstack, '_');
 #endif /* NO_DOT_IN_LABEL */
 #endif /* NO_DOLLAR_IN_LABEL */
- }
-
- return to_return;
-}
-
-/* Assuming (NAME, LEN) is a Utf8-encoding string, calculate
- the length of the string as mangled (a la g++) including Unicode escapes.
- If no escapes are needed, return 0. */
-
-static int
-unicode_mangling_length (name, len)
- const char *name; 
- int len; 
-{
- const unsigned char *ptr;
- const unsigned char *limit = (const unsigned char *)name + len;
- int need_escapes = 0;
- int num_chars = 0;
- int underscores = 0;
- for (ptr = (const unsigned char *) name; ptr < limit; )
- {
- int ch = UTF8_GET(ptr, limit);
- if (ch < 0)
-	error ("internal error - invalid Utf8 name");
- if (ch >= '0' && ch <= '9')
-	need_escapes += num_chars == 0;
- else if (ch == '_')
-	underscores++;
- else if (ch != '$' && (ch < 'a' || ch > 'z') && (ch < 'A' || ch > 'Z'))
-	need_escapes++;
- num_chars++;
- }
- if (need_escapes)
- return num_chars + 4 * (need_escapes + underscores);
- else
- return 0;
-}
-
-/* Assuming (NAME, LEN) is a Utf8-encoding string, emit the string
- appropriately mangled (with Unicode escapes) to OBSTACK. */
-
-static void
-append_unicode_mangled_name (name, len)
- const char *name;
- int len;
-{
- const unsigned char *ptr;
- const unsigned char *limit = (const unsigned char *)name + len;
- for (ptr = (const unsigned char *) name; ptr < limit; )
- {
- int ch = UTF8_GET(ptr, limit);
- int emit_escape;
- if (ch < 0)
-	{
-	 error ("internal error - bad Utf8 string");
-	 break;
-	}
- if (ch >= '0' && ch <= '9')
-	emit_escape = (ptr == (const unsigned char *) name);
- else
-	emit_escape = (ch < 'a' || ch > 'z') && (ch < 'A' || ch > 'Z');
- if (emit_escape)
-	{
-	 char buf[6];
-	 sprintf (buf, "_%04x", ch);
-	 obstack_grow (mangle_obstack, buf, 5);
-	}
- else
-	{
-	 obstack_1grow (mangle_obstack, ch);
-	}
- }
-}
-
-/* Assuming (NAME, LEN) is a Utf8-encoding string, emit the string
- appropriately mangled (with Unicode escapes if needed) to OBSTACK. */
-
-static void
-append_gpp_mangled_name (name, len)
- const char *name;
- int len;
-{
- int encoded_len = unicode_mangling_length (name, len);
- int needs_escapes = encoded_len > 0;
- char buf[6];
- if (needs_escapes)
- {
- sprintf (buf, "U%d", encoded_len);
- obstack_grow (mangle_obstack, buf, strlen(buf));
- append_unicode_mangled_name (name, len);
- }
- else
- {
- sprintf (buf, "%d", len);
- obstack_grow (mangle_obstack, buf, strlen(buf));
- obstack_grow (mangle_obstack, name, len);
 }
 }
 
Index: mangle_name.c
===================================================================
RCS file: mangle_name.c
diff -N mangle_name.c
--- /dev/null	Tue May 5 13:32:27 1998
+++ mangle_name.c	Thu Feb 8 10:59:46 2001
@@ -0,0 +1,223 @@
+/* Shared functions related to mangling names for the GNU compiler
+ for the Java(TM) language.
+ Copyright (C) 2001 Free Software Foundation, Inc.
+
+This file is part of GNU CC.
+
+GNU CC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU CC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU CC; see the file COPYING. If not, write to
+the Free Software Foundation, 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA. 
+
+Java and all Java-based marks are trademarks or registered trademarks
+of Sun Microsystems, Inc. in the United States and other countries.
+The Free Software Foundation is independent of Sun Microsystems, Inc. */
+
+/* Written by Alexandre Petit-Bianco <apbianco@cygnus.com> */
+
+#include "config.h"
+#include "system.h"
+#include "jcf.h"
+#include "tree.h"
+#include "java-tree.h"
+#include "obstack.h"
+#include "toplev.h"
+#include "obstack.h"
+
+static void append_unicode_mangled_name PARAMS ((const char *, int));
+#ifndef HAVE_AS_UTF8
+static int unicode_mangling_length PARAMS ((const char *, int));
+#endif
+
+extern struct obstack *mangle_obstack;
+
+/* If the assembler doesn't support UTF8 in symbol names, some
+ characters might need to be escaped. */
+
+#ifndef HAVE_AS_UTF8
+
+/* Assuming (NAME, LEN) is a Utf8-encoding string, emit the string
+ appropriately mangled (with Unicode escapes if needed) to
+ MANGLE_OBSTACK. Note that `java', `lang' and `Object' are used so
+ frequently that they could be cached. */
+
+void
+append_gpp_mangled_name (name, len)
+ const char *name;
+ int len;
+{
+ int encoded_len = unicode_mangling_length (name, len);
+ int needs_escapes = encoded_len > 0;
+ char buf[6];
+
+ sprintf (buf, "%d", (needs_escapes ? encoded_len : len));
+ obstack_grow (mangle_obstack, buf, strlen (buf));
+
+ if (needs_escapes)
+ append_unicode_mangled_name (name, len);
+ else
+ obstack_grow (mangle_obstack, name, len);
+}
+
+/* Assuming (NAME, LEN) is a Utf8-encoded string, emit the string
+ appropriately mangled (with Unicode escapes) to MANGLE_OBSTACK.
+ Characters needing an escape are encoded `__UNN_' to `__UNNNN_', in
+ which case `__U' will be mangled `__U_'. `$' is mangled `$' or
+ __U24_ according to NO_DOLLAR_IN_LABEL. */
+
+static void
+append_unicode_mangled_name (name, len)
+ const char *name;
+ int len;
+{
+ const unsigned char *ptr;
+ const unsigned char *limit = (const unsigned char *)name + len;
+ int uuU = 0;
+ for (ptr = (const unsigned char *) name; ptr < limit; )
+ {
+ int ch = UTF8_GET(ptr, limit);
+
+ if ((ch >= '0' && ch <= '9')
+#ifndef NO_DOLLAR_IN_LABEL
+	 || ch == '$'
+#endif
+	 || (ch >= 'a' && ch <= 'z')
+	 || (ch >= 'A' && ch <= 'Z' && ch != 'U'))
+	obstack_1grow (mangle_obstack, ch);
+ /* Everything else needs encoding */
+ else
+	{
+	 char buf [9];
+	 if (ch == '_' || ch == 'U')
+	 {
+	 /* Prepare to recognize __U */
+	 if (ch == '_' && (uuU < 3))
+		{
+		 uuU++;
+		 obstack_1grow (mangle_obstack, ch);
+		}
+	 /* We recognize __U that we wish to encode
+ __U_. Finish the encoding. */
+	 else if (ch == 'U' && (uuU == 2))
+		{
+		 uuU = 0;
+		 obstack_grow (mangle_obstack, "U_", 2);
+		}
+	 continue;
+	 }
+	 sprintf (buf, "__U%x_", ch);
+	 obstack_grow (mangle_obstack, buf, strlen (buf));
+	 uuU = 0;
+	}
+ }
+}
+
+/* Assuming (NAME, LEN) is a Utf8-encoding string, calculate the
+ length of the string as mangled (a la g++) including Unicode
+ escapes. If no escapes are needed, return 0. */
+
+static int
+unicode_mangling_length (name, len)
+ const char *name; 
+ int len; 
+{
+ const unsigned char *ptr;
+ const unsigned char *limit = (const unsigned char *)name + len;
+ int need_escapes = 0;		/* Whether we need an escape or not */
+ int num_chars = 0;		/* Number of characters in the mangled name */
+ int uuU = 0;			/* Help us to find __U. 0: '_', 1: '__' */
+ for (ptr = (const unsigned char *) name; ptr < limit; )
+ {
+ int ch = UTF8_GET(ptr, limit);
+
+ if (ch < 0)
+	error ("internal error - invalid Utf8 name");
+ if ((ch >= '0' && ch <= '9')
+#ifndef NO_DOLLAR_IN_LABEL
+	 || ch == '$'
+#endif
+	 || (ch >= 'a' && ch <= 'z')
+	 || (ch >= 'A' && ch <= 'Z' && ch != 'U'))
+	num_chars++;
+ /* Everything else needs encoding */
+ else
+	{
+	 int encoding_length = 2;
+
+	 if (ch == '_' || ch == 'U')
+	 {
+	 /* Prepare to recognize __U */
+	 if (ch == '_' && (uuU < 3))
+		{
+		 num_chars++;
+		 uuU++;
+		}
+	 /* We recognize __U that we wish to encode __U_ */
+	 else if (ch == 'U' && (uuU == 2))
+		{
+		 num_chars += 2;
+		 need_escapes = 1;
+		 uuU = 0;
+		}
+	 continue;
+	 }
+	 
+	 if (ch > 0xff)
+	 encoding_length++;
+	 if (ch > 0xfff)
+	 encoding_length++;
+	 
+	 num_chars += (4 + encoding_length);
+	 need_escapes = 1;
+	 uuU = 0;
+	}
+ }
+ if (need_escapes)
+ return num_chars;
+ else
+ return 0;
+}
+
+#else
+
+/* The assembler supports UTF8, we don't use escapes. Mangling is
+ simply <N>NAME. <N> is the number of UTF8 encoded characters that
+ are found in NAME. Note that `java', `lang' and `Object' are used
+ so frequently that they could be cached. */
+
+void
+append_gpp_mangled_name (name, len)
+ const char *name;
+ int len;
+{
+ const unsigned char *ptr;
+ const unsigned char *limit = (const unsigned char *)name + len;
+ int encoded_len;
+ char buf [6];
+ 
+ /* Compute the length of the string we wish to mangle. */
+ for (encoded_len = 0, ptr = (const unsigned char *) name;
+ ptr < limit; encoded_len++)
+ {
+ int ch = UTF8_GET(ptr, limit);
+
+ if (ch < 0)
+	error ("internal error - invalid Utf8 name");
+ }
+
+ sprintf (buf, "%d", encoded_len);
+ obstack_grow (mangle_obstack, buf, strlen (buf));
+ obstack_grow (mangle_obstack, name, len);
+}
+
+#endif /* HAVE_AS_UTF8 */


More information about the Java mailing list

AltStyle によって変換されたページ (->オリジナル) /