[Python-checkins] cpython: streamline normalizer identification a bit

benjamin.peterson python-checkins at python.org
Mon Jan 16 23:23:08 CET 2012


http://hg.python.org/cpython/rev/0677f05d6ece
changeset: 74457:0677f05d6ece
user: Benjamin Peterson <benjamin at python.org>
date: Mon Jan 16 17:22:31 2012 -0500
summary:
 streamline normalizer identification a bit
files:
 Python/ast.c | 80 +++++++++++++++++++++++++++------------
 1 files changed, 55 insertions(+), 25 deletions(-)
diff --git a/Python/ast.c b/Python/ast.c
--- a/Python/ast.c
+++ b/Python/ast.c
@@ -500,6 +500,8 @@
 char *c_encoding; /* source encoding */
 PyArena *c_arena; /* arena for allocating memeory */
 const char *c_filename; /* filename */
+ PyObject *c_normalize; /* Normalization function from unicodedata. */
+ PyObject *c_normalize_args; /* Normalization argument tuple. */
 };
 
 static asdl_seq *seq_for_testlist(struct compiling *, const node *);
@@ -527,36 +529,54 @@
 #define COMP_LISTCOMP 1
 #define COMP_SETCOMP 2
 
+static int
+init_normalization(struct compiling *c)
+{
+ PyObject *m = PyImport_ImportModuleNoBlock("unicodedata");
+ if (!m)
+ return 0;
+ c->c_normalize = PyObject_GetAttrString(m, "normalize");
+ Py_DECREF(m);
+ if (!c->c_normalize)
+ return 0;
+ c->c_normalize_args = Py_BuildValue("(sN)", "NFKC", Py_None);
+ PyTuple_SET_ITEM(c->c_normalize_args, 1, NULL);
+ if (!c->c_normalize_args) {
+ Py_CLEAR(c->c_normalize);
+ return 0;
+ }
+ return 1;
+}
+
 static identifier
-new_identifier(const char* n, PyArena *arena)
+new_identifier(const char* n, struct compiling *c)
 {
- _Py_IDENTIFIER(normalize);
 PyObject* id = PyUnicode_DecodeUTF8(n, strlen(n), NULL);
 if (!id)
 return NULL;
+ /* PyUnicode_DecodeUTF8 should always return a ready string. */
 assert(PyUnicode_IS_READY(id));
 /* Check whether there are non-ASCII characters in the
 identifier; if so, normalize to NFKC. */
 if (!PyUnicode_IS_ASCII(id)) {
- PyObject *m = PyImport_ImportModuleNoBlock("unicodedata");
 PyObject *id2;
- if (!m) {
+ if (!c->c_normalize && !init_normalization(c)) {
 Py_DECREF(id);
 return NULL;
 }
- id2 = _PyObject_CallMethodId(m, &PyId_normalize, "sO", "NFKC", id);
- Py_DECREF(m);
+ PyTuple_SET_ITEM(c->c_normalize_args, 1, id);
+ id2 = PyObject_Call(c->c_normalize, c->c_normalize_args, NULL);
 Py_DECREF(id);
 if (!id2)
 return NULL;
 id = id2;
 }
 PyUnicode_InternInPlace(&id);
- PyArena_AddPyObject(arena, id);
+ PyArena_AddPyObject(c->c_arena, id);
 return id;
 }
 
-#define NEW_IDENTIFIER(n) new_identifier(STR(n), c->c_arena)
+#define NEW_IDENTIFIER(n) new_identifier(STR(n), c)
 
 /* This routine provides an invalid object for the syntax error.
 The outermost routine must unpack this error and create the
@@ -706,13 +726,14 @@
 stmt_ty s;
 node *ch;
 struct compiling c;
+ mod_ty res = NULL;
 
 if (flags && flags->cf_flags & PyCF_SOURCE_IS_UTF8) {
 c.c_encoding = "utf-8";
 if (TYPE(n) == encoding_decl) {
 #if 0
 ast_error(n, "encoding declaration in Unicode string");
- goto error;
+ goto out;
 #endif
 n = CHILD(n, 0);
 }
@@ -725,13 +746,14 @@
 }
 c.c_arena = arena;
 c.c_filename = filename;
+ c.c_normalize = c.c_normalize_args = NULL;
 
 k = 0;
 switch (TYPE(n)) {
 case file_input:
 stmts = asdl_seq_new(num_stmts(n), arena);
 if (!stmts)
- return NULL;
+ goto out;
 for (i = 0; i < NCH(n) - 1; i++) {
 ch = CHILD(n, i);
 if (TYPE(ch) == NEWLINE)
@@ -741,7 +763,7 @@
 if (num == 1) {
 s = ast_for_stmt(&c, ch);
 if (!s)
- goto error;
+ goto out;
 asdl_seq_SET(stmts, k++, s);
 }
 else {
@@ -750,42 +772,44 @@
 for (j = 0; j < num; j++) {
 s = ast_for_stmt(&c, CHILD(ch, j * 2));
 if (!s)
- goto error;
+ goto out;
 asdl_seq_SET(stmts, k++, s);
 }
 }
 }
- return Module(stmts, arena);
+ res = Module(stmts, arena);
+ break;
 case eval_input: {
 expr_ty testlist_ast;
 
 /* XXX Why not comp_for here? */
 testlist_ast = ast_for_testlist(&c, CHILD(n, 0));
 if (!testlist_ast)
- goto error;
- return Expression(testlist_ast, arena);
+ goto out;
+ res = Expression(testlist_ast, arena);
+ break;
 }
 case single_input:
 if (TYPE(CHILD(n, 0)) == NEWLINE) {
 stmts = asdl_seq_new(1, arena);
 if (!stmts)
- goto error;
+ goto out;
 asdl_seq_SET(stmts, 0, Pass(n->n_lineno, n->n_col_offset,
 arena));
 if (!asdl_seq_GET(stmts, 0))
- goto error;
- return Interactive(stmts, arena);
+ goto out;
+ res = Interactive(stmts, arena);
 }
 else {
 n = CHILD(n, 0);
 num = num_stmts(n);
 stmts = asdl_seq_new(num, arena);
 if (!stmts)
- goto error;
+ goto out;
 if (num == 1) {
 s = ast_for_stmt(&c, n);
 if (!s)
- goto error;
+ goto out;
 asdl_seq_SET(stmts, 0, s);
 }
 else {
@@ -796,21 +820,27 @@
 break;
 s = ast_for_stmt(&c, CHILD(n, i));
 if (!s)
- goto error;
+ goto out;
 asdl_seq_SET(stmts, i / 2, s);
 }
 }
 
- return Interactive(stmts, arena);
+ res = Interactive(stmts, arena);
+ break;
 }
 default:
 PyErr_Format(PyExc_SystemError,
 "invalid node %d for PyAST_FromNode", TYPE(n));
- goto error;
+ goto out;
 }
- error:
+ out:
+ if (c.c_normalize) {
+ Py_DECREF(c.c_normalize);
+ PyTuple_SET_ITEM(c.c_normalize_args, 1, NULL);
+ Py_DECREF(c.c_normalize_args);
+ }
 ast_error_finish(filename);
- return NULL;
+ return res;
 }
 
 /* Return the AST repr. of the operator represented as syntax (|, ^, etc.)
-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list

AltStyle によって変換されたページ (->オリジナル) /