From: Eric Blake Date: 2007-12-11 16:36:08 +0000 (-0700) Subject: Stage 7: use chained input support in input parser. X-Git-Url: https://gitweb.git.savannah.gnu.org/gitweb/?a=commitdiff_plain;h=8f276ee65769eae767a638b98d354fe1c3b2700d;p=m4.git Stage 7: use chained input support in input parser. * m4/m4private.h (m4__push_symbol): New prototype. (struct m4_symbol_chain): Add const-safety. * m4/symtab.c (m4_symbol_value_print): Simplify. (dump_symbol_CB): Update caller. * m4/input.c (struct m4_input_block): Alter u_c member, first introduced 2006-10-25, but unused until now. (composite_peek, composite_read, composite_unget) (composite_print, init_builtin_token): Rewrite accordingly. (m4_push_wrapup): No longer need trailing NUL. (m4__push_symbol, make_text_link): New functions. (m4_push_string_finish): Use them. * m4/macro.c (m4_push_arg, m4_push_args): Likewise. (expand_macro): Simplify logic of nesting_limit. * src/main.c (main): Likewise. * doc/m4.texinfo (Dumpdef): Augment test. Signed-off-by: Eric Blake --- diff --git a/ChangeLog b/ChangeLog index e79d63a7..0ec0df76 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,22 @@ +2007-12-11 Eric Blake + + Stage 7: use chained input support in input parser. + * m4/m4private.h (m4__push_symbol): New prototype. + (struct m4_symbol_chain): Add const-safety. + * m4/symtab.c (m4_symbol_value_print): Simplify. + (dump_symbol_CB): Update caller. + * m4/input.c (struct m4_input_block): Alter u_c member, first + introduced 2006-10-25, but unused until now. + (composite_peek, composite_read, composite_unget) + (composite_print, init_builtin_token): Rewrite accordingly. + (m4_push_wrapup): No longer need trailing NUL. + (m4__push_symbol, make_text_link): New functions. + (m4_push_string_finish): Use them. + * m4/macro.c (m4_push_arg, m4_push_args): Likewise. + (expand_macro): Simplify logic of nesting_limit. + * src/main.c (main): Likewise. + * doc/m4.texinfo (Dumpdef): Augment test. 
+ 2007-12-08 Eric Blake Stage 6: convert builtins to push arg at a time. diff --git a/doc/m4.texinfo b/doc/m4.texinfo index dcadc67d..14017d81 100644 --- a/doc/m4.texinfo +++ b/doc/m4.texinfo @@ -3450,7 +3450,10 @@ dumpdef(`foo', `dnl', `indir', `__gnu__') @error{}foo:@tabchar{}3, @{m4@}, 1 l... @error{}indir:@tabchar{}@{gnu@} @result{} -debugmode(`-m') +debugmode(`-ms')debugmode(`+q') +@result{} +dumpdef(`foo') +@error{}foo:@tabchar{}`3' @result{} @end example diff --git a/m4/input.c b/m4/input.c index fd0e677e..28b00a87 100644 --- a/m4/input.c +++ b/m4/input.c @@ -105,6 +105,8 @@ static int composite_read (m4_input_block *, m4 *, bool); static void composite_unget (m4_input_block *, int); static void composite_print (m4_input_block *, m4 *, m4_obstack *); +static void make_text_link (m4_obstack *, m4_symbol_chain **, + m4_symbol_chain **); static void init_builtin_token (m4 *, m4_symbol_value *); static bool match_input (m4 *, const char *, bool); static int next_char (m4 *, bool); @@ -125,9 +127,9 @@ struct input_funcs int (*peek_func) (m4_input_block *); /* Read input, return an unsigned char, CHAR_BUILTIN if it is a - builtin, or CHAR_RETRY if none available. If the flag is false, - then do not alter the current file or line. */ - int (*read_func) (m4_input_block *, m4 *, bool); + builtin, or CHAR_RETRY if none available. If SAFE, then do not + alter the current file or line. */ + int (*read_func) (m4_input_block *, m4 *, bool safe); /* Unread a single unsigned character or CHAR_BUILTIN, must be the same character previously read by read_func. */ @@ -176,8 +178,8 @@ struct m4_input_block u_b; /* See builtin_funcs. */ struct { - m4_input_block *current; /* Pointer to current sub-block. */ - m4_input_block *tail; /* Pointer to last sub-block. */ + m4_symbol_chain *chain; /* Current link in chain. */ + m4_symbol_chain *end; /* Last link in chain. */ } u_c; /* See composite_funcs. 
*/ } @@ -219,12 +221,28 @@ static bool start_of_input_line; /* Flag for next_char () to recognize change in input block. */ static bool input_change; - -/* Input files, from command line or [s]include. */ +/* Vtable for handling input from files. */ static struct input_funcs file_funcs = { file_peek, file_read, file_unget, file_clean, file_print }; +/* Vtable for handling input from builtin functions. */ +static struct input_funcs builtin_funcs = { + builtin_peek, builtin_read, builtin_unget, NULL, builtin_print +}; + +/* Vtable for handling input from strings. */ +static struct input_funcs string_funcs = { + string_peek, string_read, string_unget, NULL, string_print +}; + +/* Vtable for handling input from composite chains. */ +static struct input_funcs composite_funcs = { + composite_peek, composite_read, composite_unget, NULL, composite_print +}; + + +/* Input files, from command line or [s]include. */ static int file_peek (m4_input_block *me) { @@ -242,7 +260,7 @@ file_peek (m4_input_block *me) } static int -file_read (m4_input_block *me, m4 *context, bool retry M4_GNUC_UNUSED) +file_read (m4_input_block *me, m4 *context, bool safe M4_GNUC_UNUSED) { int ch; @@ -356,10 +374,6 @@ m4_push_file (m4 *context, FILE *fp, const char *title, bool close_file) /* Handle a builtin macro token. */ -static struct input_funcs builtin_funcs = { - builtin_peek, builtin_read, builtin_unget, NULL, builtin_print -}; - static int builtin_peek (m4_input_block *me) { @@ -371,7 +385,7 @@ builtin_peek (m4_input_block *me) static int builtin_read (m4_input_block *me, m4 *context M4_GNUC_UNUSED, - bool retry M4_GNUC_UNUSED) + bool safe M4_GNUC_UNUSED) { if (me->u.u_b.read) return CHAR_RETRY; @@ -445,10 +459,6 @@ m4_push_builtin (m4 *context, m4_symbol_value *token) /* Handle string expansion text. 
*/ -static struct input_funcs string_funcs = { - string_peek, string_read, string_unget, NULL, string_print -}; - static int string_peek (m4_input_block *me) { @@ -457,7 +467,7 @@ string_peek (m4_input_block *me) static int string_read (m4_input_block *me, m4 *context M4_GNUC_UNUSED, - bool retry M4_GNUC_UNUSED) + bool safe M4_GNUC_UNUSED) { if (!me->u.u_s.len) return CHAR_RETRY; @@ -483,8 +493,9 @@ string_print (m4_input_block *me, m4 *context, m4_obstack *obs) quote, &arg_length); } -/* First half of m4_push_string (). The pointer next points to the new - input_block. */ +/* First half of m4_push_string (). The pointer next points to the + new input_block. Return the obstack that will collect the + expansion text. */ m4_obstack * m4_push_string_init (m4 *context) { @@ -501,6 +512,54 @@ m4_push_string_init (m4 *context) return current_input; } +/* If VALUE contains text, then convert the current string into a + chain if it is not one already, and add the contents of VALUE as a + new link in the chain. LEVEL describes the current expansion + level, or SIZE_MAX if the contents of VALUE reside entirely on the + current_input stack and VALUE lives in temporary storage. Allows + gathering input from multiple locations, rather than copying + everything consecutively onto the input stack. Must be called + between push_string_init and push_string_finish. */ +void +m4__push_symbol (m4_symbol_value *value, size_t level) +{ + m4_symbol_chain *chain; + + assert (next); + /* TODO - also accept TOKEN_COMP chains. 
*/ + assert (m4_is_symbol_value_text (value)); + if (m4_get_symbol_value_len (value) == 0) + return; + + if (next->funcs == &string_funcs) + { + next->funcs = &composite_funcs; + next->u.u_c.chain = next->u.u_c.end = NULL; + } + make_text_link (current_input, &next->u.u_c.chain, &next->u.u_c.end); + chain = (m4_symbol_chain *) obstack_alloc (current_input, sizeof *chain); + if (next->u.u_c.end) + next->u.u_c.end->next = chain; + else + next->u.u_c.chain = chain; + next->u.u_c.end = chain; + chain->next = NULL; + if (level != SIZE_MAX) + /* TODO - use token as-is, rather than copying data. This implies + lengthening lifetime of $@ arguments until the rescan is + complete, rather than the current approach of freeing them + during expand_macro. */ + chain->str = (char *) obstack_copy (current_input, + m4_get_symbol_value_text (value), + m4_get_symbol_value_len (value)); + else + chain->str = m4_get_symbol_value_text (value); + chain->len = m4_get_symbol_value_len (value); + chain->argv = NULL; + chain->index = 0; + chain->flatten = false; +} + /* Last half of m4_push_string (). If next is now NULL, a call to m4_push_file () or m4_push_builtin () has pushed a different input block to the top of the stack. If the new object is void, we do @@ -522,11 +581,15 @@ m4_push_string_finish (void) return isp; } - if (len) + if (len || next->funcs == &composite_funcs) { - next->u.u_s.len = len; - obstack_1grow (current_input, '\0'); - next->u.u_s.str = obstack_finish (current_input); + if (next->funcs == &string_funcs) + { + next->u.u_s.str = (char *) obstack_finish (current_input); + next->u.u_s.len = len; + } + else + make_text_link (current_input, &next->u.u_c.chain, &next->u.u_c.end); next->prev = isp; ret = isp = next; input_change = true; @@ -541,78 +604,126 @@ m4_push_string_finish (void) /* A composite block contains multiple sub-blocks which are processed in FIFO order, even though the obstack allocates memory in LIFO order. 
*/ -static struct input_funcs composite_funcs = { - composite_peek, composite_read, composite_unget, NULL, composite_print -}; - static int composite_peek (m4_input_block *me) { - int ch = CHAR_RETRY; - m4_input_block *subblock = me->u.u_c.current; - while (ch == CHAR_RETRY && subblock != NULL) + m4_symbol_chain *chain = me->u.u_c.chain; + while (chain) { - ch = subblock->funcs->peek_func (subblock); - subblock = subblock->prev; + if (chain->str) + { + if (chain->len) + return to_uchar (chain->str[0]); + } + else + { + /* TODO - peek into argv. */ + assert (!"implemented yet"); + abort (); + } + chain = chain->next; } - return ch; + return CHAR_RETRY; } static int -composite_read (m4_input_block *me, m4 *context, bool retry) +composite_read (m4_input_block *me, m4 *context, bool safe) { - int ch; - m4_input_block *subblock; - - /* Check if input exhausted. */ - subblock = me->u.u_c.current; - if (subblock == NULL) - return CHAR_RETRY; - - /* See if current block has real character. */ - ch = subblock->funcs->read_func (subblock, context, retry); - if (ch != CHAR_RETRY || !retry) + m4_symbol_chain *chain = me->u.u_c.chain; + while (chain) { - me->line = subblock->line; - return ch; + if (chain->str) + { + if (chain->len) + { + chain->len--; + return to_uchar (*chain->str++); + } + } + else + { + /* TODO - peek into argv. */ + assert (!"implemented yet"); + abort (); + } + if (safe) + return CHAR_RETRY; + me->u.u_c.chain = chain = chain->next; } + return CHAR_RETRY; +} - /* Pop blocks until we get real character. 
*/ - while (ch == CHAR_RETRY && subblock != NULL) +static void +composite_unget (m4_input_block *me, int ch) +{ + m4_symbol_chain *chain = me->u.u_c.chain; + if (chain->str) { - if (subblock->funcs->clean_func) - subblock->funcs->clean_func (subblock, context); - subblock = me->u.u_c.current->prev; - me->u.u_c.current = subblock; - input_change = true; - if (subblock) - ch = subblock->funcs->read_func (subblock, context, retry); + assert (ch < CHAR_EOF && to_uchar (chain->str[-1]) == ch); + chain->str--; + chain->len++; + } + else + { + /* TODO support argv ref. */ + assert (!"implemented yet"); + abort (); } - me->file = subblock->file; - me->line = subblock->line; - return ch; } static void -composite_unget (m4_input_block *me, int ch) +composite_print (m4_input_block *me, m4 *context, m4_obstack *obs) { - assert (me->u.u_c.current); - me->u.u_c.current->funcs->unget_func (me->u.u_c.current, ch); + bool quote = m4_is_debug_bit (context, M4_DEBUG_TRACE_QUOTE); + size_t maxlen = m4_get_max_debug_arg_length_opt (context); + m4_symbol_chain *chain = me->u.u_c.chain; + const char *lquote = m4_get_syntax_lquote (M4SYNTAX); + const char *rquote = m4_get_syntax_rquote (M4SYNTAX); + + if (quote) + m4_shipout_string (context, obs, lquote, SIZE_MAX, false); + while (chain) + { + /* TODO support argv refs as well. */ + assert (chain->str); + if (m4_shipout_string_trunc (context, obs, chain->str, chain->len, false, + &maxlen)) + break; + chain = chain->next; + } + if (quote) + m4_shipout_string (context, obs, rquote, SIZE_MAX, false); } +/* Given an obstack OBS, capture any unfinished text as a link in the + chain that starts at *START and ends at *END. START may be NULL if + *END is non-NULL. 
*/ static void -composite_print (m4_input_block *me, m4 *context, m4_obstack *obs) +make_text_link (m4_obstack *obs, m4_symbol_chain **start, + m4_symbol_chain **end) { - m4_input_block *subblock = me->u.u_c.current; - while (subblock) + m4_symbol_chain *chain; + size_t len = obstack_object_size (obs); + + assert (end && (start || *end)); + if (len) { - subblock->funcs->print_func (subblock, context, obs); - subblock = subblock->prev; + char *str = (char *) obstack_finish (obs); + chain = (m4_symbol_chain *) obstack_alloc (obs, sizeof *chain); + if (*end) + (*end)->next = chain; + else + *start = chain; + *end = chain; + chain->next = NULL; + chain->str = str; + chain->len = len; + chain->argv = NULL; + chain->index = 0; + chain->flatten = false; } } -/*TODO FIXME - in m4_push_file/m4_push_builtin, if next is not NULL, - call create_composite. */ /* When tracing, print a summary of the contents of the input block @@ -660,7 +771,7 @@ m4_push_wrapup (m4 *context, const char *s) i->line = m4_get_current_line (context); i->u.u_s.len = strlen (s); - i->u.u_s.str = obstack_copy0 (wrapup_stack, s, i->u.u_s.len); + i->u.u_s.str = obstack_copy (wrapup_stack, s, i->u.u_s.len); wsp = i; } @@ -742,8 +853,6 @@ static void init_builtin_token (m4 *context, m4_symbol_value *token) { m4_input_block *block = isp; - if (block->funcs == &composite_funcs) - block = block->u.u_c.current; assert (block->funcs->read_func == builtin_read && !block->u.u_b.read); m4_set_symbol_value_builtin (token, block->u.u_b.builtin); @@ -780,7 +889,7 @@ next_char (m4 *context, bool retry) } assert (isp->funcs->read_func); - while ((ch = isp->funcs->read_func (isp, context, retry)) != CHAR_RETRY + while ((ch = isp->funcs->read_func (isp, context, !retry)) != CHAR_RETRY || !retry) { /* if (!IS_IGNORE (ch)) */ diff --git a/m4/m4private.h b/m4/m4private.h index db1f5136..b93d876c 100644 --- a/m4/m4private.h +++ b/m4/m4private.h @@ -193,7 +193,7 @@ struct m4_symbol struct m4_symbol_chain { m4_symbol_chain 
*next;/* Pointer to next link of chain. */ - char *str; /* NUL-terminated string if text, or NULL. */ + const char *str; /* NUL-terminated string if text, or NULL. */ size_t len; /* Length of str, or 0. */ m4_macro_args *argv; /* Reference to earlier $@. */ unsigned int index; /* Argument index within argv. */ @@ -432,6 +432,7 @@ typedef enum { M4_TOKEN_MACDEF /* Macro's definition (see "defn"), M4_SYMBOL_FUNC. */ } m4__token_type; +extern void m4__push_symbol (m4_symbol_value *, size_t); extern m4__token_type m4__next_token (m4 *, m4_symbol_value *, int *, const char *); extern bool m4__next_token_is_open (m4 *); diff --git a/m4/macro.c b/m4/macro.c index b3080eb6..1108d081 100644 --- a/m4/macro.c +++ b/m4/macro.c @@ -329,9 +329,7 @@ expand_macro (m4 *context, const char *name, size_t len, m4_symbol *symbol) /* Prepare for macro expansion. */ VALUE_PENDING (value)++; - expansion_level++; - if (m4_get_nesting_limit_opt (context) > 0 - && expansion_level > m4_get_nesting_limit_opt (context)) + if (m4_get_nesting_limit_opt (context) < ++expansion_level) m4_error (context, EXIT_FAILURE, 0, NULL, _("\ recursion limit of %zu exceeded, use -L to change it"), m4_get_nesting_limit_opt (context)); @@ -968,9 +966,7 @@ m4_push_arg (m4 *context, m4_obstack *obs, m4_macro_args *argv, return; /* TODO handle builtin tokens? */ assert (value->type == M4_SYMBOL_TEXT); - /* TODO push a reference, rather than copying data. */ - obstack_grow (obs, m4_get_symbol_value_text (value), - m4_get_symbol_value_len (value)); + m4__push_symbol (value, expansion_level - 1); } /* Push series of comma-separated arguments from ARGV, which should @@ -982,22 +978,49 @@ m4_push_args (m4 *context, m4_obstack *obs, m4_macro_args *argv, bool skip, bool quote) { m4_symbol_value *value; - unsigned int i; - bool comma = false; + m4_symbol_value sep; + unsigned int i = skip ? 
2 : 1; + bool use_sep = false; + const char *lquote = m4_get_syntax_lquote (M4SYNTAX); + const char *rquote = m4_get_syntax_rquote (M4SYNTAX); + + if (argv->argc <= i) + return; + + if (quote) + { + const char *str; + size_t len; + obstack_grow (obs, lquote, strlen (lquote)); + len = obstack_object_size (obs); + obstack_1grow (obs, '\0'); + str = (char *) obstack_finish (obs); + m4_set_symbol_value_text (&sep, str, len, 0); + m4__push_symbol (&sep, SIZE_MAX); + obstack_grow (obs, rquote, strlen (rquote)); + obstack_1grow (obs, ','); + obstack_grow0 (obs, lquote, strlen (lquote)); + str = (char *) obstack_finish (obs); + m4_set_symbol_value_text (&sep, str, + strlen (rquote) + 1 + strlen (lquote), 0); + } + else + m4_set_symbol_value_text (&sep, ",", 1, 0); - /* TODO push reference, rather than copying data. */ - for (i = skip ? 2 : 1; i < argv->argc; i++) + /* TODO push entire $@ ref, rather than each arg. */ + for ( ; i < argv->argc; i++) { value = m4_arg_symbol (argv, i); - if (comma) - obstack_1grow (obs, ','); + if (use_sep) + m4__push_symbol (&sep, SIZE_MAX); else - comma = true; + use_sep = true; /* TODO handle builtin tokens? */ assert (value->type == M4_SYMBOL_TEXT); - m4_shipout_string (context, obs, m4_get_symbol_value_text (value), - m4_get_symbol_value_len (value), quote); + m4__push_symbol (value, expansion_level - 1); } + if (quote) + obstack_grow (obs, rquote, strlen (rquote)); } diff --git a/m4/symtab.c b/m4/symtab.c index 932a31fa..95ed36ed 100644 --- a/m4/symtab.c +++ b/m4/symtab.c @@ -476,13 +476,13 @@ m4_set_symbol_name_traced (m4_symbol_table *symtab, const char *name, } /* Grow OBS with a text representation of VALUE. If QUOTE, then - surround a text definition by LQUOTE and RQUOTE. If ARG_LENGTH is - non-zero, then truncate text definitions to that length. If + surround a text definition by LQUOTE and RQUOTE. If MAXLEN is less + than SIZE_MAX, then truncate text definitions to that length. 
If MODULE, then include which module defined a builtin. */ void m4_symbol_value_print (m4_symbol_value *value, m4_obstack *obs, bool quote, - const char *lquote, const char *rquote, - size_t arg_length, bool module) + const char *lquote, const char *rquote, size_t maxlen, + bool module) { const char *text; size_t len; @@ -492,9 +492,9 @@ m4_symbol_value_print (m4_symbol_value *value, m4_obstack *obs, bool quote, { text = m4_get_symbol_value_text (value); len = m4_get_symbol_value_len (value); - if (arg_length && arg_length < len) + if (maxlen < len) { - len = arg_length; + len = maxlen; truncated = true; } } @@ -541,8 +541,8 @@ m4_symbol_value_print (m4_symbol_value *value, m4_obstack *obs, bool quote, /* Grow OBS with a text representation of SYMBOL. If QUOTE, then surround each text definition by LQUOTE and RQUOTE. If STACK, then append all pushdef'd values, rather than just the top. If - ARG_LENGTH is non-zero, then truncate text definitions to that - length. If MODULE, then include which module defined a + ARG_LENGTH is less than SIZE_MAX, then truncate text definitions to + that length. If MODULE, then include which module defined a builtin. 
*/ void m4_symbol_print (m4_symbol *symbol, m4_obstack *obs, bool quote, @@ -698,7 +698,7 @@ m4_get_symbol_value_placeholder (m4_symbol_value *value) #undef m4_set_symbol_value_text void m4_set_symbol_value_text (m4_symbol_value *value, const char *text, size_t len, - unsigned int quote_age) + unsigned int quote_age) { assert (value && text); /* TODO - this assertion enforces NUL-terminated text with no @@ -768,7 +768,7 @@ dump_symbol_CB (m4_symbol_table *symtab, const char *name, { m4_obstack obs; obstack_init (&obs); - m4_symbol_value_print (value, &obs, false, NULL, NULL, 0, true); + m4_symbol_value_print (value, &obs, false, NULL, NULL, SIZE_MAX, true); xfprintf (stderr, "%s", (char *) obstack_finish (&obs)); obstack_free (&obs, NULL); } diff --git a/src/main.c b/src/main.c index 344db58f..48d99d27 100644 --- a/src/main.c +++ b/src/main.c @@ -482,6 +482,8 @@ main (int argc, char *const *argv, char *const *envp) case 'L': size = size_opt (optarg, oi, optchar); + if (!size) + size = SIZE_MAX; m4_set_nesting_limit_opt (context, size); break;

AltStyle によって変換されたページ (->オリジナル) /