Original PR and Discussion: https://github.com/ziglang/zig/pull/23596
Currently std.zon.stringify.serialize always escapes Unicode characters, while std.json.stringify by default does not. This change adds an escape_non_ascii option that matches the JSON serializer's behavior.
~~To maintain backward compatibility, the default value is true, preserving the current behavior of escaping Unicode.~~
escape_non_ascii is false by default; non-ASCII characters are escaped only when it is set to true.
Before and after
// Serializes the same value with the `std.zon` reachable through `@import("std")`
// and prints the result under a "Before:" header.
test "before" {
    const buff = .{ .name = "Test", .description = "⚡ Lightning Bolt", .emoji = "⚡" };
    var aw: std.Io.Writer.Allocating = .init(std.testing.allocator);
    defer aw.deinit();
    try std.zon.stringify.serialize(buff, .{ .whitespace = true }, &aw.writer);
    std.debug.print("\nBefore:\n{s}\n", .{aw.written()});
}

// Same value and options as "before", but serialized through the `zon.zig`
// imported from ./lib/std, and printed under an "After:" header.
test "after" {
    const buff = .{ .name = "Test", .description = "⚡ Lightning Bolt", .emoji = "⚡" };
    var aw: std.Io.Writer.Allocating = .init(std.testing.allocator);
    defer aw.deinit();
    try zon_after.stringify.serialize(buff, .{ .whitespace = true }, &aw.writer);
    std.debug.print("\nAfter:\n{s}\n", .{aw.written()});
}

const std = @import("std");
const zon_after = @import("./lib/std/zon.zig");
const zon_before = std.zon;
Output:
Before:
.{
.name = "Test",
.description = "\xe2\x9a\xa1 Lightning Bolt",
.emoji = "\xe2\x9a\xa1",
}
After:
.{
.name = "Test",
.description = "⚡ Lightning Bolt",
.emoji = "⚡",
}
All 44 tests passed.
More before and after
Generated using the script at https://gist.github.com/nurulhudaapon/50889dd6459ac7a14e4b4bb9e075e56b.
| Options Diff (Before → After) |
Output Diff (Before → After) |
.{
.whitespace = true,
.emit_codepoint_literals = .never,
.emit_strings_as_containers = false,
.emit_default_optional_fields = true,
-}
+ .escape_non_ascii = false,
+}
|
.{
- .text_emoji = "a\xe2\x9a\xa1\n",
+ .text_emoji = "a⚡\n",
- .emoji_str = "\xe2\x9a\xa1",
+ .emoji_str = "⚡",
.dq_inside_dq = "\"",
.sq_inside_sq = "'",
.dq_inside_sq = 34,
.sq_inside_dq = "'",
.emoji_char = 128522,
}
|
.{
.whitespace = true,
.emit_codepoint_literals = .never,
.emit_strings_as_containers = false,
.emit_default_optional_fields = true,
-}
+ .escape_non_ascii = true,
+}
|
.{
- .text_emoji = "a\xe2\x9a\xa1\n",
+ .text_emoji = "a\u{26a1}\n",
- .emoji_str = "\xe2\x9a\xa1",
+ .emoji_str = "\u{26a1}",
.dq_inside_dq = "\"",
.sq_inside_sq = "'",
.dq_inside_sq = 34,
.sq_inside_dq = "'",
.emoji_char = 128522,
}
|
.{
.whitespace = true,
.emit_codepoint_literals = .never,
.emit_strings_as_containers = false,
.emit_default_optional_fields = true,
-}
+ .escape_non_ascii = false,
+}
|
.{
- .text_emoji = "a\xe2\x9a\xa1\n",
+ .text_emoji = "a⚡\n",
- .emoji_str = "\xe2\x9a\xa1",
+ .emoji_str = "⚡",
.dq_inside_dq = "\"",
.sq_inside_sq = "'",
.dq_inside_sq = 34,
.sq_inside_dq = "'",
.emoji_char = 128522,
}
|
.{
.whitespace = true,
.emit_codepoint_literals = .always,
.emit_strings_as_containers = false,
.emit_default_optional_fields = true,
-}
+ .escape_non_ascii = false,
+}
|
.{
- .text_emoji = "a\xe2\x9a\xa1\n",
+ .text_emoji = "a⚡\n",
- .emoji_str = "\xe2\x9a\xa1",
+ .emoji_str = "⚡",
.dq_inside_dq = "\"",
.sq_inside_sq = "'",
.dq_inside_sq = '"',
.sq_inside_dq = "'",
- .emoji_char = '\u{1f60a}',
+ .emoji_char = '😊',
}
|
.{
.whitespace = true,
.emit_codepoint_literals = .always,
.emit_strings_as_containers = false,
.emit_default_optional_fields = true,
-}
+ .escape_non_ascii = true,
+}
|
.{
- .text_emoji = "a\xe2\x9a\xa1\n",
+ .text_emoji = "a\u{26a1}\n",
- .emoji_str = "\xe2\x9a\xa1",
+ .emoji_str = "\u{26a1}",
.dq_inside_dq = "\"",
.sq_inside_sq = "'",
.dq_inside_sq = '"',
.sq_inside_dq = "'",
.emoji_char = '\u{1f60a}',
}
|
.{
.whitespace = true,
.emit_codepoint_literals = .always,
.emit_strings_as_containers = false,
.emit_default_optional_fields = true,
-}
+ .escape_non_ascii = false,
+}
|
.{
- .text_emoji = "a\xe2\x9a\xa1\n",
+ .text_emoji = "a⚡\n",
- .emoji_str = "\xe2\x9a\xa1",
+ .emoji_str = "⚡",
.dq_inside_dq = "\"",
.sq_inside_sq = "'",
.dq_inside_sq = '"',
.sq_inside_dq = "'",
- .emoji_char = '\u{1f60a}',
+ .emoji_char = '😊',
}
|
.{
.whitespace = true,
.emit_codepoint_literals = .printable_ascii,
.emit_strings_as_containers = false,
.emit_default_optional_fields = true,
-}
+ .escape_non_ascii = false,
+}
|
.{
- .text_emoji = "a\xe2\x9a\xa1\n",
+ .text_emoji = "a⚡\n",
- .emoji_str = "\xe2\x9a\xa1",
+ .emoji_str = "⚡",
.dq_inside_dq = "\"",
.sq_inside_sq = "'",
.dq_inside_sq = '"',
.sq_inside_dq = "'",
.emoji_char = 128522,
}
|
.{
.whitespace = true,
.emit_codepoint_literals = .printable_ascii,
.emit_strings_as_containers = false,
.emit_default_optional_fields = true,
-}
+ .escape_non_ascii = true,
+}
|
.{
- .text_emoji = "a\xe2\x9a\xa1\n",
+ .text_emoji = "a\u{26a1}\n",
- .emoji_str = "\xe2\x9a\xa1",
+ .emoji_str = "\u{26a1}",
.dq_inside_dq = "\"",
.sq_inside_sq = "'",
.dq_inside_sq = '"',
.sq_inside_dq = "'",
.emoji_char = 128522,
}
|
.{
.whitespace = true,
.emit_codepoint_literals = .printable_ascii,
.emit_strings_as_containers = false,
.emit_default_optional_fields = true,
-}
+ .escape_non_ascii = false,
+}
|
.{
- .text_emoji = "a\xe2\x9a\xa1\n",
+ .text_emoji = "a⚡\n",
- .emoji_str = "\xe2\x9a\xa1",
+ .emoji_str = "⚡",
.dq_inside_dq = "\"",
.sq_inside_sq = "'",
.dq_inside_sq = '"',
.sq_inside_dq = "'",
.emoji_char = 128522,
}
|
.{
.whitespace = true,
.emit_codepoint_literals = .never,
.emit_strings_as_containers = false,
.emit_default_optional_fields = true,
-}
+ .escape_non_ascii = false,
+}
|
.{
- .text_emoji = "a\xe2\x9a\xa1\n",
+ .text_emoji = "a⚡\n",
- .emoji_str = "\xe2\x9a\xa1",
+ .emoji_str = "⚡",
.dq_inside_dq = "\"",
.sq_inside_sq = "'",
.dq_inside_sq = 34,
.sq_inside_dq = "'",
.emoji_char = 128522,
}
|
.{
.whitespace = true,
.emit_codepoint_literals = .never,
.emit_strings_as_containers = false,
.emit_default_optional_fields = true,
-}
+ .escape_non_ascii = true,
+}
|
.{
- .text_emoji = "a\xe2\x9a\xa1\n",
+ .text_emoji = "a\u{26a1}\n",
- .emoji_str = "\xe2\x9a\xa1",
+ .emoji_str = "\u{26a1}",
.dq_inside_dq = "\"",
.sq_inside_sq = "'",
.dq_inside_sq = 34,
.sq_inside_dq = "'",
.emoji_char = 128522,
}
|
.{
.whitespace = true,
.emit_codepoint_literals = .never,
.emit_strings_as_containers = false,
.emit_default_optional_fields = true,
-}
+ .escape_non_ascii = false,
+}
|
.{
- .text_emoji = "a\xe2\x9a\xa1\n",
+ .text_emoji = "a⚡\n",
- .emoji_str = "\xe2\x9a\xa1",
+ .emoji_str = "⚡",
.dq_inside_dq = "\"",
.sq_inside_sq = "'",
.dq_inside_sq = 34,
.sq_inside_dq = "'",
.emoji_char = 128522,
}
|
Use Case
I was trying to store Unicode data in a ZON file; I had previously stored the same data in JSON. When converting from JSON to ZON using the JSON parser and the ZON serializer, the Unicode characters were always escaped. This made the ZON file hard to read, which defeats its purpose as a human-readable format.
Closes #23535
Original PR and Discussion: https://github.com/ziglang/zig/pull/23596
Currently std.zon.stringify.serialize always escapes Unicode characters, while std.json.stringify by default does not. This change adds an escape_non_ascii option that matches the JSON serializer's behavior.
~~To maintain backward compatibility, the default value is true, preserving the current behavior of escaping Unicode.~~
`escape_non_ascii` is `false` by default; non-ASCII characters are escaped only when it is set to `true`.
#### Before and after
```zig
// Serializes a sample struct with `std.zon.stringify.serialize` and prints
// the resulting ZON text under a "Before:" header.
test "before" {
    const sample = .{ .name = "Test", .description = "⚡ Lightning Bolt", .emoji = "⚡" };

    // The allocating writer collects the output in memory; it is released on scope exit.
    var sink: std.Io.Writer.Allocating = .init(std.testing.allocator);
    defer sink.deinit();

    try std.zon.stringify.serialize(sample, .{ .whitespace = true }, &sink.writer);

    const rendered = sink.written();
    std.debug.print("\nBefore:\n{s}\n", .{rendered});
}
// Serializes the same sample struct through `zon_after` (the `zon.zig`
// imported from ./lib/std) and prints the result under an "After:" header.
test "after" {
    const sample = .{ .name = "Test", .description = "⚡ Lightning Bolt", .emoji = "⚡" };

    // The allocating writer collects the output in memory; it is released on scope exit.
    var sink: std.Io.Writer.Allocating = .init(std.testing.allocator);
    defer sink.deinit();

    try zon_after.stringify.serialize(sample, .{ .whitespace = true }, &sink.writer);

    const rendered = sink.written();
    std.debug.print("\nAfter:\n{s}\n", .{rendered});
}
const std = @import("std");
const zon_after = @import("./lib/std/zon.zig");
const zon_before = std.zon;
```
Output:
```
Before:
.{
.name = "Test",
.description = "\xe2\x9a\xa1 Lightning Bolt",
.emoji = "\xe2\x9a\xa1",
}
After:
.{
.name = "Test",
.description = "⚡ Lightning Bolt",
.emoji = "⚡",
}
All 44 tests passed.
```
#### More before and after
Generated using script [here](https://gist.github.com/nurulhudaapon/50889dd6459ac7a14e4b4bb9e075e56b).
<table>
<thead>
<tr>
<th>Options Diff (Before → After)</th>
<th>Output Diff (Before → After)</th>
</tr>
</thead>
<tbody>
<tr>
<td>
```diff
.{
.whitespace = true,
.emit_codepoint_literals = .never,
.emit_strings_as_containers = false,
.emit_default_optional_fields = true,
-}
+ .escape_non_ascii = false,
+}
```
</td>
<td>
```diff
.{
- .text_emoji = "a\xe2\x9a\xa1\n",
+ .text_emoji = "a⚡\n",
- .emoji_str = "\xe2\x9a\xa1",
+ .emoji_str = "⚡",
.dq_inside_dq = "\"",
.sq_inside_sq = "'",
.dq_inside_sq = 34,
.sq_inside_dq = "'",
.emoji_char = 128522,
}
```
</td>
</tr>
<tr>
<td>
```diff
.{
.whitespace = true,
.emit_codepoint_literals = .never,
.emit_strings_as_containers = false,
.emit_default_optional_fields = true,
-}
+ .escape_non_ascii = true,
+}
```
</td>
<td>
```diff
.{
- .text_emoji = "a\xe2\x9a\xa1\n",
+ .text_emoji = "a\u{26a1}\n",
- .emoji_str = "\xe2\x9a\xa1",
+ .emoji_str = "\u{26a1}",
.dq_inside_dq = "\"",
.sq_inside_sq = "'",
.dq_inside_sq = 34,
.sq_inside_dq = "'",
.emoji_char = 128522,
}
```
</td>
</tr>
<tr>
<td>
```diff
.{
.whitespace = true,
.emit_codepoint_literals = .never,
.emit_strings_as_containers = false,
.emit_default_optional_fields = true,
-}
+ .escape_non_ascii = false,
+}
```
</td>
<td>
```diff
.{
- .text_emoji = "a\xe2\x9a\xa1\n",
+ .text_emoji = "a⚡\n",
- .emoji_str = "\xe2\x9a\xa1",
+ .emoji_str = "⚡",
.dq_inside_dq = "\"",
.sq_inside_sq = "'",
.dq_inside_sq = 34,
.sq_inside_dq = "'",
.emoji_char = 128522,
}
```
</td>
</tr>
<tr>
<td>
```diff
.{
.whitespace = true,
.emit_codepoint_literals = .always,
.emit_strings_as_containers = false,
.emit_default_optional_fields = true,
-}
+ .escape_non_ascii = false,
+}
```
</td>
<td>
```diff
.{
- .text_emoji = "a\xe2\x9a\xa1\n",
+ .text_emoji = "a⚡\n",
- .emoji_str = "\xe2\x9a\xa1",
+ .emoji_str = "⚡",
.dq_inside_dq = "\"",
.sq_inside_sq = "'",
.dq_inside_sq = '"',
.sq_inside_dq = "'",
- .emoji_char = '\u{1f60a}',
+ .emoji_char = '😊',
}
```
</td>
</tr>
<tr>
<td>
```diff
.{
.whitespace = true,
.emit_codepoint_literals = .always,
.emit_strings_as_containers = false,
.emit_default_optional_fields = true,
-}
+ .escape_non_ascii = true,
+}
```
</td>
<td>
```diff
.{
- .text_emoji = "a\xe2\x9a\xa1\n",
+ .text_emoji = "a\u{26a1}\n",
- .emoji_str = "\xe2\x9a\xa1",
+ .emoji_str = "\u{26a1}",
.dq_inside_dq = "\"",
.sq_inside_sq = "'",
.dq_inside_sq = '"',
.sq_inside_dq = "'",
.emoji_char = '\u{1f60a}',
}
```
</td>
</tr>
<tr>
<td>
```diff
.{
.whitespace = true,
.emit_codepoint_literals = .always,
.emit_strings_as_containers = false,
.emit_default_optional_fields = true,
-}
+ .escape_non_ascii = false,
+}
```
</td>
<td>
```diff
.{
- .text_emoji = "a\xe2\x9a\xa1\n",
+ .text_emoji = "a⚡\n",
- .emoji_str = "\xe2\x9a\xa1",
+ .emoji_str = "⚡",
.dq_inside_dq = "\"",
.sq_inside_sq = "'",
.dq_inside_sq = '"',
.sq_inside_dq = "'",
- .emoji_char = '\u{1f60a}',
+ .emoji_char = '😊',
}
```
</td>
</tr>
<tr>
<td>
```diff
.{
.whitespace = true,
.emit_codepoint_literals = .printable_ascii,
.emit_strings_as_containers = false,
.emit_default_optional_fields = true,
-}
+ .escape_non_ascii = false,
+}
```
</td>
<td>
```diff
.{
- .text_emoji = "a\xe2\x9a\xa1\n",
+ .text_emoji = "a⚡\n",
- .emoji_str = "\xe2\x9a\xa1",
+ .emoji_str = "⚡",
.dq_inside_dq = "\"",
.sq_inside_sq = "'",
.dq_inside_sq = '"',
.sq_inside_dq = "'",
.emoji_char = 128522,
}
```
</td>
</tr>
<tr>
<td>
```diff
.{
.whitespace = true,
.emit_codepoint_literals = .printable_ascii,
.emit_strings_as_containers = false,
.emit_default_optional_fields = true,
-}
+ .escape_non_ascii = true,
+}
```
</td>
<td>
```diff
.{
- .text_emoji = "a\xe2\x9a\xa1\n",
+ .text_emoji = "a\u{26a1}\n",
- .emoji_str = "\xe2\x9a\xa1",
+ .emoji_str = "\u{26a1}",
.dq_inside_dq = "\"",
.sq_inside_sq = "'",
.dq_inside_sq = '"',
.sq_inside_dq = "'",
.emoji_char = 128522,
}
```
</td>
</tr>
<tr>
<td>
```diff
.{
.whitespace = true,
.emit_codepoint_literals = .printable_ascii,
.emit_strings_as_containers = false,
.emit_default_optional_fields = true,
-}
+ .escape_non_ascii = false,
+}
```
</td>
<td>
```diff
.{
- .text_emoji = "a\xe2\x9a\xa1\n",
+ .text_emoji = "a⚡\n",
- .emoji_str = "\xe2\x9a\xa1",
+ .emoji_str = "⚡",
.dq_inside_dq = "\"",
.sq_inside_sq = "'",
.dq_inside_sq = '"',
.sq_inside_dq = "'",
.emoji_char = 128522,
}
```
</td>
</tr>
<tr>
<td>
```diff
.{
.whitespace = true,
.emit_codepoint_literals = .never,
.emit_strings_as_containers = false,
.emit_default_optional_fields = true,
-}
+ .escape_non_ascii = false,
+}
```
</td>
<td>
```diff
.{
- .text_emoji = "a\xe2\x9a\xa1\n",
+ .text_emoji = "a⚡\n",
- .emoji_str = "\xe2\x9a\xa1",
+ .emoji_str = "⚡",
.dq_inside_dq = "\"",
.sq_inside_sq = "'",
.dq_inside_sq = 34,
.sq_inside_dq = "'",
.emoji_char = 128522,
}
```
</td>
</tr>
<tr>
<td>
```diff
.{
.whitespace = true,
.emit_codepoint_literals = .never,
.emit_strings_as_containers = false,
.emit_default_optional_fields = true,
-}
+ .escape_non_ascii = true,
+}
```
</td>
<td>
```diff
.{
- .text_emoji = "a\xe2\x9a\xa1\n",
+ .text_emoji = "a\u{26a1}\n",
- .emoji_str = "\xe2\x9a\xa1",
+ .emoji_str = "\u{26a1}",
.dq_inside_dq = "\"",
.sq_inside_sq = "'",
.dq_inside_sq = 34,
.sq_inside_dq = "'",
.emoji_char = 128522,
}
```
</td>
</tr>
<tr>
<td>
```diff
.{
.whitespace = true,
.emit_codepoint_literals = .never,
.emit_strings_as_containers = false,
.emit_default_optional_fields = true,
-}
+ .escape_non_ascii = false,
+}
```
</td>
<td>
```diff
.{
- .text_emoji = "a\xe2\x9a\xa1\n",
+ .text_emoji = "a⚡\n",
- .emoji_str = "\xe2\x9a\xa1",
+ .emoji_str = "⚡",
.dq_inside_dq = "\"",
.sq_inside_sq = "'",
.dq_inside_sq = 34,
.sq_inside_dq = "'",
.emoji_char = 128522,
}
```
</td>
</tr>
</tbody>
</table>
#### Use Case
I was trying to store Unicode data in a ZON file; I had previously stored the same data in JSON. When converting from JSON to ZON using the JSON parser and the ZON serializer, the Unicode characters were always escaped. This made the ZON file hard to read, which defeats its purpose as a human-readable format.
Closes #23535