#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <wchar.h>

#include <sys/types.h>
/* Character storage types for the 1-, 2- and 4-byte string kinds.
   Exact-width types from <stdint.h> guarantee the 8/16/32-bit sizes by
   construction instead of relying on the platform's short/int widths. */
typedef uint16_t Py_UCS2;
typedef uint8_t Py_UCS1;
typedef uint32_t Py_UCS4;
/* Signed size type used for lengths and counts.
   ssize_t is a POSIX type declared by <sys/types.h>; that header must be
   included explicitly because no ISO C header is required to provide it. */
typedef ssize_t Py_ssize_t;
/* Type of a cached hash value (same representation as Py_ssize_t). */
typedef Py_ssize_t Py_hash_t;
/* Two extra pointer members placed at the very start of every object.
   NOTE(review): presumably the links of a doubly-linked list of all objects,
   as in CPython's trace-refs debug builds; nothing in this file reads or
   writes them -- confirm against the build this test is meant to mirror. */
#define _PyObject_HEAD_EXTRA \
 struct _object *_ob_next; \
 struct _object *_ob_prev;
/* Common header shared by every object structure in this file. */
typedef struct _object {
 _PyObject_HEAD_EXTRA
 Py_ssize_t ob_refcnt; /* reference count */
 struct _typeobject *ob_type; /* pointer to the type object; struct
 _typeobject is left incomplete here */
} PyObject;
/* Declares the embedded PyObject header as a member named ob_base.
   The expansion already ends with ';', so it is used without a trailing
   semicolon inside a struct body. */
#define PyObject_HEAD PyObject ob_base;
/* ASCII-only strings created through PyUnicode_New use the PyASCIIObject
 structure. state.ascii and state.compact are set, and the data
 immediately follow the structure. utf8_length and wstr_length can be found
 in the length field; the utf8 pointer is equal to the data pointer. */
typedef struct {
 /* There are 4 forms of Unicode strings:

 - compact ascii:
   * structure = PyASCIIObject
   * kind = PyUnicode_1BYTE_KIND
   * compact = 1
   * ascii = 1
   * ready = 1
   * (length is the length of the utf8 and wstr strings)
   * (data starts just after the structure)
   * (since ASCII is decoded from UTF-8, the utf8 string is the data)

 - compact:
   * structure = PyCompactUnicodeObject
   * kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or
     PyUnicode_4BYTE_KIND
   * compact = 1
   * ready = 1
   * ascii = 0
   * utf8 is not shared with data
   * utf8_length = 0 if utf8 is NULL
   * wstr is shared with data and wstr_length=length
     if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2
     or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_t)=4
   * wstr_length = 0 if wstr is NULL
   * (data starts just after the structure)

 - legacy string, not ready:
   * structure = PyUnicodeObject
   * kind = PyUnicode_WCHAR_KIND
   * compact = 0
   * ascii = 0
   * ready = 0
   * wstr is not NULL
   * data.any is NULL
   * utf8 is NULL
   * utf8_length = 0
   * interned = SSTATE_NOT_INTERNED

 - legacy string, ready:
   * structure = PyUnicodeObject structure
   * kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or
     PyUnicode_4BYTE_KIND
   * compact = 0
   * ready = 1
   * data.any is not NULL
   * utf8 is shared with data.any and utf8_length = length if ascii = 1
   * utf8_length = 0 if utf8 is NULL
   * wstr is shared with data.any and wstr_length = length
     if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2
     or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_t)=4
   * wstr_length = 0 if wstr is NULL

 Compact strings use only one memory block (structure + characters),
 whereas legacy strings use one block for the structure and one block
 for characters.

 Legacy strings are created by PyUnicode_FromUnicode() and
 PyUnicode_FromStringAndSize(NULL, size) functions. They become ready
 when PyUnicode_READY() is called.

 See also _PyUnicode_CheckConsistency().
 */
 PyObject_HEAD
 Py_ssize_t length; /* Number of code points in the string */
 Py_hash_t hash; /* Hash value; -1 if not set */
 /* Packed per-string flags; the bit-fields below total 8 bits. */
 struct {
 /*
 SSTATE_NOT_INTERNED (0)
 SSTATE_INTERNED_MORTAL (1)
 SSTATE_INTERNED_IMMORTAL (2)

 If interned != SSTATE_NOT_INTERNED, the two references from the
 dictionary to this object are *not* counted in ob_refcnt.
 */
 unsigned int interned:2;
 /* Character size:

 - PyUnicode_WCHAR_KIND (0):
   * character type = wchar_t (16 or 32 bits, depending on the
     platform)

 - PyUnicode_1BYTE_KIND (1):
   * character type = Py_UCS1 (8 bits, unsigned)
   * if ascii is set, all characters must be in range
     U+0000-U+007F, otherwise at least one character must be in range
     U+0080-U+00FF

 - PyUnicode_2BYTE_KIND (2):
   * character type = Py_UCS2 (16 bits, unsigned)
   * at least one character must be in range U+0100-U+FFFF

 - PyUnicode_4BYTE_KIND (4):
   * character type = Py_UCS4 (32 bits, unsigned)
   * at least one character must be in range U+10000-U+10FFFF
 */
 unsigned int kind:3;
 /* Compact is with respect to the allocation scheme. Compact unicode
 objects only require one memory block while non-compact objects use
 one block for the PyUnicodeObject struct and another for its data
 buffer. */
 unsigned int compact:1;
 /* The string only contains characters in range U+0000-U+007F (ASCII)
 and the kind is PyUnicode_1BYTE_KIND. If ascii is set and compact is
 set, use the PyASCIIObject structure. */
 unsigned int ascii:1;
 /* The ready flag indicates whether the object layout is initialized
 completely. This means that this is either a compact object, or
 the data pointer is filled out. The bit is redundant, and helps
 to minimize the test in PyUnicode_IS_READY(). */
 unsigned int ready:1;
 } state;
 wchar_t *wstr; /* wchar_t representation (null-terminated) */
} PyASCIIObject;
/* Non-ASCII strings allocated through PyUnicode_New use the
 PyCompactUnicodeObject structure. state.compact is set, and the data
 immediately follow the structure. */
typedef struct {
 /* Must stay the first member: the accessor macros cast a pointer to this
 structure to PyASCIIObject* to reach length, hash and state. */
 PyASCIIObject _base;
 Py_ssize_t utf8_length; /* Number of bytes in utf8, excluding the
 * terminating \0. */
 char *utf8; /* UTF-8 representation (null-terminated) */
 Py_ssize_t wstr_length; /* Number of code points in wstr, possible
 * surrogates count as two code points. */
} PyCompactUnicodeObject;
/* Strings allocated through PyUnicode_FromUnicode(NULL, len) use the
 PyUnicodeObject structure. The actual string data is initially in the wstr
 block, and copied into the data block using _PyUnicode_Ready. */
typedef struct {
 /* Must stay the first member so the object can also be addressed as a
 PyCompactUnicodeObject or a PyASCIIObject. */
 PyCompactUnicodeObject _base;
 /* One buffer pointer viewed through differently typed members; all
 members of the union alias the same pointer value. */
 union {
 void *any; /* untyped view */
 Py_UCS1 *latin1; /* typed for PyUnicode_1BYTE_KIND characters */
 Py_UCS2 *ucs2; /* typed for PyUnicode_2BYTE_KIND characters */
 Py_UCS4 *ucs4; /* typed for PyUnicode_4BYTE_KIND characters */
 } data; /* Canonical, smallest-form Unicode buffer */
} PyUnicodeObject;
/* Return the ascii state bit of the string: non-zero if it is set, 0 if not.
   No type checks or Ready calls are performed.
   The argument is parenthesised before the cast so that expressions such as
   `cond ? a : b` or `p + 1` are cast as a whole. */
#define PyUnicode_IS_ASCII(op) \
 (((PyASCIIObject*)(op))->state.ascii)
/* Return true if the string is compact or 0 if not.
   No type checks or Ready calls are performed. */
#define PyUnicode_IS_COMPACT(op) \
 (((PyASCIIObject*)(op))->state.compact)
/* Return true if the string is a compact ASCII string (use PyASCIIObject
   structure), or 0 if not. No type checks or Ready calls are performed.
   op is expanded twice, so it must be free of side effects. */
#define PyUnicode_IS_COMPACT_ASCII(op) \
 (PyUnicode_IS_ASCII(op) && PyUnicode_IS_COMPACT(op))
/* Values stored in the state.kind bit-field of a string object. */
enum PyUnicode_Kind {
    /* The characters live only in the wstr (wchar_t) buffer. This is only
       possible when the string was created with a legacy API and
       _PyUnicode_Ready() has not been called yet. */
    PyUnicode_WCHAR_KIND = 0,

    /* Return values of the PyUnicode_KIND() macro: */
    PyUnicode_1BYTE_KIND = 1,   /* characters stored as Py_UCS1 */
    PyUnicode_2BYTE_KIND = 2,   /* characters stored as Py_UCS2 */
    PyUnicode_4BYTE_KIND = 4    /* characters stored as Py_UCS4 */
};
/* Return pointers to the canonical representation cast to unsigned char,
   Py_UCS2, or Py_UCS4 for direct character access.
   No checks are performed, use PyUnicode_KIND() before to ensure
   these will work correctly. */
#define PyUnicode_1BYTE_DATA(op) ((Py_UCS1*)PyUnicode_DATA(op))
#define PyUnicode_2BYTE_DATA(op) ((Py_UCS2*)PyUnicode_DATA(op))
#define PyUnicode_4BYTE_DATA(op) ((Py_UCS4*)PyUnicode_DATA(op))
/* Return the ready state bit of the string: non-zero if set, 0 if not.
   PyUnicode_KIND() asserts on this macro, so it has to exist for
   PyUnicode_KIND() to be usable; the guard keeps a definition supplied
   elsewhere in control. */
#ifndef PyUnicode_IS_READY
#define PyUnicode_IS_READY(op) \
 (((PyASCIIObject *)(op))->state.ready)
#endif
/* Return one of the PyUnicode_*_KIND values defined above.
   Asserts that the string is ready. */
#define PyUnicode_KIND(op) \
 (assert(PyUnicode_IS_READY(op)), \
 ((PyASCIIObject *)(op))->state.kind)
/* Return a void pointer to the raw unicode buffer of a compact string:
   the first byte after the PyASCIIObject header for compact ASCII strings,
   otherwise the first byte after the PyCompactUnicodeObject header. */
#define _PyUnicode_COMPACT_DATA(op) \
 (PyUnicode_IS_COMPACT_ASCII(op) ? \
 ((void*)((PyASCIIObject*)(op) + 1)) : \
 ((void*)((PyCompactUnicodeObject*)(op) + 1)))
/* Return the separately stored buffer (data.any) of a non-compact string.
   Asserts that the pointer is not NULL. */
#define _PyUnicode_NONCOMPACT_DATA(op) \
 (assert(((PyUnicodeObject*)(op))->data.any), \
 ((((PyUnicodeObject *)(op))->data.any)))
/* Return a void pointer to the raw unicode buffer, choosing the compact or
   non-compact accessor from the compact state bit.
   op is expanded several times, so it must be free of side effects. */
#define PyUnicode_DATA(op) \
 (PyUnicode_IS_COMPACT(op) ? _PyUnicode_COMPACT_DATA(op) : \
 _PyUnicode_NONCOMPACT_DATA(op))
/* Self-check for the compact data accessors.
 *
 * Builds one object flagged as a compact, non-ASCII string of 1-byte kind
 * and prints, for _PyUnicode_COMPACT_DATA(), PyUnicode_DATA() and an
 * explicit pointer computation, the byte offset of the data pointer from
 * the start of the object next to sizeof(PyCompactUnicodeObject). With the
 * ascii bit clear, all three offsets are expected to equal that size.
 *
 * Returns 0 on success, EXIT_FAILURE if the object cannot be allocated.
 */
int main(void)
{
    PyASCIIObject *ascii;
    PyCompactUnicodeObject *compact;
    char *data;

    /* calloc rather than malloc: the macros below read state.ascii, which
       this test never sets, so every state bit must start out as 0. */
    compact = calloc(1, sizeof(PyCompactUnicodeObject) + 2);
    if (compact == NULL) {
        fprintf(stderr, "out of memory\n");
        return EXIT_FAILURE;
    }
    ascii = (PyASCIIObject*)compact;
    ascii->state.kind = 1;
    ascii->state.compact = 1;
    ascii->state.ascii = 0;   /* explicit: this is the non-ASCII compact form */

    assert(sizeof(Py_UCS2) == 2);
    assert(sizeof(Py_UCS4) == 4);

    printf("is ascii? %i\n", PyUnicode_IS_ASCII(compact));
    printf("is compact? %i\n", PyUnicode_IS_COMPACT(compact));
    printf("is compact ascii? %i\n", PyUnicode_IS_COMPACT_ASCII(compact));

    /* Both values are printed as size_t with %zu; passing sizeof to %u is
       a format/argument mismatch where size_t is wider than unsigned. */
    data = _PyUnicode_COMPACT_DATA(compact);
    printf("_PyUnicode_COMPACT_DATA: %zu vs %zu\n",
           (size_t)(data - (char*)compact),
           sizeof(PyCompactUnicodeObject));

    data = PyUnicode_DATA(compact);
    printf("PyUnicode_DATA: %zu vs %zu\n",
           (size_t)(data - (char*)compact),
           sizeof(PyCompactUnicodeObject));

    data = (char*)((PyCompactUnicodeObject*)compact + 1);
    printf("explicit cast: %zu vs %zu\n",
           (size_t)(data - (char*)compact),
           sizeof(PyCompactUnicodeObject));

    free(compact);
    return 0;
}
/* Original source: https://bugs.python.org/file23490/unicode.c */