/* PSPP - a program for statistical analysis.
 Copyright (C) 2006, 2010, 2011, 2012, 2014, 2016 Free Software Foundation, Inc.
 This program is free software: you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation, either version 3 of the License, or
 (at your option) any later version.
 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 GNU General Public License for more details.
 You should have received a copy of the GNU General Public License
 along with this program. If not, see <http://www.gnu.org/licenses/>. */
#ifndef I18N_H
#define I18N_H
/*
 PSPP has three ``working'' locales:
 * The user interface locale.
 This is the locale which is visible to the person using pspp. Error
 messages and confidence indications are written in this locale. For
 example ``Cannot open file'' will be written in the user interface locale.
 This locale is set from the environment of the user who starts PSPP or from
 the system locale if not set.
 * The output locale.
 This locale should be visible to the person reading a report generated by
 pspp. Non-data related strings (e.g., "Page number", "Standard Deviation"
 etc.) appear in this locale.
 * The data locale.
 Only the character encoding is relevant.
 This locale is the one associated with the data being analysed. The only
 important aspect of this locale is the character encoding. (It might also
 be desirable for the LC_COLLATE category to be used for the purposes of
 sorting data.) The dictionary pertaining to the data contains a field
 denoting the encoding. Any string data stored in a "union value" is
 encoded in the dictionary's character set.
 Each of these locales may, at different times, take separate (or identical)
 values. So for example, a French statistician can use pspp to prepare a
 report in the English language, using a datafile which has been created by a
 Japanese researcher and hence uses a Japanese character set.
 It's rarely, if ever, necessary to interrogate the system to find out the
 values of the 3 locales. However it's important to be aware of the source
 (destination) locale when reading (writing) string data. When transferring
 data between a source and a destination, the appropriate recoding must be
 performed.
 System Files
 ============
 '.sav' files contain a field which is supposed to identify the encoding of
 the data they contain. However, many files produced by early versions of
 spss set this to "2" (ASCII) regardless of the encoding of the data. Later
 versions contain an additional record (the "Character Encoding Record")
 describing the encoding. When a system file is read, the dictionary's
 encoding is set using information gleaned from the system file. If the
 encoding cannot be determined or would be unreliable, then it remains unset.
 GUI
 ===
 The psppire graphic user interface is written using the GTK+ api, for which
 all strings must be encoded in UTF-8. All strings passed to the GTK+/GLib
 library functions (except for filenames) must be UTF-8 encoded otherwise
 errors will occur. Thus, for the purposes of programming PSPPIRE, the user
 interface locale should be assumed to be UTF-8, even if setlocale() and/or
 nl_langinfo indicates otherwise.
 Filenames
 ---------
 The GLib API has some special functions for dealing with filenames. Strings
 returned from functions like gtk_file_chooser_dialog_get_name() are not, in
 general, encoded in UTF-8, but in "filename" encoding. If that filename is
 passed to another GLib function which expects a filename, no conversion is
 necessary. If it's passed to a function for the purposes of displaying it
 (e.g. in a window's title-bar) it must be converted to UTF-8 (there is a
 special function for this: g_filename_display_name or
 g_filename_display_basename).
 If however, a filename needs to be passed outside of GTK+/GLib, e.g. to
 fopen, it must be converted to the local system encoding.
 Existing Locale Handling Functions
 ==================================
 The major aspect of locale handling which the programmer has to consider is
 that of character encoding. recode_string() is the main function for
 changing the encoding of strings.
 To minimise the number of conversions required, and to simplify design, PSPP
 attempts to store all internal strings in UTF-8 encoding. Thus, when reading
 system and portable files (or any other data source), the following items are
 immediately converted to UTF-8
 * Variable names
 * Variable labels
 * Value labels
 Conversely, when writing system files, these are converted back to the
 encoding of that system file.
 String data stored in "union value"s are left in their original encoding.
 These are converted for display later by data_out().
 Quirks
 ======
 For historical reasons, not all locale handling follows POSIX conventions.
 This makes it difficult (impossible?) to elegantly handle issues. For
 example, it would make sense for the GUI's datasheet to display numbers
 formatted according to LC_NUMERIC. Instead however there is data_out(),
 which uses settings_get_decimal_char() function instead of the locale's
 decimal separator. Similarly, formatting of monetary values is displayed in
 a PSPP/SPSS-specific fashion instead of using LC_MONETARY.
*/
#include "libpspp/compiler.h"
#include "libpspp/str.h"
#include <stdbool.h>
#include <unistr.h>
/* Set-up and tear-down entry points for this module. Neither takes arguments
 nor returns a value.
 NOTE(review): the definitions are not in this header; presumably i18n_init()
 must be called before the other functions declared here and i18n_done()
 undoes it -- confirm against the implementation. */
void i18n_done (void);
void i18n_init (void);
/* Encoding name string for UTF-8, the encoding in which PSPP attempts to
 store all of its internal strings. */
#define UTF8 "UTF-8"
/* The encoding of literal strings in PSPP source code, as seen at execution
 time. In fact this is likely to be some extended ASCII encoding, such as
 UTF-8 or ISO-8859-1, but ASCII is adequate for our purposes. */
#define C_ENCODING "ASCII"
/* Forward declaration: this header only passes pointers to struct pool. */
struct pool;
/* Functions for changing the encoding of text. In each of them TO and FROM
 are encoding names and the remaining arguments describe the text.
 NOTE(review): presumably TO is the destination encoding and FROM the source
 encoding, and the struct pool argument (where present) is where the result
 is allocated -- these declarations do not establish either; confirm against
 the definitions. */

/* Single-character form: takes one char and returns one char. */
char recode_byte (const char *to, const char *from, char);
/* String forms: TEXT is passed together with an int length and a char
 pointer is returned.
 NOTE(review): the meaning of a negative length and who is responsible for
 freeing the returned pointer are not visible here -- check the
 definitions. */
char *recode_string (const char *to, const char *from,
 const char *text, int len);
char *recode_string_pool (const char *to, const char *from,
 const char *text, int length, struct pool *);
/* Substring forms: the text is passed as a struct substring.
 recode_substring_pool() returns the result by value, whereas
 recode_pedantically() returns an int and delivers its result through OUT.
 NOTE(review): presumably that int is an error indication -- confirm. */
struct substring recode_substring_pool (const char *to, const char *from,
 struct substring text, struct pool *);
int recode_pedantically (const char *to, const char *from,
 struct substring text, struct pool *,
 struct substring *out);
/* Same arguments as recode_string(), but returns a size_t instead of a
 string.
 NOTE(review): presumably the length that TEXT would have after recoding --
 confirm. */
size_t recode_string_len (const char *to, const char *from,
 const char *text, int len);
/* Truncation and concatenation helpers that take an ENCODING name and a
 MAX_LEN limit. Each operation comes in two forms: one returning a string
 and a "_len" form taking the same arguments but returning a size_t.
 NOTE(review): presumably the inputs are UTF-8 and MAX_LEN limits the length
 of the result as it would be represented in ENCODING -- not established by
 this header; confirm against the definitions, along with ownership of the
 returned strings. */
char *utf8_encoding_trunc (const char *, const char *encoding,
 size_t max_len);
size_t utf8_encoding_trunc_len (const char *, const char *encoding,
 size_t max_len);
char *utf8_encoding_concat (const char *head, const char *tail,
 const char *encoding, size_t max_len);
size_t utf8_encoding_concat_len (const char *head, const char *tail,
 const char *encoding, size_t max_len);
/* Column helpers: both take a string pointer and a size_t, and
 utf8_columns_to_bytes() additionally takes a column count N_COLUMNS.
 NOTE(review): presumably the unnamed size_t is the string's length in bytes
 and "columns" means display columns -- confirm. */
size_t utf8_count_columns (const char *, size_t);
size_t utf8_columns_to_bytes (const char *, size_t, size_t n_columns);
/* Conversions of a file name to and from UTF-8, going by the function names.
 NOTE(review): which non-UTF-8 encoding is used, and who frees the returned
 string, are not visible here -- confirm. */
char *utf8_to_filename (const char *filename);
char *filename_to_utf8 (const char *filename);
/* Reports, as a bool, on the encoding name ENC.
 NOTE(review): the exact criterion for "valid" is defined in the
 implementation, not here. */
bool valid_encoding (const char *enc);
/* Returns a single char.
 NOTE(review): presumably the decimal separator of the system locale --
 confirm. */
char get_system_decimal (void);
/* Getter and setters for the default encoding. The getter returns a pointer
 to const char, so callers must not modify the result.
 NOTE(review): lifetime of the returned pointer, and what a false return from
 set_encoding_from_locale() means, are not visible here -- confirm. */
const char * get_default_encoding (void);
void set_default_encoding (const char *enc);
bool set_encoding_from_locale (const char *loc);
/* Takes a code point UC and a caller-supplied BUFFER, declared as char[16],
 and returns a pointer to const char.
 NOTE(review): presumably the result is written into BUFFER and BUFFER is
 returned -- confirm. */
const char *uc_name (ucs4_t uc, char buffer[16]);
/* Hash functions for text passed as a pointer plus count N, as a plain
 string, or as a struct substring. Each takes a BASIS and returns an
 unsigned int, and each is annotated WARN_UNUSED_RESULT.
 NOTE(review): presumably these hash UTF-8 text without regard to case, so
 that strings equal under the utf8_*casecmp functions hash alike, and BASIS
 seeds the hash -- confirm against the definitions. */
unsigned int utf8_hash_case_bytes (const char *, size_t n, unsigned int basis) WARN_UNUSED_RESULT;
unsigned int utf8_hash_case_string (const char *, unsigned int basis) WARN_UNUSED_RESULT;
unsigned int utf8_hash_case_substring (struct substring, unsigned int basis)
 WARN_UNUSED_RESULT;
/* Comparison functions returning int, for plain strings, substrings, and
 pointer-plus-size_t pairs.
 NOTE(review): presumably case-insensitive with strcmp()-style negative,
 zero, or positive results, and utf8_strverscasecmp() presumably orders
 embedded version numbers numerically -- confirm. */
int utf8_strcasecmp (const char *, const char *);
int utf8_sscasecmp (struct substring, struct substring);
int utf8_strncasecmp (const char *, size_t, const char *, size_t);
int utf8_strverscasecmp (const char *, const char *);
/* Case conversions: each takes a string and returns a non-const char
 pointer.
 NOTE(review): presumably a newly allocated copy that the caller must free --
 confirm. */
char *utf8_to_upper (const char *);
char *utf8_to_lower (const char *);
char *utf8_to_title (const char *);

/* Information about character encodings. */
/* ISO C defines a set of characters that a C implementation must support at
 runtime, called the C basic execution character set, which consists of the
 following characters:
 A B C D E F G H I J K L M
 N O P Q R S T U V W X Y Z
 a b c d e f g h i j k l m
 n o p q r s t u v w x y z
 0 1 2 3 4 5 6 7 8 9
 ! " # % & ' () * + , - . / :
 ; < = > ? [ \ ] ^ _ { | } ~
 space \a \b \r \n \t \v \f \0
 The following is true of every member of the C basic execution character
 set in all "reasonable" encodings:
 1. Every member of the C basic character set is encoded.
 2. Every member of the C basic character set has the same width in
 bytes, called the "unit width". Most encodings have a unit width of
 1 byte, but UCS-2 and UTF-16 have a unit width of 2 bytes and UCS-4
 and UTF-32 have a unit width of 4 bytes.
 3. In a stateful encoding, the encoding of members of the C basic
 character set does not vary with shift state.
 4. When a string is read unit-by-unit, a unit that has the encoded value
 of a member of the C basic character set, EXCEPT FOR THE DECIMAL
 DIGITS, always represents that member. That is, if the encoding has
 multi-unit characters, the units that encode the C basic character
 set are never part of a multi-unit character.
 The exception for decimal digits is due to GB18030, which uses
 decimal digits as part of multi-byte encodings.
 All 8-bit and wider encodings that I have been able to find follow these
 rules. 7-bit and narrower encodings (e.g. UTF-7) do not. I'm not too
 concerned about that. */
#include <stdbool.h>
/* Maximum width of a unit, in bytes. UTF-32 with 4-byte units is the widest
 that I am aware of. Sizes the per-character arrays in struct
 encoding_info. */
#define MAX_UNIT 4
/* Information about an encoding.
 NOTE(review): presumably filled in by get_encoding_info() -- confirm. */
struct encoding_info
 {
 /* Encoding name. IANA says character set names may be up to 40 US-ASCII
 characters. (The array holds 41 bytes, presumably 40 characters plus a
 null terminator.) */
 char name[41];
 /* True if this encoding has a unit width of 1 byte, and every character
 used in ASCII text files has the same value in this encoding. */
 bool is_ascii_compatible;
 /* True if this encoding has a unit width of 1 byte and appears to be
 EBCDIC-based. */
 bool is_ebcdic_compatible;
 /* Character information. Only the first 'unit' bytes of each array below
 are meaningful; the arrays are sized for the widest unit, MAX_UNIT. */
 int unit; /* Unit width, in bytes. */
 char cr[MAX_UNIT]; /* \r in encoding, 'unit' bytes long. */
 char lf[MAX_UNIT]; /* \n in encoding, 'unit' bytes long. */
 char space[MAX_UNIT]; /* ' ' in encoding, 'unit' bytes long. */
 };
/* Takes a pointer to a struct encoding_info and an encoding NAME, and returns
 a bool.
 NOTE(review): presumably fills in the structure for NAME and reports
 success; what the structure holds on a false return is not visible here --
 confirm. */
bool get_encoding_info (struct encoding_info *, const char *name);
/* Yes/no queries about the encoding named ENCODING.
 NOTE(review): presumably the first two correspond to the
 is_ascii_compatible and is_ebcdic_compatible members of struct
 encoding_info -- confirm. */
bool is_encoding_ascii_compatible (const char *encoding);
bool is_encoding_ebcdic_compatible (const char *encoding);
bool is_encoding_supported (const char *encoding);
bool is_encoding_utf8 (const char *encoding);

/* Database of encodings, by language or region. */

/* One category in the database: a category name plus an array of
 'n_encodings' encoding name strings. */
struct encoding_category
 {
 const char *category; /* e.g. "Arabic" or "Western European". */
 const char **encodings; /* Encodings within the category. */
 size_t n_encodings; /* Number of encodings in category. */
 };
/* Accessors for the encoding category database: a pointer to struct
 encoding_category and a count.
 NOTE(review): presumably the pointer is to an array of
 get_n_encoding_categories() elements that remains owned by this module --
 confirm. */
struct encoding_category *get_encoding_categories (void);
size_t get_n_encoding_categories (void);
/* Return the ISO two letter code for the current LC_MESSAGES
 locale category.
 NOTE(review): the return type is a non-const char pointer; whether the
 caller must free it is not visible here -- confirm. */
char *get_language (void);
#endif /* i18n.h */