dlib C++ Library - unicode_abstract.h

// Copyright (C) 2007 Davis E. King (davis@dlib.net), and Nils Labugt
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_UNICODe_ABSTRACT_H_
#ifdef DLIB_UNICODe_ABSTRACT_H_
#include "../uintn.h"
#include "../error.h"
#include <string>
#include <fstream>
namespace dlib
{
// ----------------------------------------------------------------------------------------
 // a typedef for an unsigned 32bit integer to hold our UNICODE characters 
 using unichar = uint32;
 // a typedef for a string object to hold our UNICODE strings
 using ustring = std::basic_string<unichar>;
// ----------------------------------------------------------------------------------------
 template <typename T>
 bool is_combining_char(
 const T ch_
 );
 /*!
 ensures
 - if (ch_ is a unicode combining character) then
 - returns true
 - else
 - returns false
 !*/
 bool is_surrogate(
 unichar ch
 );
 /*!
 ensures
 - if (ch is a unicode surrogate character) then
 - returns true
 - else
 - returns false
 !*/
 unichar surrogate_pair_to_unichar(
 unichar first, 
 unichar second
 );
 /*!
 requires
 - 0xD800 <= first < 0xDC00
 - 0xDC00 <= second < 0xE000
 - is_surrogate(first) == true
 - is_surrogate(second) == true
 ensures
 - converts two surrogates into one unicode character
 !*/
 void unichar_to_surrogate_pair(
 unichar ch, 
 unichar& first, 
 unichar& second
 );
 /*!
 requires
 - ch >= 0x10000 (i.e. is not in Basic Multilingual Plane) 
 ensures
 - surrogate_pair_to_unichar(#first,#second) == ch
 (i.e. converts ch into two surrogate characters)
 !*/
// ----------------------------------------------------------------------------------------
 class invalid_utf8_error : public error
 {
 public:
 invalid_utf8_error():error(EUTF8_TO_UTF32) {}
 };
 template <typename forward_iterator, typename unary_op>
 inline void convert_to_utf32 (
 forward_iterator ibegin,
 forward_iterator iend,
 unary_op op
 );
 /*!
 requires
 - forward_iterator points to either char, wchar_t or unichar types
 - ibegin == iterator pointing to the start of the range
 - iend == iterator pointing to the end of the range
 - unary_op == a callable object that takes one unichar parameter
 ensures
 - visits the range [ibegin, iend) in order and converts the input
 characters into utf-32 characters.
 - calls op(ch) on each converted UTF-32 character.
 - if (an error occurs while converting the characters)
 - throws invalid_utf8_error
 !*/
 template <typename char_type, typename traits, typename alloc>
 const ustring convert_to_utf32 (
 const std::basic_string<char_type, traits, alloc>& str
 );
 /*!
 requires
 - char_type is char, wchar_t or unichar
 ensures
 - Converts any UTF character stream to UTF-32. E.g. inputs
 can be UTF-8, UTF-16, or UTF-32 and the result is the UTF-32 equivalent.
 throws
 - invalid_utf8_error if we were unable to do the conversion.
 !*/
 const ustring convert_utf8_to_utf32 (
 const std::string& str
 );
 /*!
 ensures
 - if (str is a valid UTF-8 encoded string) then
 - returns a copy of str that has been converted into a
 unichar string
 - else
 - throws invalid_utf8_error
 !*/
// ----------------------------------------------------------------------------------------
 const ustring convert_wstring_to_utf32 (
 const std::wstring &wstr
 );
 /*!
 requires
 - wstr is a valid UTF-16 string when sizeof(wchar_t) == 2
 - wstr is a valid UTF-32 string when sizeof(wchar_t) == 4
 ensures
 - converts wstr into UTF-32 string
 !*/
// ----------------------------------------------------------------------------------------
 const std::wstring convert_utf32_to_wstring (
 const ustring &str
 );
 /*!
 requires
 - str is a valid UTF-32 encoded string
 ensures
 - converts str into wstring whose encoding is UTF-16 when sizeof(wchar_t) == 2
 - converts str into wstring whose encoding is UTF-32 when sizeof(wchar_t) == 4
 !*/
// ----------------------------------------------------------------------------------------
 const std::wstring convert_mbstring_to_wstring (
 const std::string &str
 );
 /*!
 requires
 - str is a valid multibyte string whose encoding is same as current locale setting
 ensures
 - converts str into wstring whose encoding is UTF-16 when sizeof(wchar_t) == 2
 - converts str into wstring whose encoding is UTF-32 when sizeof(wchar_t) == 4
 !*/
// ----------------------------------------------------------------------------------------
 const std::string convert_wstring_to_mbstring (
 const std::wstring &src
 );
 /*!
 requires
 - str is a valid wide character string string whose encoding is same as current 
 locale setting
 ensures
 - returns a multibyte encoded version of the given string
 !*/
// ----------------------------------------------------------------------------------------
 template <
 typename charT
 >
 class basic_utf8_ifstream : public std::basic_istream<charT>
 {
 /*!
 WHAT THIS OBJECT REPRESENTS
 This object represents an input file stream much like the
 normal std::ifstream except that it knows how to read UTF-8 
 data. So when you read characters out of this stream it will
 automatically convert them from the UTF-8 multibyte encoding
 into a fixed width wide character encoding.
 !*/
 public:
 basic_utf8_ifstream (
 );
 /*!
 ensures
 - constructs an input stream that isn't yet associated with
 a file.
 !*/
 basic_utf8_ifstream (
 const char* file_name,
 std::ios_base::openmode mode = std::ios::in 
 );
 /*!
 ensures
 - tries to open the given file for reading by this stream
 - mode is interpreted exactly the same was as the open mode
 argument used by std::ifstream.
 !*/
 basic_utf8_ifstream (
 const std::string& file_name,
 std::ios_base::openmode mode = std::ios::in 
 );
 /*!
 ensures
 - tries to open the given file for reading by this stream
 - mode is interpreted exactly the same was as the open mode
 argument used by std::ifstream.
 !*/
 void open(
 const std::string& file_name,
 std::ios_base::openmode mode = std::ios::in 
 );
 /*!
 ensures
 - tries to open the given file for reading by this stream
 - mode is interpreted exactly the same was as the open mode
 argument used by std::ifstream.
 !*/
 void open (
 const char* file_name,
 std::ios_base::openmode mode = std::ios::in 
 );
 /*!
 ensures
 - tries to open the given file for reading by this stream
 - mode is interpreted exactly the same was as the open mode
 argument used by std::ifstream.
 !*/
 void close (
 );
 /*!
 ensures
 - any file opened by this stream has been closed
 !*/
 };
 using utf8_uifstream = basic_utf8_ifstream<unichar>;
 using utf8_wifstream = basic_utf8_ifstream<wchar_t>;
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_UNICODe_ABSTRACT_H_

AltStyle によって変換されたページ (->オリジナル) /