\documentclass[12pt,titlepage]{article}
\usepackage{amsmath}
\usepackage{mathrsfs}
\usepackage{amsfonts}
\usepackage{amssymb}
\usepackage{amsthm}
\usepackage{mathtools}
\usepackage{graphicx}
\usepackage{color}
\usepackage{ucs}
\usepackage[utf8x]{inputenc}
\usepackage{xparse}
\usepackage{tikz}
\usepackage{hyperref}
%----Macros----------
%
% Unresolved issues:
%
% \righttoleftarrow
% \lefttorightarrow
%
% \color{} with HTML colorspec
% \bgcolor
% \array with options (without options, it's equivalent to the matrix environment)
% Of the standard HTML named colors, white, black, red, green, blue and yellow
% are predefined in the color package. Here are the rest.
% Components are on a 0--1 scale (0.502 is roughly 128/255, 0.753 roughly 192/255).
\definecolor{aqua}{rgb}{0, 1.0, 1.0}
% The HTML colour name is spelled "fuchsia". The misspelled "fuschia" is kept
% as an alias so that existing uses of the old name keep working.
\definecolor{fuchsia}{rgb}{1.0, 0, 1.0}
\definecolor{fuschia}{rgb}{1.0, 0, 1.0}
\definecolor{gray}{rgb}{0.502, 0.502, 0.502}
\definecolor{lime}{rgb}{0, 1.0, 0}
\definecolor{maroon}{rgb}{0.502, 0, 0}
\definecolor{navy}{rgb}{0, 0, 0.502}
\definecolor{olive}{rgb}{0.502, 0.502, 0}
\definecolor{purple}{rgb}{0.502, 0, 0.502}
\definecolor{silver}{rgb}{0.753, 0.753, 0.753}
\definecolor{teal}{rgb}{0, 0.502, 0.502}
% Because of conflicts, \space and \mathop are converted to
% \itexspace and \operatorname during preprocessing.
% itex: \space{ht}{dp}{wd}
%
% Height and baseline depth measurements are in units of tenths of an ex while
% the width is measured in tenths of an em.
\makeatletter
% Scratch registers used by \itexspace: width, depth, and total height
% (height plus depth) of the strut being built.
\newdimen\itex@wd
\newdimen\itex@dp
\newdimen\itex@thd
% \itexspace{ht}{dp}{wd}
% Produces an invisible box: a zero-width rule reaching from dp below the
% baseline to ht above it, placed inside a \makebox of width wd.
% Each argument is first read in ex (ht, dp) or em (wd) and then scaled by 0.1.
\def\itexspace#1#2#3{%
  \itex@wd=#3em%
  \itex@wd=0.1\itex@wd%
  \itex@dp=#2ex%
  \itex@dp=0.1\itex@dp%
  \itex@thd=#1ex%
  \itex@thd=0.1\itex@thd%
  \advance\itex@thd by \itex@dp%
  \makebox[\itex@wd]{\rule[-\itex@dp]{0pt}{\itex@thd}}%
}
\makeatother
% \tensor and \multiscript
\makeatletter
% State for the script scanner below: \if@sup records whether superscripts
% are pending, and the token register \@sups accumulates them.
\newif\if@sup
\newtoks\@sups
% Append #1 to the contents of \@sups (via the temporary macro \act).
\def\append@sup#1{\edef\act{\noexpand\@sups={\the\@sups #1}}\act}%
% Clear the pending flag and empty \@sups.
\def\reset@sup{\@supfalse\@sups={}}%
% \mk@scripts reads its input two arguments at a time: a marker (#1) and a
% script (#2). \tensor and \multiscripts append the pair "_/" as a terminator.
%  - #2 is "/": emit ^{...} for any pending superscripts and stop.
%  - #1 is "_": emit pending superscripts (and reset), then {}_{#2}.
%  - otherwise: hand #2 to \append@sup and set the pending flag.
% In the last two cases \mk@scripts is called again on what follows.
\def\mk@scripts#1#2{\if #2/ \if@sup ^{\the\@sups}\fi \else%
 \ifx #1_ \if@sup ^{\the\@sups}\reset@sup \fi {}_{#2}%
 \else \append@sup#2 \@suptrue \fi%
 \expandafter\mk@scripts\fi}
% \tensor{base}{scripts}: typeset the base, then scan the scripts.
\def\tensor#1#2{\reset@sup#1\mk@scripts#2_/}
% \multiscripts{pre}{base}{post}: the scripts in #1 are scanned after an
% empty group placed in front of the base #2; the scripts in #3 are scanned
% after the base.
\def\multiscripts#1#2#3{\reset@sup{}\mk@scripts#1_/#2%
 \reset@sup\mk@scripts#3_/}
\makeatother
% \slash{X}: typeset X with a "/" struck through it.
% \slashbox holds a pre-set "/". \itex@pslash sets its argument in math mode
% in \@tempboxa, prints a copy of the slash, backs up by half the slash width
% plus half the argument width, and then prints the argument, so the two are
% centred on each other.
% The line ends inside \itex@pslash are protected with % so that no stray
% space tokens are produced when the macro is expanded outside math mode.
\makeatletter
\newbox\slashbox \setbox\slashbox=\hbox{$/$}
\def\itex@pslash#1{\setbox\@tempboxa=\hbox{$#1$}%
 \@tempdima=0.5\wd\slashbox \advance\@tempdima 0.5\wd\@tempboxa%
 \copy\slashbox \kern-\@tempdima \box\@tempboxa}
% This replaces any earlier meaning of \slash.
\def\slash{\protect\itex@pslash}
\makeatother
% math-mode versions of \rlap, etc
% from Alexander Perlis, "A complement to \smash, \llap, and lap"
% http://math.arizona.edu/~aprl/publications/mathclap/
% \clap{text}: a zero-width box with \hss on both sides of the text, so the
% text overlaps its surroundings equally to the left and to the right.
\def\clap#1{\hbox to 0pt{\hss#1\hss}}
% Math-mode overlap commands. \mathpalette passes the current math style to
% the matching ...internal helper as its first argument.
% NOTE(review): mathtools is loaded earlier in this file and is believed to
% provide commands with these names; these \def's replace whatever was
% defined before without warning -- confirm that this is intended.
\def\mathllap{\mathpalette\mathllapinternal}
\def\mathrlap{\mathpalette\mathrlapinternal}
\def\mathclap{\mathpalette\mathclapinternal}
% Helpers: #1 is the math style and #2 the content. The content is set as an
% inline formula with \mathsurround zeroed, inside \llap, \rlap or \clap.
\def\mathllapinternal#1#2{\llap{$\mathsurround=0pt#1{#2}$}}
\def\mathrlapinternal#1#2{\rlap{$\mathsurround=0pt#1{#2}$}}
\def\mathclapinternal#1#2{\clap{$\mathsurround=0pt#1{#2}$}}
% \root{n}{x} and \sqrt[n]{x} both typeset the n-th root of x.
% The previous \root (syntax: \root n \of {x}) is saved as \oldroot and used
% to build both commands. The previous \sqrt is not saved under another name.
\let\oldroot\root
\def\root#1#2{\oldroot #1 \of{#2}}
% \sqrt[n]{x}: n-th root; \sqrt{x}: plain square root.
% When no index is given the plain radical \sqrtsign is used. Passing an
% empty index through \oldroot would still apply the kerning meant for the
% index and shift the radical to the left.
\renewcommand{\sqrt}[2][]{%
 \if\relax\detokenize{#1}\relax
 \sqrtsign{#2}%
 \else
 \oldroot #1 \of{#2}%
 \fi}
% Manually declare the txfonts symbolsC font
% Symbol font "symbolsC": encoding U, family txsyc, medium/normal; the bold
% math version uses the bx series. txsyc m/n is also set as the substitution
% default for encoding U.
\DeclareSymbolFont{symbolsC}{U}{txsyc}{m}{n}
\SetSymbolFont{symbolsC}{bold}{U}{txsyc}{bx}{n}
\DeclareFontSubstitution{U}{txsyc}{m}{n}
% Manually declare the stmaryrd font
% Symbol font "stmry": encoding U, family stmry; the bold math version uses
% the b series.
\DeclareSymbolFont{stmry}{U}{stmry}{m}{n}
\SetSymbolFont{stmry}{bold}{U}{stmry}{b}{n}
% Manually declare the MnSymbolE font
% Symbol font "mnomx": encoding OMX, family MnSymbolE. The shape table below
% maps size ranges to the design sizes MnSymbolE5 ... MnSymbolE12.
% NOTE(review): the bold math version asks for series b, but only the m/n
% shape is declared in this block -- confirm that the resulting font
% substitution in bold math is acceptable.
\DeclareFontFamily{OMX}{MnSymbolE}{}
\DeclareSymbolFont{mnomx}{OMX}{MnSymbolE}{m}{n}
\SetSymbolFont{mnomx}{bold}{OMX}{MnSymbolE}{b}{n}
\DeclareFontShape{OMX}{MnSymbolE}{m}{n}{
 <-6> MnSymbolE5
 <6-7> MnSymbolE6
 <7-8> MnSymbolE7
 <8-9> MnSymbolE8
 <9-10> MnSymbolE9
 <10-12> MnSymbolE10
 <12-> MnSymbolE12}{}
% Declare specific arrows from txfonts without loading the full package
% \re@DeclareMathSymbol{\cmd}{class}{symbol font}{slot}: first makes \cmd
% undefined, then declares it with \DeclareMathSymbol, so that a previous
% definition of \cmd does not get in the way.
\makeatletter
\def\re@DeclareMathSymbol#1#2#3#4{%
 \let#1=\undefined
 \DeclareMathSymbol{#1}{#2}{#3}{#4}}
% Symbols taken from the symbolsC font. Several names share one slot
% (\neArrow/\neArr, \seArrow/\seArr, \nwArrow/\nwArr, \swArrow/\swArr).
\re@DeclareMathSymbol{\neArrow}{\mathrel}{symbolsC}{116}
\re@DeclareMathSymbol{\neArr}{\mathrel}{symbolsC}{116}
\re@DeclareMathSymbol{\seArrow}{\mathrel}{symbolsC}{117}
\re@DeclareMathSymbol{\seArr}{\mathrel}{symbolsC}{117}
\re@DeclareMathSymbol{\nwArrow}{\mathrel}{symbolsC}{118}
\re@DeclareMathSymbol{\nwArr}{\mathrel}{symbolsC}{118}
\re@DeclareMathSymbol{\swArrow}{\mathrel}{symbolsC}{119}
\re@DeclareMathSymbol{\swArr}{\mathrel}{symbolsC}{119}
\re@DeclareMathSymbol{\nequiv}{\mathrel}{symbolsC}{46}
% \Perp and \Vbar are both mapped to slot 121.
\re@DeclareMathSymbol{\Perp}{\mathrel}{symbolsC}{121}
\re@DeclareMathSymbol{\Vbar}{\mathrel}{symbolsC}{121}
% Symbols taken from the stmry font.
\re@DeclareMathSymbol{\sslash}{\mathrel}{stmry}{12}
\re@DeclareMathSymbol{\boxslash}{\mathrel}{stmry}{27}
\re@DeclareMathSymbol{\boxbslash}{\mathrel}{stmry}{28}
\re@DeclareMathSymbol{\boxast}{\mathrel}{stmry}{24}
\re@DeclareMathSymbol{\boxcircle}{\mathrel}{stmry}{29}
\re@DeclareMathSymbol{\boxbox}{\mathrel}{stmry}{30}
\re@DeclareMathSymbol{\obslash}{\mathrel}{stmry}{20}
\re@DeclareMathSymbol{\obar}{\mathrel}{stmry}{58}
\re@DeclareMathSymbol{\olessthan}{\mathrel}{stmry}{60}
\re@DeclareMathSymbol{\ogreaterthan}{\mathrel}{stmry}{61}
% Large operators; the slots are given in hexadecimal.
% NOTE(review): "6 is a single-digit hex slot, unlike the neighbouring "64 --
% confirm the intended slot against the stmry font table.
\re@DeclareMathSymbol{\bigsqcap}{\mathop}{stmry}{"64}
\re@DeclareMathSymbol{\biginterleave}{\mathop}{stmry}{"6}
% \invamp and \parr are both mapped to symbolsC slot 77.
\re@DeclareMathSymbol{\invamp}{\mathrel}{symbolsC}{77}
\re@DeclareMathSymbol{\parr}{\mathrel}{symbolsC}{77}
\makeatother
% \llangle, \rrangle, \lmoustache and \rmoustache from MnSymbolE
% (\llbracket and \rrbracket are taken from the stmry font instead.)
\makeatletter
% \Decl@Mn@Delim{\cmd}{class}{symbol font}{slot}: if \cmd is a control
% sequence it is first made undefined; it is then declared as a math
% delimiter that uses the same font and slot for its small and large variants.
\def\Decl@Mn@Delim#1#2#3#4{%
 \if\relax\noexpand#1%
 \let#1\undefined
 \fi
 \DeclareMathDelimiter{#1}{#2}{#3}{#4}{#3}{#4}}
% Shorthands that fix the class to \mathopen or \mathclose.
\def\Decl@Mn@Open#1#2#3{\Decl@Mn@Delim{#1}{\mathopen}{#2}{#3}}
\def\Decl@Mn@Close#1#2#3{\Decl@Mn@Delim{#1}{\mathclose}{#2}{#3}}
% Slots are given in octal.
\Decl@Mn@Open{\llangle}{mnomx}{'164}
\Decl@Mn@Close{\rrangle}{mnomx}{'171}
\Decl@Mn@Open{\lmoustache}{mnomx}{'245}
\Decl@Mn@Close{\rmoustache}{mnomx}{'244}
\Decl@Mn@Open{\llbracket}{stmry}{'112}
\Decl@Mn@Close{\rrbracket}{stmry}{'113}
\makeatother
% Widecheck
% \widecheck{x}: builds a wide check accent from \widehat by flipping the
% hat vertically (\scalebox{1}[-1], from graphicx) and overlaying the result
% on the argument.
\makeatletter
% User command: robust, grouped, and passes the current math style on to
% \@widecheck through \mathpalette.
\DeclareRobustCommand\widecheck[1]{{\mathpalette\@widecheck{#1}}}
% #1 = math style, #2 = content.
%  - box 0: the content set in the current style.
%  - box 2: a \widehat over two invisible rules, one with the height of box 0
%    and one with its width; its depth is then set to minus the height of box 0.
%  - \@tempdima = (height of box 0 + 2 * height of box 2) / 3, used as the
%    raise/lower amount around the vertical flip of box 2.
%  - \ooalign finally overlays the flipped box 2 and box 0.
\def\@widecheck#1#2{%
 \setbox\z@\hbox{\m@th$#1#2$}%
 \setbox\tw@\hbox{\m@th$#1%
 \widehat{%
 \vrule\@width\z@\@height\ht\z@
 \vrule\@height\z@\@width\wd\z@}$}%
 \dp\tw@-\ht\z@
 \@tempdima\ht\z@ \advance\@tempdima2\ht\tw@ \divide\@tempdima\thr@@
 \setbox\tw@\hbox{%
 \raise\@tempdima\hbox{\scalebox{1}[-1]{\lower\@tempdima\box
\tw@}}}%
 {\ooalign{\box\tw@ \cr \box\z@}}}
\makeatother
% \mathraisebox{voffset}[height][depth]{something}
% Math-mode counterpart of \raisebox. The content (#4) is set as an inline
% formula in the current math style, supplied by \mathpalette, inside a
% \raisebox with offset #1. The optional height (#2) and depth (#3) are
% forwarded to \raisebox only when they are given.
\makeatletter
\NewDocumentCommand\mathraisebox{moom}{%
\IfNoValueTF{#2}{\def\@temp##1##2{\raisebox{#1}{$\m@th##1##2$}}}{%
\IfNoValueTF{#3}{\def\@temp##1##2{\raisebox{#1}[#2]{$\m@th##1##2$}}%
}{\def\@temp##1##2{\raisebox{#1}[#2][#3]{$\m@th##1##2$}}}}%
\mathpalette\@temp{#4}}
% Close the \makeatletter scope opened above (this line previously read
% \makeatletter, leaving the scope open).
\makeatother
% udots (taken from yhmath)
% Three dots, each raised higher than the one before it (by 1pt, 4pt and
% 7pt), packaged as a single inner math atom.
\makeatletter
\def\udots{%
 \mathinner{%
  \mkern2mu\raise\p@\hbox{.}%
  \mkern2mu\raise4\p@\hbox{.}%
  \mkern1mu\raise7\p@\vbox{\kern7\p@\hbox{.}}%
  \mkern1mu}}
\makeatother
%% Fix array: \itexarray{rows} places its argument in a matrix environment
\newcommand{\itexarray}[1]{%
  \begin{matrix}#1\end{matrix}%
}
%% \itexnum is a noop: it expands to its argument unchanged
\newcommand{\itexnum}[1]{#1}
%% Renaming existing commands
%% Each command below is an alternative name for an existing command or character.
% -- stacking: \underoverset{below}{above}{base}
\newcommand{\underoverset}[3]{%
  \underset{#1}{\overset{#2}{#3}}%
}
% -- arrows
\newcommand*{\widevec}{\overrightarrow}
\newcommand*{\darr}{\downarrow}
\newcommand*{\nearr}{\nearrow}
\newcommand*{\nwarr}{\nwarrow}
\newcommand*{\searr}{\searrow}
\newcommand*{\swarr}{\swarrow}
\newcommand*{\curvearrowbotright}{\curvearrowright}
\newcommand*{\uparr}{\uparrow}
\newcommand*{\downuparrow}{\updownarrow}
\newcommand*{\duparr}{\updownarrow}
\newcommand*{\updarr}{\updownarrow}
% -- relations and maps
\newcommand*{\gt}{>}
\newcommand*{\lt}{<}
\newcommand*{\map}{\mapsto}
\newcommand*{\embedsin}{\hookrightarrow}
% -- Greek letter names mapped to Latin letters of the same shape
\newcommand*{\Alpha}{A}
\newcommand*{\Beta}{B}
\newcommand*{\Zeta}{Z}
\newcommand*{\Eta}{H}
\newcommand*{\Iota}{I}
\newcommand*{\Kappa}{K}
\newcommand*{\Mu}{M}
\newcommand*{\Nu}{N}
\newcommand*{\Rho}{P}
\newcommand*{\Tau}{T}
\newcommand*{\Upsi}{\Upsilon}
\newcommand*{\omicron}{o}
% -- delimiters
\newcommand*{\lang}{\langle}
\newcommand*{\rang}{\rangle}
% -- large operators
\newcommand*{\Union}{\bigcup}
\newcommand*{\Intersection}{\bigcap}
\newcommand*{\Oplus}{\bigoplus}
\newcommand*{\Otimes}{\bigotimes}
\newcommand*{\Wedge}{\bigwedge}
\newcommand*{\Vee}{\bigvee}
\newcommand*{\coproduct}{\coprod}
\newcommand*{\product}{\prod}
\newcommand*{\closure}{\overline}
% -- integrals
\newcommand*{\integral}{\int}
\newcommand*{\doubleintegral}{\iint}
\newcommand*{\tripleintegral}{\iiint}
\newcommand*{\quadrupleintegral}{\iiiint}
\newcommand*{\conint}{\oint}
\newcommand*{\contourintegral}{\oint}
% -- miscellaneous symbols
\newcommand*{\infinity}{\infty}
\newcommand*{\bottom}{\bot}
\newcommand*{\minusb}{\boxminus}
\newcommand*{\plusb}{\boxplus}
\newcommand*{\timesb}{\boxtimes}
\newcommand*{\intersection}{\cap}
\newcommand*{\union}{\cup}
\newcommand*{\Del}{\nabla}
\newcommand*{\odash}{\circleddash}
\newcommand*{\negspace}{\!}
\newcommand*{\widebar}{\overline}
% -- sizes; note that \scriptsize is redefined to mean \scriptstyle
\newcommand*{\textsize}{\normalsize}
\renewcommand*{\scriptsize}{\scriptstyle}
\newcommand*{\scriptscriptsize}{\scriptscriptstyle}
\newcommand*{\mathfr}{\mathfrak}
% -- two-argument commands that discard the first argument and keep the second
\newcommand{\statusline}[2]{#2}
\newcommand{\tooltip}[2]{#2}
\newcommand{\toggle}[2]{#2}
% Theorem Environments
% Every environment comes in a numbered form and an unnumbered form whose
% name carries a "u" prefix. Each numbered environment has its own counter.

% Style "plain"
\theoremstyle{plain}
\newtheorem{theorem}{Theorem}
\newtheorem*{utheorem}{Theorem}
\newtheorem{lemma}{Lemma}
\newtheorem*{ulemma}{Lemma}
\newtheorem{prop}{Proposition}
\newtheorem*{uprop}{Proposition}
\newtheorem{cor}{Corollary}
\newtheorem*{ucor}{Corollary}

% Style "definition"
\theoremstyle{definition}
\newtheorem{defn}{Definition}
\newtheorem*{udefn}{Definition}
\newtheorem{example}{Example}
\newtheorem*{uexample}{Example}

% Style "remark"
\theoremstyle{remark}
\newtheorem{remark}{Remark}
\newtheorem*{uremark}{Remark}
\newtheorem{note}{Note}
\newtheorem*{unote}{Note}
%-------------------------------------------------------------------
\begin{document}
%-------------------------------------------------------------------
\section*{Unicode}
\hypertarget{introduction_to_fortran_unicode_support}{}\paragraph*{{Introduction to Fortran Unicode support}}\label{introduction_to_fortran_unicode_support}
\hypertarget{reading_writing_and_processing_utf8_data_using_fortran_unicode_codepoints_and_their_encodings}{}\subsection*{{Reading, writing and processing UTF-8 data using Fortran, Unicode codepoints, and their encodings}}\label{reading_writing_and_processing_utf8_data_using_fortran_unicode_codepoints_and_their_encodings}
Unicode is an international standard for encoding text that assigns a unique integer value, called a \textbf{code point}, to every character, symbol, and emoji from virtually all written languages and scripts in the world. This allows computers to process, store, and display text correctly across different platforms by providing a universal mapping for characters.

There are several standardized ways to encode the code points. The interest here is in two of them---UTF-8 and UCS-4 encoding.

\textbf{UTF-8 encoding} has emerged as the de facto standard format for representing Unicode in text files on all major operating systems.

Not all code points are stored with the same number of bytes in UTF-8. The characters of the 7-bit ASCII set are represented by the same single byte in UTF-8, but other characters require from two to four bytes of storage. This means ASCII-7 is a subset of UTF-8 but UTF-8 can represent far more characters. This compatibility with ASCII is a very large advantage of UTF-8 encoding over other code point encodings as a file format, contributing to it becoming a de facto standard.

\textbf{UCS-4 encoding} is simpler and homogeneous. Each code point is stored as a 32-bit value, thus using the same number of bytes for each character (unlike UTF-8). This format is often used to internally encode Unicode code points in various programming languages.

UCS-4 encoding shares the trait of constant storage size per element with all Fortran intrinsic types, making it a natural fit for the internal representation of code points in the Fortran language.

Since the release of the 2003 standard, Fortran does indeed \textbf{optionally} support processing of Unicode UTF-8-encoded files in this manner. Data is internally stored using UCS-4 encoding but translated to and from UTF-8 encoding during formatted I/O. This option will be referred to as the \textbf{Fortran ISO\_10646 standard}.
\hypertarget{glyphs}{}\subsubsection*{{Glyphs}}\label{glyphs}
A character encoded using UCS-4 or UTF-8 is often referred to as a \textbf{``glyph''} to differentiate it from ASCII characters. Technically, ``glyph'' is the name for the rendered appearance of a character in a particular font, but here it is also used to mean a Unicode ``character''.
\hypertarget{the_guides}{}\subsubsection*{{The Guides}}\label{the_guides}
The following guides describe using UTF-8 files from Fortran codes. They not only include examples using the standard-specified ISO\_10646 extension, but also describe how to process UTF-8 encoded data without the extension. They include discussions concerning what is standardized and what is not, what commonly-used extensions compilers provide to address some of the current gaps in Unicode support, and what is known to be potentially non-portable but useful behavior from various compilers/processors.

The resulting methods are incorporated into Fortran modules available via GitHub repositories.

The selection of methods to employ breaks down along these major divides:
\begin{itemize}%
\item using the optional Fortran ISO\_10646 standard (Fortran Wiki page \texttt{ucs4}).
The first guide set assumes you want to use the ISO\_10646 extension and would prefer to conform to the Fortran standard as portably as is reasonable, and probably to avoid using UTF-8-encoded constant strings.
\item processing UTF-8 data without using the ISO\_10646 extension (Fortran Wiki page \texttt{no\_iso\_10646}).
\item using UTF-8-encoded source files (Fortran Wiki page \texttt{utf8\_source\_ext}) versus using only Fortran source files strictly adhering to the Fortran character set
\item using common Unicode-related processor-dependent extensions (Fortran Wiki page \texttt{extensions\_ext})
\item \href{https://github.com/urbanjost/M_ucs4}{M\_ucs4} --- a module supporting use of the ISO\_10646 extension.

A module supplementing the ISO\_10646 extension, including
\begin{itemize}%
\item low-level procedures for converting UTF-8 encoded byte streams to and from UCS-4 character variables.
\item additional conversion routines to bridge the gaps in Fortran
\item Unicode processing
\item related utility programs
\end{itemize}
\item \href{https://github.com/urbanjost/M_unicode}{M\_unicode} --- processing UTF-8 data without depending on the ISO\_10646 extension.

A module defining a user-defined type that allows for ragged arrays of Unicode data and overlays of intrinsic functions along with many common character methods allowing additional functions such as case conversion, sorting, and padding. This is a very complete interface for processing UTF-8 encoded data that does not require the optional ISO\_10646 extension. It provides both a functional and an OOP interface.
\end{itemize}
\vspace{.5em} \hrule \vspace{.5em}
\begin{itemize}%
\item NEXT: Fortran Wiki page \texttt{ucs4}
\end{itemize}
\end{document}
</div><div class="naked_ctrl">
<form action="/index.cgi/larger-text" method="get" name="gate">
<p><a href="http://altstyle.alfasado.net">AltStyle</a> によって変換されたページ <a href="https://fortranwiki.org/fortran/tex/Unicode">(-&gt;オリジナル)</a>
/ <label>アドレス: <input type="text" name="naked_post_url" value="https://fortranwiki.org/fortran/tex/Unicode" size="22" /></label> <label>モード: <select name="naked_post_mode">
<option value="default">デフォルト</option>
<option value="speech">音声ブラウザ</option>
<option value="ruby">ルビ付き</option>
<option value="contrast">配色反転</option>
<option value="larger-text" selected="selected">文字拡大</option>
<option value="mobile">モバイル</option>
</select>
<input type="submit" value="表示" />
</p>
</form>
</div>