git.postgresql.org Git - postgresql.git/commitdiff

git projects / postgresql.git / commitdiff

author Peter Eisentraut <peter@eisentraut.org>

2019年10月13日 20:10:38 +0000 (22:10 +0200)

committer Peter Eisentraut <peter@eisentraut.org>

2019年10月13日 20:10:38 +0000 (22:10 +0200)

Use https, consistent host name, remove references to ftp. Also
update the URLs for CLDR, which has moved from Trac to GitHub.

contrib/unaccent/generate_unaccent_rules.py patch | blob | blame | history

doc/src/sgml/acronyms.sgml patch | blob | blame | history

doc/src/sgml/charset.sgml patch | blob | blame | history

src/backend/utils/mb/Unicode/Makefile patch | blob | blame | history

src/backend/utils/mb/Unicode/UCS_to_BIG5.pl patch | blob | blame | history

src/backend/utils/mb/Unicode/UCS_to_JOHAB.pl patch | blob | blame | history

src/backend/utils/mb/Unicode/UCS_to_most.pl patch | blob | blame | history

src/common/unicode/Makefile patch | blob | blame | history

src/common/unicode_norm.c patch | blob | blame | history

diff --git a/contrib/unaccent/generate_unaccent_rules.py b/contrib/unaccent/generate_unaccent_rules.py

index 7a0a96e04f7cf90ba90a937552f82acae14d7e0a..acfb4f0b6868d280dc35e8062ef53c204d6534b7 100644 (file)

--- a/contrib/unaccent/generate_unaccent_rules.py

+++ b/contrib/unaccent/generate_unaccent_rules.py

@@ -24,9 +24,9 @@

# Latin-ASCII.xml, the latest data sets released can be browsed directly

# via [3]. Note that this script is compatible with at least release 29.

-# [1] http://unicode.org/Public/8.0.0/ucd/UnicodeData.txt

-# [2] http://unicode.org/cldr/trac/export/14746/tags/release-34/common/transforms/Latin-ASCII.xml

-# [3] https://unicode.org/cldr/trac/browser/tags

+# [1] https://www.unicode.org/Public/8.0.0/ucd/UnicodeData.txt

+# [2] https://raw.githubusercontent.com/unicode-org/cldr/release-34/common/transforms/Latin-ASCII.xml

+# [3] https://github.com/unicode-org/cldr/tags

# BEGIN: Python 2/3 compatibility - remove when Python 2 compatibility dropped

# The approach is to be Python3 compatible with Python2 "backports".

@@ -113,7 +113,7 @@ def is_mark(codepoint):

def is_letter_with_marks(codepoint, table):

"""Returns true for letters combined with one or more marks."""

- # See http://www.unicode.org/reports/tr44/tr44-14.html#General_Category_Values

+ # See https://www.unicode.org/reports/tr44/tr44-14.html#General_Category_Values

# Letter may have no combining characters, in which case it has

# no marks.

@@ -226,7 +226,7 @@ def special_cases():

return charactersSet

def main(args):

- # http://www.unicode.org/reports/tr44/tr44-14.html#Character_Decomposition_Mappings

+ # https://www.unicode.org/reports/tr44/tr44-14.html#Character_Decomposition_Mappings

decomposition_type_pattern = re.compile(" *<[^>]*> *")

table = {}

@@ -243,7 +243,7 @@ def main(args):

for line in unicodeDataFile:

fields = line.split(";")

if len(fields) > 5:

- # http://www.unicode.org/reports/tr44/tr44-14.html#UnicodeData.txt

+ # https://www.unicode.org/reports/tr44/tr44-14.html#UnicodeData.txt

general_category = fields[2]

decomposition = fields[5]

decomposition = re.sub(decomposition_type_pattern, ' ', decomposition)

@@ -281,8 +281,8 @@ def main(args):

if __name__ == "__main__":

parser = argparse.ArgumentParser(description='This script builds unaccent.rules on standard output when given the contents of UnicodeData.txt and Latin-ASCII.xml given as arguments.')

- parser.add_argument("--unicode-data-file", help="Path to formatted text file corresponding to UnicodeData.txt. See <http://unicode.org/Public/8.0.0/ucd/UnicodeData.txt>.", type=str, required=True, dest='unicodeDataFilePath')

- parser.add_argument("--latin-ascii-file", help="Path to XML file from Unicode Common Locale Data Repository (CLDR) corresponding to Latin-ASCII transliterator (Latin-ASCII.xml). See <http://unicode.org/cldr/trac/export/12304/tags/release-28/common/transforms/Latin-ASCII.xml>.", type=str, dest='latinAsciiFilePath')

+ parser.add_argument("--unicode-data-file", help="Path to formatted text file corresponding to UnicodeData.txt.", type=str, required=True, dest='unicodeDataFilePath')

+ parser.add_argument("--latin-ascii-file", help="Path to XML file from Unicode Common Locale Data Repository (CLDR) corresponding to Latin-ASCII transliterator (Latin-ASCII.xml).", type=str, dest='latinAsciiFilePath')

parser.add_argument("--no-ligatures-expansion", help="Do not expand ligatures and do not use Unicode CLDR Latin-ASCII transliterator. By default, this option is not enabled and \"--latin-ascii-file\" argument is required. If this option is enabled, \"--latin-ascii-file\" argument is optional and ignored.", action="store_true", dest='noLigaturesExpansion')

args = parser.parse_args()

diff --git a/doc/src/sgml/acronyms.sgml b/doc/src/sgml/acronyms.sgml

index 411e368a9c67edf5ba48e1a04095a427d56bb746..f638665dc926de2dd82d606a142a76b9ab44c92a 100644 (file)

--- a/doc/src/sgml/acronyms.sgml

+++ b/doc/src/sgml/acronyms.sgml

@@ -728,7 +728,7 @@

<para>

- <ulink url="http://www.unicode.org/">Unicode Transformation

+ <ulink url="https://www.unicode.org/">Unicode Transformation

Format</ulink>

</para>

</listitem>

diff --git a/doc/src/sgml/charset.sgml b/doc/src/sgml/charset.sgml

index b672da47d0a506aa0841d1dc2011abd9a1338fb6..45290bd27bf93f43bef2de76cb52876f0d9185eb 100644 (file)

--- a/doc/src/sgml/charset.sgml

+++ b/doc/src/sgml/charset.sgml

@@ -832,12 +832,12 @@ CREATE COLLATION german (provider = libc, locale = 'de_DE');

</varlistentry>

</variablelist>

- See <ulink url="http://unicode.org/reports/tr35/tr35-collation.html">Unicode

+ See <ulink url="https://www.unicode.org/reports/tr35/tr35-collation.html">Unicode

Technical Standard #35</ulink>

and <ulink url="https://tools.ietf.org/html/bcp47">BCP 47</ulink> for

details. The list of possible collation types (<literal>co</literal>

subtag) can be found in

- the <ulink url="http://www.unicode.org/repos/cldr/trunk/common/bcp47/collation.xml">CLDR

+ the <ulink url="https://github.com/unicode-org/cldr/blob/master/common/bcp47/collation.xml">CLDR

repository</ulink>.

The <ulink url="https://ssl.icu-project.org/icu-bin/locexp">ICU Locale

Explorer</ulink> can be used to check the details of a particular locale

@@ -900,7 +900,7 @@ CREATE COLLATION french FROM "fr-x-icu";

different Unicode normal forms. It is up to the collation provider to

actually implement such insensitive comparisons; the deterministic flag

only determines whether ties are to be broken using bytewise comparison.

- See also <ulink url="https://unicode.org/reports/tr10">Unicode Technical

+ See also <ulink url="https://www.unicode.org/reports/tr10">Unicode Technical

Standard 10</ulink> for more information on the terminology.

</para>

@@ -1926,7 +1926,7 @@ RESET client_encoding;

</varlistentry>

- <term><ulink url="http://www.unicode.org/"></ulink></term>

+ <term><ulink url="https://www.unicode.org/"></ulink></term>

<para>

diff --git a/src/backend/utils/mb/Unicode/Makefile b/src/backend/utils/mb/Unicode/Makefile

index a97e1c6cd72547ec7179427efacb1cdfbf0f8b06..63710f9ea73e45c252735365f341f10a13d5f5b4 100644 (file)

--- a/src/backend/utils/mb/Unicode/Makefile

+++ b/src/backend/utils/mb/Unicode/Makefile

@@ -119,7 +119,7 @@ DOWNLOAD = wget -O $@ --no-use-server-timestamps

#DOWNLOAD = curl -o $@

BIG5.TXT CNS11643.TXT:

- $(DOWNLOAD) http://ftp.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/OTHER/$(@F)

+ $(DOWNLOAD) https://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/OTHER/$(@F)

euc-jis-2004-std.txt sjis-0213-2004-std.txt:

$(DOWNLOAD) http://x0213.org/codetable/$(@F)

@@ -131,19 +131,19 @@ GB2312.TXT:

$(DOWNLOAD) 'http://trac.greenstone.org/browser/trunk/gsdl/unicode/MAPPINGS/EASTASIA/GB/GB2312.TXT?rev=1842&format=txt'

JIS0212.TXT:

- $(DOWNLOAD) http://ftp.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/JIS/$(@F)

+ $(DOWNLOAD) https://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/JIS/$(@F)

JOHAB.TXT KSX1001.TXT:

- $(DOWNLOAD) http://ftp.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/KSC/$(@F)

+ $(DOWNLOAD) https://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/KSC/$(@F)

KOI8-R.TXT KOI8-U.TXT:

- $(DOWNLOAD) http://ftp.unicode.org/Public/MAPPINGS/VENDORS/MISC/$(@F)

+ $(DOWNLOAD) https://www.unicode.org/Public/MAPPINGS/VENDORS/MISC/$(@F)

$(ISO8859TEXTS):

- $(DOWNLOAD) http://ftp.unicode.org/Public/MAPPINGS/ISO8859/$(@F)

+ $(DOWNLOAD) https://www.unicode.org/Public/MAPPINGS/ISO8859/$(@F)

$(filter-out CP8%,$(WINTEXTS)) CP932.TXT CP950.TXT:

- $(DOWNLOAD) http://ftp.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/$(@F)

+ $(DOWNLOAD) https://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/$(@F)

$(filter CP8%,$(WINTEXTS)):

- $(DOWNLOAD) http://ftp.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/$(@F)

+ $(DOWNLOAD) https://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/$(@F)

diff --git a/src/backend/utils/mb/Unicode/UCS_to_BIG5.pl b/src/backend/utils/mb/Unicode/UCS_to_BIG5.pl

index bcdd29b686fe4b2beda35d3c1a8bb516788eefcb..297f7b98938183eeaf247ebc18702ae3b1cb2ebd 100755 (executable)

--- a/src/backend/utils/mb/Unicode/UCS_to_BIG5.pl

+++ b/src/backend/utils/mb/Unicode/UCS_to_BIG5.pl

@@ -8,8 +8,8 @@

# map files provided by Unicode organization.

# Unfortunately it is prohibited by the organization

# to distribute the map files. So if you try to use this script,

-# you have to obtain the map files from the organization's ftp site.

-# ftp://www.unicode.org/Public/MAPPINGS/

+# you have to obtain the map files from the organization's download site.

+# https://www.unicode.org/Public/MAPPINGS/

# Our "big5" comes from BIG5.TXT, with the addition of the characters

# in the range 0xf9d6-0xf9dc from CP950.TXT.

diff --git a/src/backend/utils/mb/Unicode/UCS_to_JOHAB.pl b/src/backend/utils/mb/Unicode/UCS_to_JOHAB.pl

index 222093dff220bd7fc09e09d4cea97a023a323f5c..8645a7ea6efbc48e71344327cee6ec1d52af4fbf 100755 (executable)

--- a/src/backend/utils/mb/Unicode/UCS_to_JOHAB.pl

+++ b/src/backend/utils/mb/Unicode/UCS_to_JOHAB.pl

@@ -8,8 +8,8 @@

# map files provided by Unicode organization.

# Unfortunately it is prohibited by the organization

# to distribute the map files. So if you try to use this script,

-# you have to obtain the map files from the organization's ftp site.

-# ftp://www.unicode.org/Public/MAPPINGS/

+# you have to obtain the map files from the organization's download site.

+# https://www.unicode.org/Public/MAPPINGS/

# We assume the file includes three tab-separated columns:

# JOHAB code in hex

# UCS-2 code in hex

diff --git a/src/backend/utils/mb/Unicode/UCS_to_most.pl b/src/backend/utils/mb/Unicode/UCS_to_most.pl

index 647417b4bf8901e13ed94a05dfdd92f14e325d7e..2290feddf4e99d05cc25c90288805045389abc5f 100755 (executable)

--- a/src/backend/utils/mb/Unicode/UCS_to_most.pl

+++ b/src/backend/utils/mb/Unicode/UCS_to_most.pl

@@ -8,8 +8,8 @@

# map files provided by Unicode organization.

# Unfortunately it is prohibited by the organization

# to distribute the map files. So if you try to use this script,

-# you have to obtain the map files from the organization's ftp site.

-# ftp://www.unicode.org/Public/MAPPINGS/

+# you have to obtain the map files from the organization's download site.

+# https://www.unicode.org/Public/MAPPINGS/

# We assume the file includes three tab-separated columns:

# source character set code in hex

# UCS-2 code in hex

diff --git a/src/common/unicode/Makefile b/src/common/unicode/Makefile

index e20ef778f3458efc44be24b982b4a922e0f635b4..334859c98480a295c6800e20e09b452f7ea2ac3e 100644 (file)

--- a/src/common/unicode/Makefile

+++ b/src/common/unicode/Makefile

@@ -23,7 +23,7 @@ DOWNLOAD = wget -O $@ --no-use-server-timestamps

# These files are part of the Unicode Character Database. Download

# them on demand.

UnicodeData.txt CompositionExclusions.txt NormalizationTest.txt:

- $(DOWNLOAD) http://unicode.org/Public/UNIDATA/$(@F)

+ $(DOWNLOAD) https://www.unicode.org/Public/UNIDATA/$(@F)

# Generation of conversion tables used for string normalization with

# UTF-8 strings.

diff --git a/src/common/unicode_norm.c b/src/common/unicode_norm.c

index 89c553321286e4c194b708532e76293853ca1170..7509f8143794a12a9013d23624e30091aa96d84f 100644 (file)

--- a/src/common/unicode_norm.c

+++ b/src/common/unicode_norm.c

@@ -3,7 +3,7 @@

* Normalize a Unicode string to NFKC form

* This implements Unicode normalization, per the documentation at

- * http://www.unicode.org/reports/tr15/.

+ * https://www.unicode.org/reports/tr15/.

@@ -109,7 +109,7 @@ get_decomposed_size(pg_wchar code)

* Fast path for Hangul characters not stored in tables to save memory as

* decomposition is algorithmic. See

- * http://unicode.org/reports/tr15/tr15-18.html, annex 10 for details on

+ * https://www.unicode.org/reports/tr15/tr15-18.html, annex 10 for details on

* the matter.

if (code >= SBASE && code < SBASE + SCOUNT)

@@ -234,7 +234,7 @@ decompose_code(pg_wchar code, pg_wchar **result, int *current)

* Fast path for Hangul characters not stored in tables to save memory as

* decomposition is algorithmic. See

- * http://unicode.org/reports/tr15/tr15-18.html, annex 10 for details on

+ * https://www.unicode.org/reports/tr15/tr15-18.html, annex 10 for details on

* the matter.

if (code >= SBASE && code < SBASE + SCOUNT)

@@ -362,7 +362,7 @@ unicode_normalize_kc(const pg_wchar *input)

continue;

- * Per Unicode (http://unicode.org/reports/tr15/tr15-18.html) annex 4,

+ * Per Unicode (https://www.unicode.org/reports/tr15/tr15-18.html) annex 4,

* a sequence of two adjacent characters in a string is an

* exchangeable pair if the combining class (from the Unicode

* Character Database) for the first character is greater than the

This is the main PostgreSQL git repository.

RSS Atom