[Python-checkins] python/dist/src/Lib/test cjkencodings_test.py,
NONE, 1.1 test_codecencodings_cn.py, NONE,
1.1 test_codecencodings_jp.py, NONE,
1.1 test_codecencodings_kr.py, NONE,
1.1 test_codecencodings_tw.py, NONE, 1.1 test_codecmaps_cn.py,
NONE, 1.1 test_codecmaps_jp.py, NONE, 1.1 test_codecmaps_kr.py,
NONE, 1.1 test_codecmaps_tw.py, NONE,
1.1 test_multibytecodec.py, NONE,
1.1 test_multibytecodec_support.py, NONE, 1.1 regrtest.py,
1.150, 1.151
perky at users.sourceforge.net
perky at users.sourceforge.net
Sat Jan 17 09:29:30 EST 2004
- Previous message: [Python-checkins] python/dist/src/Lib/encodings big5.py, NONE,
1.1 cp932.py, NONE, 1.1 cp949.py, NONE, 1.1 cp950.py, NONE,
1.1 euc_jisx0213.py, NONE, 1.1 euc_jp.py, NONE, 1.1 euc_kr.py,
NONE, 1.1 gb18030.py, NONE, 1.1 gb2312.py, NONE, 1.1 gbk.py,
NONE, 1.1 hz.py, NONE, 1.1 iso2022_jp.py, NONE,
1.1 iso2022_jp_1.py, NONE, 1.1 iso2022_jp_2.py, NONE,
1.1 iso2022_jp_3.py, NONE, 1.1 iso2022_jp_ext.py, NONE,
1.1 iso2022_kr.py, NONE, 1.1 johab.py, NONE, 1.1 shift_jis.py,
NONE, 1.1 shift_jisx0213.py, NONE, 1.1 aliases.py, 1.20, 1.21
- Next message: [Python-checkins] python/dist/src/Modules Setup.dist,1.42,1.43
- Messages sorted by:
[ date ]
[ thread ]
[ subject ]
[ author ]
Update of /cvsroot/python/python/dist/src/Lib/test
In directory sc8-pr-cvs1:/tmp/cvs-serv14239/Lib/test
Modified Files:
regrtest.py
Added Files:
cjkencodings_test.py test_codecencodings_cn.py
test_codecencodings_jp.py test_codecencodings_kr.py
test_codecencodings_tw.py test_codecmaps_cn.py
test_codecmaps_jp.py test_codecmaps_kr.py test_codecmaps_tw.py
test_multibytecodec.py test_multibytecodec_support.py
Log Message:
Add CJK codecs support as discussed on python-dev. (SF #873597)
Several style fixes are suggested by Martin v. Loewis and
Marc-Andre Lemburg. Thanks!
--- NEW FILE: cjkencodings_test.py ---
teststring = {
'big5': (
"\xa6\x70\xa6\xf3\xa6\x62\x20\x50\x79\x74\x68\x6f\x6e\x20\xa4\xa4"
"\xa8\xcf\xa5\xce\xac\x4a\xa6\xb3\xaa\xba\x20\x43\x20\x6c\x69\x62"
"\x72\x61\x72\x79\x3f\x0a\xa1\x40\xa6\x62\xb8\xea\xb0\x54\xac\xec"
"\xa7\xde\xa7\xd6\xb3\x74\xb5\x6f\xae\x69\xaa\xba\xa4\xb5\xa4\xd1"
"\x2c\x20\xb6\x7d\xb5\x6f\xa4\xce\xb4\xfa\xb8\xd5\xb3\x6e\xc5\xe9"
"\xaa\xba\xb3\x74\xab\xd7\xac\x4f\xa4\xa3\xae\x65\xa9\xbf\xb5\xf8"
"\xaa\xba\x0a\xbd\xd2\xc3\x44\x2e\x20\xac\xb0\xa5\x5b\xa7\xd6\xb6"
"\x7d\xb5\x6f\xa4\xce\xb4\xfa\xb8\xd5\xaa\xba\xb3\x74\xab\xd7\x2c"
"\x20\xa7\xda\xad\xcc\xab\x4b\xb1\x60\xa7\xc6\xb1\xe6\xaf\xe0\xa7"
"\x51\xa5\xce\xa4\x40\xa8\xc7\xa4\x77\xb6\x7d\xb5\x6f\xa6\x6e\xaa"
"\xba\x0a\x6c\x69\x62\x72\x61\x72\x79\x2c\x20\xa8\xc3\xa6\xb3\xa4"
"\x40\xad\xd3\x20\x66\x61\x73\x74\x20\x70\x72\x6f\x74\x6f\x74\x79"
"\x70\x69\x6e\x67\x20\xaa\xba\x20\x70\x72\x6f\x67\x72\x61\x6d\x6d"
"\x69\x6e\x67\x20\x6c\x61\x6e\x67\x75\x61\x67\x65\x20\xa5\x69\x0a"
"\xa8\xd1\xa8\xcf\xa5\xce\x2e\x20\xa5\xd8\xab\x65\xa6\xb3\xb3\x5c"
"\xb3\x5c\xa6\x68\xa6\x68\xaa\xba\x20\x6c\x69\x62\x72\x61\x72\x79"
"\x20\xac\x4f\xa5\x48\x20\x43\x20\xbc\x67\xa6\xa8\x2c\x20\xa6\xd3"
[...962 lines suppressed...]
"\x20\xe3\x81\xa7\xe3\x81\xaf\xe3\x81\x9d\xe3\x81\x86\xe3\x81\x84"
"\xe3\x81\xa3\xe3\x81\x9f\xe5\xb0\x8f\xe7\xb4\xb0\xe5\xb7\xa5\xe3"
"\x81\x8c\xe8\xbf\xbd\xe5\x8a\xa0\xe3\x81\x95\xe3\x82\x8c\xe3\x82"
"\x8b\xe3\x81\x93\xe3\x81\xa8\xe3\x81\xaf\xe3\x81\x82\xe3\x81\xbe"
"\xe3\x82\x8a\xe3\x81\x82\xe3\x82\x8a\xe3\x81\xbe\xe3\x81\x9b\xe3"
"\x82\x93\xe3\x80\x82\x0a\xe8\xa8\x80\xe8\xaa\x9e\xe8\x87\xaa\xe4"
"\xbd\x93\xe3\x81\xae\xe6\xa9\x9f\xe8\x83\xbd\xe3\x81\xaf\xe6\x9c"
"\x80\xe5\xb0\x8f\xe9\x99\x90\xe3\x81\xab\xe6\x8a\xbc\xe3\x81\x95"
"\xe3\x81\x88\xe3\x80\x81\xe5\xbf\x85\xe8\xa6\x81\xe3\x81\xaa\xe6"
"\xa9\x9f\xe8\x83\xbd\xe3\x81\xaf\xe6\x8b\xa1\xe5\xbc\xb5\xe3\x83"
"\xa2\xe3\x82\xb8\xe3\x83\xa5\xe3\x83\xbc\xe3\x83\xab\xe3\x81\xa8"
"\xe3\x81\x97\xe3\x81\xa6\xe8\xbf\xbd\xe5\x8a\xa0\xe3\x81\x99\xe3"
"\x82\x8b\xe3\x80\x81\xe3\x81\xa8\xe3\x81\x84\xe3\x81\x86\xe3\x81"
"\xae\xe3\x81\x8c\x20\x50\x79\x74\x68\x6f\x6e\x20\xe3\x81\xae\xe3"
"\x83\x9d\xe3\x83\xaa\xe3\x82\xb7\xe3\x83\xbc\xe3\x81\xa7\xe3\x81"
"\x99\xe3\x80\x82\x0a\x0a\xe3\x83\x8e\xe3\x81\x8b\xe3\x82\x9a\x20"
"\xe3\x83\x88\xe3\x82\x9a\x20\xe3\x83\x88\xe3\x82\xad\xef\xa8\xb6"
"\xef\xa8\xb9\x20\xf0\xa1\x9a\xb4\xf0\xaa\x8e\x8c\x20\xe9\xba\x80"
"\xe9\xbd\x81\xf0\xa9\x9b\xb0\x0a"),
}
--- NEW FILE: test_codecencodings_cn.py ---
#!/usr/bin/env python
#
# test_codecencodings_cn.py
# Codec encoding tests for PRC encodings.
#
# $CJKCodecs: test_codecencodings_cn.py,v 1.1 2003/12/19 03:00:05 perky Exp $
from test import test_support
from test import test_multibytecodec_support
import unittest
class Test_GB2312(test_multibytecodec_support.TestBase, unittest.TestCase):
    """Error-handling checks for the gb2312 codec."""

    encoding = 'gb2312'
    tstring = test_multibytecodec_support.load_teststring('gb2312')

    # Rows are (input, error scheme, expected output); an expected value
    # of None means the conversion has to raise UnicodeError.
    codectests = (
        # invalid bytes
        ("abc\x81\x81\xc1\xc4", "strict", None),
        ("abc\xc8", "strict", None),
        ("abc\x81\x81\xc1\xc4", "replace", u"abc\ufffd\u804a"),
        ("abc\x81\x81\xc1\xc4\xc8", "replace", u"abc\ufffd\u804a\ufffd"),
        ("abc\x81\x81\xc1\xc4", "ignore", u"abc\u804a"),
        ("\xc1\x64", "strict", None),
    )
class Test_GBK(test_multibytecodec_support.TestBase, unittest.TestCase):
    """Error-handling checks for the gbk codec."""

    encoding = 'gbk'
    tstring = test_multibytecodec_support.load_teststring('gbk')

    # Rows are (input, error scheme, expected output); an expected value
    # of None means the conversion has to raise UnicodeError.
    codectests = (
        # invalid bytes
        ("abc\x80\x80\xc1\xc4", "strict", None),
        ("abc\xc8", "strict", None),
        ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\u804a"),
        ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\u804a\ufffd"),
        ("abc\x80\x80\xc1\xc4", "ignore", u"abc\u804a"),
        ("\x83\x34\x83\x31", "strict", None),
    )
class Test_GB18030(test_multibytecodec_support.TestBase, unittest.TestCase):
    """Error-handling checks for the gb18030 codec."""

    encoding = 'gb18030'
    tstring = test_multibytecodec_support.load_teststring('gb18030')

    # Rows are (input, error scheme, expected output); an expected value
    # of None means the conversion has to raise UnicodeError.
    codectests = (
        # invalid bytes
        ("abc\x80\x80\xc1\xc4", "strict", None),
        ("abc\xc8", "strict", None),
        ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\u804a"),
        ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\u804a\ufffd"),
        ("abc\x80\x80\xc1\xc4", "ignore", u"abc\u804a"),
        ("abc\x84\x39\x84\x39\xc1\xc4", "replace", u"abc\ufffd\u804a"),
    )

    # Makes TestBase skip its xmlcharrefreplace/custom-replace checks.
    has_iso10646 = True
def test_main():
    """Run the PRC codec encoding test cases as one suite."""
    suite = unittest.TestSuite()
    for testcase in (Test_GB2312, Test_GBK, Test_GB18030):
        suite.addTest(unittest.makeSuite(testcase))
    test_support.run_suite(suite)


if __name__ == "__main__":
    test_main()
--- NEW FILE: test_codecencodings_jp.py ---
#!/usr/bin/env python
#
# test_codecencodings_jp.py
# Codec encoding tests for Japanese encodings.
#
# $CJKCodecs: test_codecencodings_jp.py,v 1.2 2004/01/06 09:25:37 perky Exp $
from test import test_support
from test import test_multibytecodec_support
import unittest
class Test_CP932(test_multibytecodec_support.TestBase, unittest.TestCase):
    """Error-handling checks for the cp932 codec (uses the shift_jis sample)."""

    encoding = 'cp932'
    tstring = test_multibytecodec_support.load_teststring('shift_jis')

    # Rows are (input, error scheme, expected output); an expected value
    # of None means the conversion has to raise UnicodeError.
    codectests = (
        # invalid bytes
        ("abc\x81\x00\x81\x00\x82\x84", "strict", None),
        ("abc\xf8", "strict", None),
        ("abc\x81\x00\x82\x84", "replace", u"abc\ufffd\uff44"),
        ("abc\x81\x00\x82\x84\x88", "replace", u"abc\ufffd\uff44\ufffd"),
        ("abc\x81\x00\x82\x84", "ignore", u"abc\uff44"),
        # sjis vs cp932
        ("\\\x7e", "replace", u"\\\x7e"),
        ("\x81\x5f\x81\x61\x81\x7c", "replace", u"\uff3c\u2225\uff0d"),
    )
class Test_EUC_JISX0213(test_multibytecodec_support.TestBase,
                        unittest.TestCase):
    """Error-handling checks for the euc_jisx0213 codec."""

    encoding = 'euc_jisx0213'
    tstring = test_multibytecodec_support.load_teststring('euc_jisx0213')

    # Rows are (input, error scheme, expected output); an expected value
    # of None means the conversion has to raise UnicodeError.
    codectests = (
        # invalid bytes
        ("abc\x80\x80\xc1\xc4", "strict", None),
        ("abc\xc8", "strict", None),
        ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\u7956"),
        ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\u7956\ufffd"),
        ("abc\x80\x80\xc1\xc4", "ignore", u"abc\u7956"),
        ("abc\x8f\x83\x83", "replace", u"abc\ufffd"),
        ("\xc1\x64", "strict", None),
        ("\xa1\xc0", "strict", u"\uff3c"),
    )

    # (unicode input, expected encoded output) for the custom
    # "test.xmlcharnamereplace" handler.  The expected value is a plain
    # byte string: characters the codec cannot encode must appear as
    # ASCII entity references, never as raw non-ASCII characters.
    xmlcharnametest = (
        u"\xab\u211c\xbb = \u2329\u1234\u232a",
        "\xa9\xa8&real;\xa9\xb2 = &lang;&#4660;&rang;"
    )
# Error-handling rows shared by the EUC-JP test cases, laid out as
# (input, error scheme, expected output); None stands for "must raise".
eucjp_commontests = (
    # strict: undecodable input
    ("abc\x80\x80\xc1\xc4", "strict", None),
    ("abc\xc8", "strict", None),
    # replace / ignore
    ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\u7956"),
    ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\u7956\ufffd"),
    ("abc\x80\x80\xc1\xc4", "ignore", u"abc\u7956"),
    ("abc\x8f\x83\x83", "replace", u"abc\ufffd"),
    # strict: bad trail byte
    ("\xc1\x64", "strict", None),
)
class Test_EUC_JP_COMPAT(test_multibytecodec_support.TestBase,
                         unittest.TestCase):
    """Error-handling checks for the euc_jp codec."""

    encoding = 'euc_jp'
    tstring = test_multibytecodec_support.load_teststring('euc_jp')

    # Shared EUC-JP rows plus the rows specific to this codec name.
    codectests = eucjp_commontests + (
        ("\xa1\xc0\\", "strict", u"\uff3c\\"),
        (u"\xa5", "strict", "\x5c"),
        (u"\u203e", "strict", "\x7e"),
    )
class Test_EUC_JP_STRICT(test_multibytecodec_support.TestBase,
                         unittest.TestCase):
    """Error-handling checks for the euc_jp_strict codec."""

    encoding = 'euc_jp_strict'
    tstring = test_multibytecodec_support.load_teststring('euc_jp')

    # Shared EUC-JP rows plus the rows specific to this codec name.
    codectests = eucjp_commontests + (
        ("\xa1\xc0\\", "strict", u"\\\\"),
        (u"\xa5", "strict", None),
        (u"\u203e", "strict", None),
    )
# Error-handling rows shared by the Shift_JIS test cases, laid out as
# (input, error scheme, expected output); None stands for "must raise".
shiftjis_commonenctests = (
    # strict: undecodable input
    ("abc\x80\x80\x82\x84", "strict", None),
    ("abc\xf8", "strict", None),
    # replace / ignore
    ("abc\x80\x80\x82\x84", "replace", u"abc\ufffd\uff44"),
    ("abc\x80\x80\x82\x84\x88", "replace", u"abc\ufffd\uff44\ufffd"),
    ("abc\x80\x80\x82\x84def", "ignore", u"abc\uff44def"),
)
class Test_SJIS_COMPAT(test_multibytecodec_support.TestBase,
                       unittest.TestCase):
    """Error-handling checks for the shift_jis codec."""

    encoding = 'shift_jis'
    tstring = test_multibytecodec_support.load_teststring('shift_jis')

    # Shared Shift_JIS rows plus the rows specific to this codec name.
    codectests = shiftjis_commonenctests + (
        ("\\\x7e", "strict", u"\\\x7e"),
        ("\x81\x5f\x81\x61\x81\x7c", "strict", u"\uff3c\u2016\u2212"),
    )
class Test_SJIS_STRICT(test_multibytecodec_support.TestBase,
                       unittest.TestCase):
    """Error-handling checks for the shift_jis_strict codec."""

    encoding = 'shift_jis_strict'
    tstring = test_multibytecodec_support.load_teststring('shift_jis')

    # Shared Shift_JIS rows plus the rows specific to this codec name.
    codectests = shiftjis_commonenctests + (
        ("\\\x7e", "replace", u"\xa5\u203e"),
        ("\x81\x5f\x81\x61\x81\x7c", "replace", u"\x5c\u2016\u2212"),
    )
class Test_SJISX0213(test_multibytecodec_support.TestBase, unittest.TestCase):
    """Error-handling checks for the shift_jisx0213 codec."""

    encoding = 'shift_jisx0213'
    tstring = test_multibytecodec_support.load_teststring('shift_jisx0213')

    # Rows are (input, error scheme, expected output); an expected value
    # of None means the conversion has to raise UnicodeError.
    codectests = (
        # invalid bytes
        ("abc\x80\x80\x82\x84", "strict", None),
        ("abc\xf8", "strict", None),
        ("abc\x80\x80\x82\x84", "replace", u"abc\ufffd\uff44"),
        ("abc\x80\x80\x82\x84\x88", "replace", u"abc\ufffd\uff44\ufffd"),
        ("abc\x80\x80\x82\x84def", "ignore", u"abc\uff44def"),
        # sjis vs cp932
        ("\\\x7e", "replace", u"\xa5\u203e"),
        ("\x81\x5f\x81\x61\x81\x7c", "replace", u"\x5c\u2016\u2212"),
    )

    # (unicode input, expected encoded output) for the custom
    # "test.xmlcharnamereplace" handler.  The expected value is a plain
    # byte string: characters the codec cannot encode must appear as
    # ASCII entity references, never as raw non-ASCII characters.
    xmlcharnametest = (
        u"\xab\u211c\xbb = \u2329\u1234\u232a",
        "\x85G&real;\x85Q = &lang;&#4660;&rang;"
    )
def test_main():
    """Run the Japanese codec encoding test cases as one suite."""
    testcases = [Test_CP932, Test_EUC_JISX0213, Test_EUC_JP_COMPAT,
                 Test_SJIS_COMPAT]
    # The *_strict codec names are only exercised when the support module
    # reports a CJKCodecs build.
    if test_multibytecodec_support.__cjkcodecs__:
        testcases.append(Test_EUC_JP_STRICT)
        testcases.append(Test_SJIS_STRICT)
    testcases.append(Test_SJISX0213)

    suite = unittest.TestSuite()
    for testcase in testcases:
        suite.addTest(unittest.makeSuite(testcase))
    test_support.run_suite(suite)


if __name__ == "__main__":
    test_main()
--- NEW FILE: test_codecencodings_kr.py ---
#!/usr/bin/env python
#
# test_codecencodings_kr.py
# Codec encoding tests for ROK encodings.
#
# $CJKCodecs: test_codecencodings_kr.py,v 1.1 2003/12/19 03:00:06 perky Exp $
from test import test_support
from test import test_multibytecodec_support
import unittest
class Test_CP949(test_multibytecodec_support.TestBase, unittest.TestCase):
    """Error-handling checks for the cp949 codec."""

    encoding = 'cp949'
    tstring = test_multibytecodec_support.load_teststring('cp949')

    # Rows are (input, error scheme, expected output); an expected value
    # of None means the conversion has to raise UnicodeError.
    codectests = (
        # invalid bytes
        ("abc\x80\x80\xc1\xc4", "strict", None),
        ("abc\xc8", "strict", None),
        ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\uc894"),
        ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\uc894\ufffd"),
        ("abc\x80\x80\xc1\xc4", "ignore", u"abc\uc894"),
    )
class Test_EUCKR(test_multibytecodec_support.TestBase, unittest.TestCase):
    """Error-handling checks for the euc_kr codec."""

    encoding = 'euc_kr'
    tstring = test_multibytecodec_support.load_teststring('euc_kr')

    # Rows are (input, error scheme, expected output); an expected value
    # of None means the conversion has to raise UnicodeError.
    codectests = (
        # invalid bytes
        ("abc\x80\x80\xc1\xc4", "strict", None),
        ("abc\xc8", "strict", None),
        ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\uc894"),
        ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\uc894\ufffd"),
        ("abc\x80\x80\xc1\xc4", "ignore", u"abc\uc894"),
    )
class Test_JOHAB(test_multibytecodec_support.TestBase, unittest.TestCase):
    """Error-handling checks for the johab codec."""

    encoding = 'johab'
    tstring = test_multibytecodec_support.load_teststring('johab')

    # Rows are (input, error scheme, expected output); an expected value
    # of None means the conversion has to raise UnicodeError.
    codectests = (
        # invalid bytes
        ("abc\x80\x80\xc1\xc4", "strict", None),
        ("abc\xc8", "strict", None),
        ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\ucd27"),
        ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\ucd27\ufffd"),
        ("abc\x80\x80\xc1\xc4", "ignore", u"abc\ucd27"),
    )
def test_main():
    """Run the ROK codec encoding test cases as one suite."""
    suite = unittest.TestSuite()
    for testcase in (Test_CP949, Test_EUCKR, Test_JOHAB):
        suite.addTest(unittest.makeSuite(testcase))
    test_support.run_suite(suite)


if __name__ == "__main__":
    test_main()
--- NEW FILE: test_codecencodings_tw.py ---
#!/usr/bin/env python
#
# test_codecencodings_tw.py
# Codec encoding tests for ROC encodings.
#
# $CJKCodecs: test_codecencodings_tw.py,v 1.1 2003/12/19 03:00:06 perky Exp $
from test import test_support
from test import test_multibytecodec_support
import unittest
class Test_Big5(test_multibytecodec_support.TestBase, unittest.TestCase):
    """Error-handling checks for the big5 codec."""

    encoding = 'big5'
    tstring = test_multibytecodec_support.load_teststring('big5')

    # Rows are (input, error scheme, expected output); an expected value
    # of None means the conversion has to raise UnicodeError.
    codectests = (
        # invalid bytes
        ("abc\x80\x80\xc1\xc4", "strict", None),
        ("abc\xc8", "strict", None),
        ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\u8b10"),
        ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\u8b10\ufffd"),
        ("abc\x80\x80\xc1\xc4", "ignore", u"abc\u8b10"),
    )
def test_main():
    """Run the ROC codec encoding test case as a suite."""
    suite = unittest.TestSuite()
    for testcase in (Test_Big5,):
        suite.addTest(unittest.makeSuite(testcase))
    test_support.run_suite(suite)


if __name__ == "__main__":
    test_main()
--- NEW FILE: test_codecmaps_cn.py ---
#!/usr/bin/env python
#
# test_codecmaps_cn.py
# Codec mapping tests for PRC encodings
#
# $CJKCodecs: test_codecmaps_cn.py,v 1.2 2004/01/17 12:47:19 perky Exp $
from test import test_support
from test import test_multibytecodec_support
import unittest
class TestGB2312Map(test_multibytecodec_support.TestBase_Mapping,
                    unittest.TestCase):
    """Check the gb2312 codec against the EUC-CN.TXT mapping table."""

    encoding = 'gb2312'
    # Local table file, and where to fetch it when it is missing.
    mapfilename = 'EUC-CN.TXT'
    mapfileurl = 'http://people.freebsd.org/~perky/i18n/EUC-CN.TXT'
class TestGBKMap(test_multibytecodec_support.TestBase_Mapping,
                 unittest.TestCase):
    """Check the gbk codec against the CP936.TXT mapping table."""

    encoding = 'gbk'
    # Local table file, and where to fetch it when it is missing.
    mapfilename = 'CP936.TXT'
    mapfileurl = ('http://www.unicode.org/Public/MAPPINGS/VENDORS/'
                  'MICSFT/WINDOWS/CP936.TXT')
def test_main():
    """Run the PRC codec mapping test cases as one suite."""
    suite = unittest.TestSuite()
    for testcase in (TestGB2312Map, TestGBKMap):
        suite.addTest(unittest.makeSuite(testcase))
    test_support.run_suite(suite)


# Runs at import time: locates the mapping files and sets this module's
# skip_expected flag, which regrtest reads.
test_multibytecodec_support.register_skip_expected(TestGB2312Map, TestGBKMap)

if __name__ == "__main__":
    test_main()
--- NEW FILE: test_codecmaps_jp.py ---
#!/usr/bin/env python
#
# test_codecmaps_jp.py
# Codec mapping tests for Japanese encodings
#
# $CJKCodecs: test_codecmaps_jp.py,v 1.2 2004/01/17 12:47:19 perky Exp $
from test import test_support
from test import test_multibytecodec_support
import unittest
class TestCP932Map(test_multibytecodec_support.TestBase_Mapping,
                   unittest.TestCase):
    """Check the cp932 codec against the CP932.TXT mapping table."""

    encoding = 'cp932'
    # Local table file, and where to fetch it when it is missing.
    mapfilename = 'CP932.TXT'
    mapfileurl = ('http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/'
                  'WINDOWS/CP932.TXT')

    # Extra (encoded, unicode) pairs checked by test_mapping_supplemental.
    supmaps = [
        ('\x80', u'\u0080'),
        ('\xa0', u'\uf8f0'),
        ('\xfd', u'\uf8f1'),
        ('\xfe', u'\uf8f2'),
        ('\xff', u'\uf8f3'),
    ]
    # Single bytes 0xa1..0xdf map to U+FF61..U+FF9F (byte + 0xfec0).
    for i in range(0xa1, 0xe0):
        supmaps.append((chr(i), unichr(i+0xfec0)))
class TestEUCJPCOMPATMap(test_multibytecodec_support.TestBase_Mapping,
                         unittest.TestCase):
    """Check the euc_jp codec against the EUC-JP.TXT mapping table."""

    encoding = 'euc_jp'
    # Local table file, and where to fetch it when it is missing.
    mapfilename = 'EUC-JP.TXT'
    mapfileurl = 'http://people.freebsd.org/~perky/i18n/EUC-JP.TXT'
class TestSJISCOMPATMap(test_multibytecodec_support.TestBase_Mapping,
                        unittest.TestCase):
    """Check the shift_jis codec against the SHIFTJIS.TXT mapping table."""

    encoding = 'shift_jis'
    # Local table file, and where to fetch it when it is missing.
    mapfilename = 'SHIFTJIS.TXT'
    mapfileurl = ('http://www.unicode.org/Public/MAPPINGS/OBSOLETE'
                  '/EASTASIA/JIS/SHIFTJIS.TXT')

    # (encoded, unicode) pairs exempt from the encode-direction check.
    pass_enctest = [
        ('\x81_', u'\\'),
    ]
    # (encoded, unicode) pairs exempt from the decode-direction check.
    pass_dectest = [
        ('\\', u'\xa5'),
        ('~', u'\u203e'),
        ('\x81_', u'\\'),
    ]
class TestSJISSTRICTMap(test_multibytecodec_support.TestBase_Mapping,
                        unittest.TestCase):
    """Check the shift_jis_strict codec against SHIFTJIS.TXT."""

    encoding = 'shift_jis_strict'
    # Local table file, and where to fetch it when it is missing.
    mapfilename = 'SHIFTJIS.TXT'
    mapfileurl = ('http://www.unicode.org/Public/MAPPINGS/OBSOLETE'
                  '/EASTASIA/JIS/SHIFTJIS.TXT')
class TestEUCJISX0213Map(test_multibytecodec_support.TestBase_Mapping,
                         unittest.TestCase):
    """Check the euc_jisx0213 codec against EUC-JISX0213.TXT."""

    encoding = 'euc_jisx0213'
    # Local table file, and where to fetch it when it is missing.
    mapfilename = 'EUC-JISX0213.TXT'
    mapfileurl = 'http://people.freebsd.org/~perky/i18n/EUC-JISX0213.TXT'
class TestSJISX0213Map(test_multibytecodec_support.TestBase_Mapping,
                       unittest.TestCase):
    """Check the shift_jisx0213 codec against SHIFT_JISX0213.TXT."""

    encoding = 'shift_jisx0213'
    # Local table file, and where to fetch it when it is missing.
    mapfilename = 'SHIFT_JISX0213.TXT'
    mapfileurl = 'http://people.freebsd.org/~perky/i18n/SHIFT_JISX0213.TXT'
def test_main():
    """Run the Japanese codec mapping test cases as one suite."""
    testcases = [TestCP932Map, TestEUCJPCOMPATMap, TestSJISCOMPATMap]
    # shift_jis_strict is only exercised when the support module reports
    # a CJKCodecs build.
    if test_multibytecodec_support.__cjkcodecs__:
        testcases.append(TestSJISSTRICTMap)
    testcases.append(TestEUCJISX0213Map)
    testcases.append(TestSJISX0213Map)

    suite = unittest.TestSuite()
    for testcase in testcases:
        suite.addTest(unittest.makeSuite(testcase))
    test_support.run_suite(suite)


# Runs at import time: locates the mapping files and sets this module's
# skip_expected flag, which regrtest reads.
test_multibytecodec_support.register_skip_expected(
    TestCP932Map, TestEUCJPCOMPATMap, TestSJISCOMPATMap,
    TestEUCJISX0213Map, TestSJISX0213Map)

if __name__ == "__main__":
    test_main()
--- NEW FILE: test_codecmaps_kr.py ---
#!/usr/bin/env python
#
# test_codecmaps_kr.py
# Codec mapping tests for ROK encodings
#
# $CJKCodecs: test_codecmaps_kr.py,v 1.2 2004/01/17 12:47:19 perky Exp $
from test import test_support
from test import test_multibytecodec_support
import unittest
class TestCP949Map(test_multibytecodec_support.TestBase_Mapping,
                   unittest.TestCase):
    """Check the cp949 codec against the CP949.TXT mapping table."""

    encoding = 'cp949'
    # Local table file, and where to fetch it when it is missing.
    mapfilename = 'CP949.TXT'
    mapfileurl = ('http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT'
                  '/WINDOWS/CP949.TXT')
class TestEUCKRMap(test_multibytecodec_support.TestBase_Mapping,
                   unittest.TestCase):
    """Check the euc_kr codec against the EUC-KR.TXT mapping table."""

    encoding = 'euc_kr'
    # Local table file, and where to fetch it when it is missing.
    mapfilename = 'EUC-KR.TXT'
    mapfileurl = 'http://people.freebsd.org/~perky/i18n/EUC-KR.TXT'
class TestJOHABMap(test_multibytecodec_support.TestBase_Mapping,
                   unittest.TestCase):
    """Check the johab codec against the JOHAB.TXT mapping table."""

    encoding = 'johab'
    # Local table file, and where to fetch it when it is missing.
    mapfilename = 'JOHAB.TXT'
    mapfileurl = ('http://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/'
                  'KSC/JOHAB.TXT')

    # KS X 1001 assigns 0x5c to WON SIGN, but johab was in wide use only
    # in the early 1990s, when most software treated 0x5c as REVERSE
    # SOLIDUS.  The standard is therefore ignored for this one byte, in
    # both the encode and the decode direction.
    pass_enctest = [('\\', u'\u20a9')]
    pass_dectest = [('\\', u'\u20a9')]
def test_main():
    """Run the ROK codec mapping test cases as one suite."""
    suite = unittest.TestSuite()
    for testcase in (TestCP949Map, TestEUCKRMap, TestJOHABMap):
        suite.addTest(unittest.makeSuite(testcase))
    test_support.run_suite(suite)


# Runs at import time: locates the mapping files and sets this module's
# skip_expected flag, which regrtest reads.
test_multibytecodec_support.register_skip_expected(
    TestCP949Map, TestEUCKRMap, TestJOHABMap)

if __name__ == "__main__":
    test_main()
--- NEW FILE: test_codecmaps_tw.py ---
#!/usr/bin/env python
#
# test_codecmaps_tw.py
# Codec mapping tests for ROC encodings
#
# $CJKCodecs: test_codecmaps_tw.py,v 1.2 2004/01/17 12:47:19 perky Exp $
from test import test_support
from test import test_multibytecodec_support
import unittest
class TestBIG5Map(test_multibytecodec_support.TestBase_Mapping,
                  unittest.TestCase):
    """Check the big5 codec against the BIG5.TXT mapping table."""

    encoding = 'big5'
    # Local table file, and where to fetch it when it is missing.
    mapfilename = 'BIG5.TXT'
    mapfileurl = ('http://www.unicode.org/Public/MAPPINGS/OBSOLETE/'
                  'EASTASIA/OTHER/BIG5.TXT')
class TestCP950Map(test_multibytecodec_support.TestBase_Mapping,
                   unittest.TestCase):
    """Check the cp950 codec against the CP950.TXT mapping table."""

    encoding = 'cp950'
    # Local table file, and where to fetch it when it is missing.
    mapfilename = 'CP950.TXT'
    mapfileurl = ('http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/'
                  'WINDOWS/CP950.TXT')

    # (encoded, unicode) pairs exempt from the encode-direction check.
    pass_enctest = [
        ('\xa2\xcc', u'\u5341'),
        ('\xa2\xce', u'\u5345'),
    ]
def test_main():
    """Run the ROC codec mapping test cases as one suite."""
    suite = unittest.TestSuite()
    for testcase in (TestBIG5Map, TestCP950Map):
        suite.addTest(unittest.makeSuite(testcase))
    test_support.run_suite(suite)


# Runs at import time: locates the mapping files and sets this module's
# skip_expected flag, which regrtest reads.
test_multibytecodec_support.register_skip_expected(TestBIG5Map, TestCP950Map)

if __name__ == "__main__":
    test_main()
--- NEW FILE: test_multibytecodec.py ---
#!/usr/bin/env python
#
# test_multibytecodec.py
# Unit test for multibytecodec itself
#
# $CJKCodecs: test_multibytecodec.py,v 1.5 2004/01/06 02:26:28 perky Exp $
from test import test_support
from test import test_multibytecodec_support
import unittest, StringIO, codecs
class Test_StreamWriter(unittest.TestCase):
    """StreamWriter behaviour when a surrogate pair is split across writes."""

    if len(u'\U00012345') == 2: # UCS2
        def test_gb18030(self):
            """A lone high surrogate is held back; reset() then fails."""
            astral = u'\U00012345'
            high, low = astral[0], astral[1]
            encoded = '\x907\x959'      # gb18030 bytes for U+12345
            stream = StringIO.StringIO()
            writer = codecs.lookup('gb18030')[3](stream)

            writer.write(u'123')
            self.assertEqual(stream.getvalue(), '123')
            writer.write(astral)
            self.assertEqual(stream.getvalue(), '123' + encoded)
            # Half a pair must not produce any output yet.
            writer.write(high)
            self.assertEqual(stream.getvalue(), '123' + encoded)
            writer.write(low + astral + u'\uac00\u00ac')
            expected = '123' + encoded * 3 + '\x827\xcf5\x810\x851'
            self.assertEqual(stream.getvalue(), expected)
            writer.write(high)
            self.assertEqual(stream.getvalue(), expected)
            # A pending high surrogate makes reset() raise, with no output.
            self.assertRaises(UnicodeError, writer.reset)
            self.assertEqual(stream.getvalue(), expected)

        # The original note says the standard utf-8 codec has a broken
        # StreamReader, so this test is defined only when the support
        # module's __cjkcodecs__ flag is set.
        if test_multibytecodec_support.__cjkcodecs__:
            def test_utf_8(self):
                """A lone high surrogate is held back; reset() flushes it."""
                astral = u'\U00012345'
                high, low = astral[0], astral[1]
                encoded = '\xf0\x92\x8d\x85'    # utf-8 bytes for U+12345
                stream = StringIO.StringIO()
                writer = codecs.lookup('utf-8')[3](stream)

                writer.write(u'123')
                self.assertEqual(stream.getvalue(), '123')
                writer.write(astral)
                self.assertEqual(stream.getvalue(), '123' + encoded)
                # Half a pair must not produce any output yet.
                writer.write(high)
                self.assertEqual(stream.getvalue(), '123' + encoded)
                writer.write(low + astral + u'\uac00\u00ac')
                expected = '123' + encoded * 3 + '\xea\xb0\x80\xc2\xac'
                self.assertEqual(stream.getvalue(), expected)
                writer.write(high)
                self.assertEqual(stream.getvalue(), expected)
                # reset() emits the pending high surrogate on its own ...
                writer.reset()
                expected = expected + '\xed\xa0\x88'
                self.assertEqual(stream.getvalue(), expected)
                # ... so the low surrogate that follows is encoded alone too.
                writer.write(low)
                expected = expected + '\xed\xbd\x85'
                self.assertEqual(stream.getvalue(), expected)

    else: # UCS4
        pass

    def test_nullcoding(self):
        """Empty strings convert to empty strings in both directions."""
        self.assertEqual(''.decode('utf-8'), u'')
        self.assertEqual(unicode('', 'utf-8'), u'')
        self.assertEqual(u''.encode('utf-8'), '')

    def test_str_decode(self):
        """Calling encode() on a byte string returns the same bytes."""
        self.assertEqual('abcd'.encode('utf-8'), 'abcd')
def test_main():
    """Run the multibytecodec stream-writer tests as a suite."""
    suite = unittest.TestSuite()
    for testcase in (Test_StreamWriter,):
        suite.addTest(unittest.makeSuite(testcase))
    test_support.run_suite(suite)


if __name__ == "__main__":
    test_main()
--- NEW FILE: test_multibytecodec_support.py ---
#!/usr/bin/env python
#
# test_multibytecodec_support.py
# Common Unittest Routines for CJK codecs
#
# $CJKCodecs: test_multibytecodec_support.py,v 1.5 2004/01/17 12:47:19 perky Exp $
import sys, codecs, os.path
import unittest
from test import test_support
from StringIO import StringIO
__cjkcodecs__ = 0 # define this as 0 for python
class TestBase:
encoding = '' # codec name
codec = None # codec tuple (with 4 elements)
tstring = '' # string to test StreamReader
codectests = None # must set. codec test tuple
roundtriptest = 1 # set if roundtrip is possible with unicode
has_iso10646 = 0 # set if this encoding contains whole iso10646 map
xmlcharnametest = None # string to test xmlcharrefreplace
def setUp(self):
if self.codec is None:
self.codec = codecs.lookup(self.encoding)
self.encode, self.decode, self.reader, self.writer = self.codec
def test_chunkcoding(self):
for native, utf8 in zip(*[StringIO(f).readlines()
for f in self.tstring]):
u = self.decode(native)[0]
self.assertEqual(u, utf8.decode('utf-8'))
if self.roundtriptest:
self.assertEqual(native, self.encode(u)[0])
def test_errorhandle(self):
for source, scheme, expected in self.codectests:
if type(source) == type(''):
func = self.decode
else:
func = self.encode
if expected:
result = func(source, scheme)[0]
self.assertEqual(result, expected)
else:
self.assertRaises(UnicodeError, func, source, scheme)
if sys.hexversion >= 0x02030000:
def test_xmlcharrefreplace(self):
if self.has_iso10646:
return
s = u"\u0b13\u0b23\u0b60 nd eggs"
self.assertEqual(
self.encode(s, "xmlcharrefreplace")[0],
"ଓଣୠ nd eggs"
)
def test_customreplace(self):
if self.has_iso10646:
return
import htmlentitydefs
names = {}
for (key, value) in htmlentitydefs.entitydefs.items():
if len(value)==1:
names[value.decode('latin-1')] = self.decode(key)[0]
else:
names[unichr(int(value[2:-1]))] = self.decode(key)[0]
def xmlcharnamereplace(exc):
if not isinstance(exc, UnicodeEncodeError):
raise TypeError("don't know how to handle %r" % exc)
l = []
for c in exc.object[exc.start:exc.end]:
try:
l.append(u"&%s;" % names[c])
except KeyError:
l.append(u"&#%d;" % ord(c))
return (u"".join(l), exc.end)
codecs.register_error(
"test.xmlcharnamereplace", xmlcharnamereplace)
if self.xmlcharnametest:
sin, sout = self.xmlcharnametest
else:
sin = u"\xab\u211c\xbb = \u2329\u1234\u232a"
sout = "«ℜ» = ⟨ሴ⟩"
self.assertEqual(self.encode(sin,
"test.xmlcharnamereplace")[0], sout)
def test_streamreader(self):
UTF8Writer = codecs.getwriter('utf-8')
for name in ["read", "readline", "readlines"]:
for sizehint in [None, -1] + range(1, 33) + \
[64, 128, 256, 512, 1024]:
istream = self.reader(StringIO(self.tstring[0]))
ostream = UTF8Writer(StringIO())
func = getattr(istream, name)
while 1:
data = func(sizehint)
if not data:
break
if name == "readlines":
ostream.writelines(data)
else:
ostream.write(data)
self.assertEqual(ostream.getvalue(), self.tstring[1])
def test_streamwriter(self):
if __cjkcodecs__:
readfuncs = ('read', 'readline', 'readlines')
else:
# standard utf8 codec has broken readline and readlines.
readfuncs = ('read',)
UTF8Reader = codecs.getreader('utf-8')
for name in readfuncs:
for sizehint in [None] + range(1, 33) + \
[64, 128, 256, 512, 1024]:
istream = UTF8Reader(StringIO(self.tstring[1]))
ostream = self.writer(StringIO())
func = getattr(istream, name)
while 1:
if sizehint is not None:
data = func(sizehint)
else:
data = func()
if not data:
break
if name == "readlines":
ostream.writelines(data)
else:
ostream.write(data)
self.assertEqual(ostream.getvalue(), self.tstring[0])
if len(u'\U00012345') == 2: # ucs2 build
    # On this build a character above U+FFFF is stored as two code units,
    # so wrap the builtins to convert between a code point and its
    # surrogate pair.
    _unichr = unichr
    _ord = ord

    def unichr(v):
        """Return the character for code point *v*, as a pair if needed."""
        if v < 0x10000:
            return _unichr(v)
        offset = v - 0x10000
        return _unichr(0xd800 + (offset >> 10)) + \
               _unichr(0xdc00 + (offset & 0x3ff))

    def ord(c):
        """Return the code point of *c*, combining a two-unit pair."""
        if len(c) != 2:
            return _ord(c)
        return 0x10000 + ((_ord(c[0]) - 0xd800) << 10) + \
               (_ord(c[1]) - 0xdc00)
class TestBase_Mapping(unittest.TestCase):
    """Check a codec against a character mapping table file.

    Subclasses set ``encoding``, ``mapfilename`` and ``mapfileurl``.
    ``pass_enctest`` / ``pass_dectest`` list (encoded, unicode) pairs
    that are exempt from the encode / decode comparison, and ``supmaps``
    lists extra pairs checked by test_mapping_supplemental.
    """

    pass_enctest = []
    pass_dectest = []
    supmaps = []

    def __init__(self, *args, **kw):
        """Raise TestSkipped when the mapping file is not available."""
        unittest.TestCase.__init__(self, *args, **kw)
        if not os.path.exists(self.mapfilename):
            raise test_support.TestSkipped('%s not found, download from %s' %
                    (self.mapfilename, self.mapfileurl))

    def test_mapping_file(self):
        """Check every usable line of the mapping file in both directions."""
        # The table is a downloaded file, so its number tokens are parsed
        # with int() instead of being handed to eval().  Base 0 accepts
        # the same integer spellings (e.g. the 0x prefix).
        def unichrs(s):
            return u''.join([unichr(int(x, 0)) for x in s.split('+')])

        urt_wa = {}

        mapfile = open(self.mapfilename)
        try:
            for line in mapfile:
                # Drop the trailing comment, keep "<encoded> <unicode>".
                data = line.split('#')[0].strip().split()
                if len(data) != 2:
                    continue

                # Turn the numeric code into its big-endian byte string.
                csetval = int(data[0], 0)
                if csetval <= 0x7F:
                    csetch = chr(csetval & 0xff)
                elif csetval >= 0x1000000:
                    csetch = chr(csetval >> 24) + \
                             chr((csetval >> 16) & 0xff) + \
                             chr((csetval >> 8) & 0xff) + \
                             chr(csetval & 0xff)
                elif csetval >= 0x10000:
                    csetch = chr(csetval >> 16) + \
                             chr((csetval >> 8) & 0xff) + \
                             chr(csetval & 0xff)
                elif csetval >= 0x100:
                    csetch = chr(csetval >> 8) + chr(csetval & 0xff)
                else:
                    # Single bytes 0x80..0xff are not checked here.
                    continue

                unich = unichrs(data[1])
                # Only the first line seen for each unicode value is used.
                if ord(unich) == 0xfffd or unich in urt_wa:
                    continue
                urt_wa[unich] = csetch

                self._testpoint(csetch, unich)
        finally:
            # The original never closed the file.
            mapfile.close()

    def test_mapping_supplemental(self):
        """Check the extra pairs listed in ``supmaps``."""
        for mapping in self.supmaps:
            self._testpoint(*mapping)

    def _testpoint(self, csetch, unich):
        """Compare one pair in each direction unless it is exempted."""
        if (csetch, unich) not in self.pass_enctest:
            self.assertEqual(unich.encode(self.encoding), csetch)
        if (csetch, unich) not in self.pass_dectest:
            self.assertEqual(unicode(csetch, self.encoding), unich)
def _read_file(path):
    """Return the whole content of *path*, closing the file afterwards."""
    fp = open(path)
    try:
        return fp.read()
    finally:
        fp.close()


def load_teststring(encoding):
    """Return the sample text for *encoding*.

    When ``__cjkcodecs__`` is set the samples are read from
    ``sampletexts/<encoding>.txt`` and ``sampletexts/<encoding>.utf8``
    and returned as an (encoded text, UTF-8 text) tuple.  Otherwise the
    entry stored in ``test.cjkencodings_test.teststring`` is returned
    (presumably a pair of the same shape).  An unknown name raises
    KeyError in that case.
    """
    if not __cjkcodecs__:
        from test import cjkencodings_test
        return cjkencodings_test.teststring[encoding]

    base = os.path.join('sampletexts', encoding)
    # Read through a helper so that both file handles get closed; the
    # original left them open.
    etxt = _read_file(base + '.txt')
    utxt = _read_file(base + '.utf8')
    return (etxt, utxt)
def register_skip_expected(*cases):
    """Locate each case's mapping file and set its module's skip flag.

    Every case's ``mapfilename`` is searched for in the current and the
    parent directory and rewritten to the path that was found.  The
    search stops at the first case whose file is missing.  The module of
    the last case examined then gets ``skip_expected`` set to True if a
    file was missing and to False if all were found.

    At least one case must be passed.
    """
    missing = False
    for case in cases:
        located = None
        for directory in (os.path.curdir, os.path.pardir):
            candidate = os.path.join(directory, case.mapfilename)
            if os.path.exists(candidate):
                located = candidate
                break
        if located is None:
            missing = True
            break
        case.mapfilename = located
    # 'case' is the last case examined: the first one with a missing
    # file, or the final one when every file was found.
    sys.modules[case.__module__].skip_expected = missing
Index: regrtest.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/test/regrtest.py,v
retrieving revision 1.150
retrieving revision 1.151
diff -C2 -d -r1.150 -r1.151
*** regrtest.py 20 Nov 2003 22:11:29 -0000 1.150
--- regrtest.py 17 Jan 2004 14:29:28 -0000 1.151
***************
*** 550,553 ****
--- 550,557 ----
# Controlled by test_timeout.skip_expected. Requires the network
# resource and a socket module.
+ # test_codecmaps_*
+ # Whether a skip is expected here depends on whether a large test
+ # input file has been downloaded. test_codecmaps_*.skip_expected
+ # controls that.
_expectations = {
***************
*** 566,570 ****
test_dbm
test_dl
- test_email_codecs
test_fcntl
test_fork1
--- 570,573 ----
***************
*** 599,603 ****
test_curses
test_dl
- test_email_codecs
test_gl
test_imgfile
--- 602,605 ----
***************
*** 624,628 ****
test_dbm
test_dl
- test_email_codecs
test_fcntl
test_fork1
--- 626,629 ----
***************
*** 779,783 ****
test_curses
test_dl
- test_email_codecs
test_gdbm
test_gl
--- 780,783 ----
***************
*** 804,808 ****
test_curses
test_dbm
- test_email_codecs
test_gdbm
test_gl
--- 804,807 ----
***************
*** 851,855 ****
test_curses
test_dl
- test_email_codecs
test_gdbm
test_gl
--- 850,853 ----
***************
*** 877,881 ****
test_curses
test_dbm
- test_email_codecs
test_gl
test_imgfile
--- 875,878 ----
***************
*** 902,906 ****
test_curses
test_dl
- test_email_codecs
test_gl
test_imgfile
--- 899,902 ----
***************
*** 926,930 ****
test_cd
test_cl
- test_email_codecs
test_gl
test_imgfile
--- 922,925 ----
***************
*** 956,959 ****
--- 951,956 ----
from test import test_socket_ssl
from test import test_timeout
+ from test import test_codecmaps_cn, test_codecmaps_jp
+ from test import test_codecmaps_kr, test_codecmaps_tw
self.valid = False
***************
*** 974,977 ****
--- 971,978 ----
self.expected.add('test_timeout')
+ for cc in ('cn', 'jp', 'kr', 'tw'):
+ if eval('test_codecmaps_' + cc).skip_expected:
+ self.expected.add('test_codecmaps_' + cc)
+
if not sys.platform in ("mac", "darwin"):
MAC_ONLY = ["test_macostools", "test_macfs", "test_aepack",
- Previous message: [Python-checkins] python/dist/src/Lib/encodings big5.py, NONE,
1.1 cp932.py, NONE, 1.1 cp949.py, NONE, 1.1 cp950.py, NONE,
1.1 euc_jisx0213.py, NONE, 1.1 euc_jp.py, NONE, 1.1 euc_kr.py,
NONE, 1.1 gb18030.py, NONE, 1.1 gb2312.py, NONE, 1.1 gbk.py,
NONE, 1.1 hz.py, NONE, 1.1 iso2022_jp.py, NONE,
1.1 iso2022_jp_1.py, NONE, 1.1 iso2022_jp_2.py, NONE,
1.1 iso2022_jp_3.py, NONE, 1.1 iso2022_jp_ext.py, NONE,
1.1 iso2022_kr.py, NONE, 1.1 johab.py, NONE, 1.1 shift_jis.py,
NONE, 1.1 shift_jisx0213.py, NONE, 1.1 aliases.py, 1.20, 1.21
- Next message: [Python-checkins] python/dist/src/Modules Setup.dist,1.42,1.43
- Messages sorted by:
[ date ]
[ thread ]
[ subject ]
[ author ]
More information about the Python-checkins
mailing list