[Python-checkins] python/dist/src/Lib/test cjkencodings_test.py, NONE, 1.1 test_codecencodings_cn.py, NONE, 1.1 test_codecencodings_jp.py, NONE, 1.1 test_codecencodings_kr.py, NONE, 1.1 test_codecencodings_tw.py, NONE, 1.1 test_codecmaps_cn.py, NONE, 1.1 test_codecmaps_jp.py, NONE, 1.1 test_codecmaps_kr.py, NONE, 1.1 test_codecmaps_tw.py, NONE, 1.1 test_multibytecodec.py, NONE, 1.1 test_multibytecodec_support.py, NONE, 1.1 regrtest.py, 1.150, 1.151

Sat Jan 17 09:29:30 EST 2004

Update of /cvsroot/python/python/dist/src/Lib/test
In directory sc8-pr-cvs1:/tmp/cvs-serv14239/Lib/test
Modified Files:
	regrtest.py 
Added Files:
	cjkencodings_test.py test_codecencodings_cn.py 
	test_codecencodings_jp.py test_codecencodings_kr.py 
	test_codecencodings_tw.py test_codecmaps_cn.py 
	test_codecmaps_jp.py test_codecmaps_kr.py test_codecmaps_tw.py 
	test_multibytecodec.py test_multibytecodec_support.py 
Log Message:
Add CJK codecs support as discussed on python-dev. (SF #873597)
Several style fixes were suggested by Martin v. Loewis and
Marc-Andre Lemburg. Thanks!
--- NEW FILE: cjkencodings_test.py ---
teststring = {
'big5': (
"\xa6\x70\xa6\xf3\xa6\x62\x20\x50\x79\x74\x68\x6f\x6e\x20\xa4\xa4"
"\xa8\xcf\xa5\xce\xac\x4a\xa6\xb3\xaa\xba\x20\x43\x20\x6c\x69\x62"
"\x72\x61\x72\x79\x3f\x0a\xa1\x40\xa6\x62\xb8\xea\xb0\x54\xac\xec"
"\xa7\xde\xa7\xd6\xb3\x74\xb5\x6f\xae\x69\xaa\xba\xa4\xb5\xa4\xd1"
"\x2c\x20\xb6\x7d\xb5\x6f\xa4\xce\xb4\xfa\xb8\xd5\xb3\x6e\xc5\xe9"
"\xaa\xba\xb3\x74\xab\xd7\xac\x4f\xa4\xa3\xae\x65\xa9\xbf\xb5\xf8"
"\xaa\xba\x0a\xbd\xd2\xc3\x44\x2e\x20\xac\xb0\xa5\x5b\xa7\xd6\xb6"
"\x7d\xb5\x6f\xa4\xce\xb4\xfa\xb8\xd5\xaa\xba\xb3\x74\xab\xd7\x2c"
"\x20\xa7\xda\xad\xcc\xab\x4b\xb1\x60\xa7\xc6\xb1\xe6\xaf\xe0\xa7"
"\x51\xa5\xce\xa4\x40\xa8\xc7\xa4\x77\xb6\x7d\xb5\x6f\xa6\x6e\xaa"
"\xba\x0a\x6c\x69\x62\x72\x61\x72\x79\x2c\x20\xa8\xc3\xa6\xb3\xa4"
"\x40\xad\xd3\x20\x66\x61\x73\x74\x20\x70\x72\x6f\x74\x6f\x74\x79"
"\x70\x69\x6e\x67\x20\xaa\xba\x20\x70\x72\x6f\x67\x72\x61\x6d\x6d"
"\x69\x6e\x67\x20\x6c\x61\x6e\x67\x75\x61\x67\x65\x20\xa5\x69\x0a"
"\xa8\xd1\xa8\xcf\xa5\xce\x2e\x20\xa5\xd8\xab\x65\xa6\xb3\xb3\x5c"
"\xb3\x5c\xa6\x68\xa6\x68\xaa\xba\x20\x6c\x69\x62\x72\x61\x72\x79"
"\x20\xac\x4f\xa5\x48\x20\x43\x20\xbc\x67\xa6\xa8\x2c\x20\xa6\xd3"
[...962 lines suppressed...]
"\x20\xe3\x81\xa7\xe3\x81\xaf\xe3\x81\x9d\xe3\x81\x86\xe3\x81\x84"
"\xe3\x81\xa3\xe3\x81\x9f\xe5\xb0\x8f\xe7\xb4\xb0\xe5\xb7\xa5\xe3"
"\x81\x8c\xe8\xbf\xbd\xe5\x8a\xa0\xe3\x81\x95\xe3\x82\x8c\xe3\x82"
"\x8b\xe3\x81\x93\xe3\x81\xa8\xe3\x81\xaf\xe3\x81\x82\xe3\x81\xbe"
"\xe3\x82\x8a\xe3\x81\x82\xe3\x82\x8a\xe3\x81\xbe\xe3\x81\x9b\xe3"
"\x82\x93\xe3\x80\x82\x0a\xe8\xa8\x80\xe8\xaa\x9e\xe8\x87\xaa\xe4"
"\xbd\x93\xe3\x81\xae\xe6\xa9\x9f\xe8\x83\xbd\xe3\x81\xaf\xe6\x9c"
"\x80\xe5\xb0\x8f\xe9\x99\x90\xe3\x81\xab\xe6\x8a\xbc\xe3\x81\x95"
"\xe3\x81\x88\xe3\x80\x81\xe5\xbf\x85\xe8\xa6\x81\xe3\x81\xaa\xe6"
"\xa9\x9f\xe8\x83\xbd\xe3\x81\xaf\xe6\x8b\xa1\xe5\xbc\xb5\xe3\x83"
"\xa2\xe3\x82\xb8\xe3\x83\xa5\xe3\x83\xbc\xe3\x83\xab\xe3\x81\xa8"
"\xe3\x81\x97\xe3\x81\xa6\xe8\xbf\xbd\xe5\x8a\xa0\xe3\x81\x99\xe3"
"\x82\x8b\xe3\x80\x81\xe3\x81\xa8\xe3\x81\x84\xe3\x81\x86\xe3\x81"
"\xae\xe3\x81\x8c\x20\x50\x79\x74\x68\x6f\x6e\x20\xe3\x81\xae\xe3"
"\x83\x9d\xe3\x83\xaa\xe3\x82\xb7\xe3\x83\xbc\xe3\x81\xa7\xe3\x81"
"\x99\xe3\x80\x82\x0a\x0a\xe3\x83\x8e\xe3\x81\x8b\xe3\x82\x9a\x20"
"\xe3\x83\x88\xe3\x82\x9a\x20\xe3\x83\x88\xe3\x82\xad\xef\xa8\xb6"
"\xef\xa8\xb9\x20\xf0\xa1\x9a\xb4\xf0\xaa\x8e\x8c\x20\xe9\xba\x80"
"\xe9\xbd\x81\xf0\xa9\x9b\xb0\x0a"),
}
--- NEW FILE: test_codecencodings_cn.py ---
#!/usr/bin/env python
#
# test_codecencodings_cn.py
# Codec encoding tests for PRC encodings.
#
# $CJKCodecs: test_codecencodings_cn.py,v 1.1 2003/12/19 03:00:05 perky Exp $
from test import test_support
from test import test_multibytecodec_support
import unittest
class Test_GB2312(test_multibytecodec_support.TestBase, unittest.TestCase):
    """Run the shared codec checks against the ``gb2312`` codec."""

    encoding = 'gb2312'
    tstring = test_multibytecodec_support.load_teststring('gb2312')

    # Rows are (source, error scheme, expected result); a row whose expected
    # result is None has to fail with UnicodeError.
    codectests = (
        # invalid bytes
        ("abc\x81\x81\xc1\xc4", "strict", None),
        ("abc\xc8", "strict", None),
        ("abc\x81\x81\xc1\xc4", "replace", u"abc\ufffd\u804a"),
        ("abc\x81\x81\xc1\xc4\xc8", "replace", u"abc\ufffd\u804a\ufffd"),
        ("abc\x81\x81\xc1\xc4", "ignore", u"abc\u804a"),
        ("\xc1\x64", "strict", None),
    )
class Test_GBK(test_multibytecodec_support.TestBase, unittest.TestCase):
    """Run the shared codec checks against the ``gbk`` codec."""

    encoding = 'gbk'
    tstring = test_multibytecodec_support.load_teststring('gbk')

    # Rows are (source, error scheme, expected result); None means the call
    # has to raise UnicodeError.
    codectests = (
        # invalid bytes
        ("abc\x80\x80\xc1\xc4", "strict", None),
        ("abc\xc8", "strict", None),
        ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\u804a"),
        ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\u804a\ufffd"),
        ("abc\x80\x80\xc1\xc4", "ignore", u"abc\u804a"),
        ("\x83\x34\x83\x31", "strict", None),
    )
class Test_GB18030(test_multibytecodec_support.TestBase, unittest.TestCase):
    """Run the shared codec checks against the ``gb18030`` codec."""

    encoding = 'gb18030'
    # The encoding covers the whole ISO 10646 repertoire, which makes the
    # base class return early from its xmlcharrefreplace/customreplace tests.
    has_iso10646 = True
    tstring = test_multibytecodec_support.load_teststring('gb18030')

    # Rows are (source, error scheme, expected result); None means the call
    # has to raise UnicodeError.
    codectests = (
        # invalid bytes
        ("abc\x80\x80\xc1\xc4", "strict", None),
        ("abc\xc8", "strict", None),
        ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\u804a"),
        ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\u804a\ufffd"),
        ("abc\x80\x80\xc1\xc4", "ignore", u"abc\u804a"),
        ("abc\x84\x39\x84\x39\xc1\xc4", "replace", u"abc\ufffd\u804a"),
    )
def test_main():
    """Collect the three PRC codec test cases into one suite and run it."""
    suite = unittest.TestSuite()
    for case in (Test_GB2312, Test_GBK, Test_GB18030):
        suite.addTest(unittest.makeSuite(case))
    test_support.run_suite(suite)


if __name__ == "__main__":
    test_main()
--- NEW FILE: test_codecencodings_jp.py ---
#!/usr/bin/env python
#
# test_codecencodings_jp.py
# Codec encoding tests for Japanese encodings.
#
# $CJKCodecs: test_codecencodings_jp.py,v 1.2 2004/01/06 09:25:37 perky Exp $
from test import test_support
from test import test_multibytecodec_support
import unittest
class Test_CP932(test_multibytecodec_support.TestBase, unittest.TestCase):
    """Run the shared codec checks against the ``cp932`` codec.

    The sample text is the one loaded under the ``shift_jis`` key.
    """

    encoding = 'cp932'
    tstring = test_multibytecodec_support.load_teststring('shift_jis')

    # Rows are (source, error scheme, expected result); None means the call
    # has to raise UnicodeError.
    codectests = (
        # invalid bytes
        ("abc\x81\x00\x81\x00\x82\x84", "strict", None),
        ("abc\xf8", "strict", None),
        ("abc\x81\x00\x82\x84", "replace", u"abc\ufffd\uff44"),
        ("abc\x81\x00\x82\x84\x88", "replace", u"abc\ufffd\uff44\ufffd"),
        ("abc\x81\x00\x82\x84", "ignore", u"abc\uff44"),
        # sjis vs cp932
        ("\\\x7e", "replace", u"\\\x7e"),
        ("\x81\x5f\x81\x61\x81\x7c", "replace", u"\uff3c\u2225\uff0d"),
    )
class Test_EUC_JISX0213(test_multibytecodec_support.TestBase,
                        unittest.TestCase):
    """Run the shared codec checks against the ``euc_jisx0213`` codec."""

    encoding = 'euc_jisx0213'
    tstring = test_multibytecodec_support.load_teststring('euc_jisx0213')

    # Rows are (source, error scheme, expected result); None means the call
    # has to raise UnicodeError.
    codectests = (
        # invalid bytes
        ("abc\x80\x80\xc1\xc4", "strict", None),
        ("abc\xc8", "strict", None),
        ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\u7956"),
        ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\u7956\ufffd"),
        ("abc\x80\x80\xc1\xc4", "ignore", u"abc\u7956"),
        ("abc\x8f\x83\x83", "replace", u"abc\ufffd"),
        ("\xc1\x64", "strict", None),
        ("\xa1\xc0", "strict", u"\uff3c"),
    )

    # (input, expected output) pair that replaces the base class defaults in
    # its custom "test.xmlcharnamereplace" error-handler test.
    xmlcharnametest = (
        u"\xab\u211c\xbb = \u2329\u1234\u232a",
        "\xa9\xa8&real;\xa9\xb2 = &lang;&#4660;&rang;"
    )
# Error-handling rows shared by both EUC-JP test classes.  Each row is
# (source, error scheme, expected result); None marks a row that has to
# raise UnicodeError.
eucjp_commontests = (
    ("abc\x80\x80\xc1\xc4", "strict", None),
    ("abc\xc8", "strict", None),
    ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\u7956"),
    ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\u7956\ufffd"),
    ("abc\x80\x80\xc1\xc4", "ignore", u"abc\u7956"),
    ("abc\x8f\x83\x83", "replace", u"abc\ufffd"),
    ("\xc1\x64", "strict", None),
)
class Test_EUC_JP_COMPAT(test_multibytecodec_support.TestBase,
                         unittest.TestCase):
    """Run the shared codec checks against the ``euc_jp`` codec."""

    encoding = 'euc_jp'
    tstring = test_multibytecodec_support.load_teststring('euc_jp')

    # The common EUC-JP rows plus the backslash / yen / overline rows
    # expected from this codec.
    codectests = eucjp_commontests + (
        ("\xa1\xc0\\", "strict", u"\uff3c\\"),
        (u"\xa5", "strict", "\x5c"),
        (u"\u203e", "strict", "\x7e"),
    )
class Test_EUC_JP_STRICT(test_multibytecodec_support.TestBase,
                         unittest.TestCase):
    """Run the shared codec checks against the ``euc_jp_strict`` codec."""

    encoding = 'euc_jp_strict'
    tstring = test_multibytecodec_support.load_teststring('euc_jp')

    # The common EUC-JP rows plus the rows specific to this codec: encoding
    # U+00A5 and U+203E has to raise UnicodeError here.
    codectests = eucjp_commontests + (
        ("\xa1\xc0\\", "strict", u"\\\\"),
        (u"\xa5", "strict", None),
        (u"\u203e", "strict", None),
    )
# Error-handling rows shared by both Shift_JIS test classes.  Each row is
# (source, error scheme, expected result); None marks a row that has to
# raise UnicodeError.
shiftjis_commonenctests = (
    ("abc\x80\x80\x82\x84", "strict", None),
    ("abc\xf8", "strict", None),
    ("abc\x80\x80\x82\x84", "replace", u"abc\ufffd\uff44"),
    ("abc\x80\x80\x82\x84\x88", "replace", u"abc\ufffd\uff44\ufffd"),
    ("abc\x80\x80\x82\x84def", "ignore", u"abc\uff44def"),
)
class Test_SJIS_COMPAT(test_multibytecodec_support.TestBase, unittest.TestCase):
    """Run the shared codec checks against the ``shift_jis`` codec."""

    encoding = 'shift_jis'
    tstring = test_multibytecodec_support.load_teststring('shift_jis')

    # The common Shift_JIS rows plus the rows specific to this codec.
    codectests = shiftjis_commonenctests + (
        ("\\\x7e", "strict", u"\\\x7e"),
        ("\x81\x5f\x81\x61\x81\x7c", "strict", u"\uff3c\u2016\u2212"),
    )
class Test_SJIS_STRICT(test_multibytecodec_support.TestBase, unittest.TestCase):
    """Run the shared codec checks against the ``shift_jis_strict`` codec."""

    encoding = 'shift_jis_strict'
    tstring = test_multibytecodec_support.load_teststring('shift_jis')

    # The common Shift_JIS rows plus the rows specific to this codec.
    codectests = shiftjis_commonenctests + (
        ("\\\x7e", "replace", u"\xa5\u203e"),
        ("\x81\x5f\x81\x61\x81\x7c", "replace", u"\x5c\u2016\u2212"),
    )
class Test_SJISX0213(test_multibytecodec_support.TestBase, unittest.TestCase):
    """Run the shared codec checks against the ``shift_jisx0213`` codec."""

    encoding = 'shift_jisx0213'
    tstring = test_multibytecodec_support.load_teststring('shift_jisx0213')

    # Rows are (source, error scheme, expected result); None means the call
    # has to raise UnicodeError.
    codectests = (
        # invalid bytes
        ("abc\x80\x80\x82\x84", "strict", None),
        ("abc\xf8", "strict", None),
        ("abc\x80\x80\x82\x84", "replace", u"abc\ufffd\uff44"),
        ("abc\x80\x80\x82\x84\x88", "replace", u"abc\ufffd\uff44\ufffd"),
        ("abc\x80\x80\x82\x84def", "ignore", u"abc\uff44def"),
        # sjis vs cp932
        ("\\\x7e", "replace", u"\xa5\u203e"),
        ("\x81\x5f\x81\x61\x81\x7c", "replace", u"\x5c\u2016\u2212"),
    )

    # (input, expected output) pair that replaces the base class defaults in
    # its custom "test.xmlcharnamereplace" error-handler test.
    xmlcharnametest = (
        u"\xab\u211c\xbb = \u2329\u1234\u232a",
        "\x85G&real;\x85Q = &lang;&#4660;&rang;"
    )
def test_main():
    """Collect the Japanese codec test cases into one suite and run it.

    The two ``*_STRICT`` cases are added only when the support module's
    ``__cjkcodecs__`` flag is true.
    """
    cases = [Test_CP932, Test_EUC_JISX0213, Test_EUC_JP_COMPAT,
             Test_SJIS_COMPAT]
    if test_multibytecodec_support.__cjkcodecs__:
        cases.append(Test_EUC_JP_STRICT)
        cases.append(Test_SJIS_STRICT)
    cases.append(Test_SJISX0213)

    suite = unittest.TestSuite()
    for case in cases:
        suite.addTest(unittest.makeSuite(case))
    test_support.run_suite(suite)


if __name__ == "__main__":
    test_main()
--- NEW FILE: test_codecencodings_kr.py ---
#!/usr/bin/env python
#
# test_codecencodings_kr.py
# Codec encoding tests for ROK encodings.
#
# $CJKCodecs: test_codecencodings_kr.py,v 1.1 2003/12/19 03:00:06 perky Exp $
from test import test_support
from test import test_multibytecodec_support
import unittest
class Test_CP949(test_multibytecodec_support.TestBase, unittest.TestCase):
    """Run the shared codec checks against the ``cp949`` codec."""

    encoding = 'cp949'
    tstring = test_multibytecodec_support.load_teststring('cp949')

    # Rows are (source, error scheme, expected result); None means the call
    # has to raise UnicodeError.
    codectests = (
        # invalid bytes
        ("abc\x80\x80\xc1\xc4", "strict", None),
        ("abc\xc8", "strict", None),
        ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\uc894"),
        ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\uc894\ufffd"),
        ("abc\x80\x80\xc1\xc4", "ignore", u"abc\uc894"),
    )
class Test_EUCKR(test_multibytecodec_support.TestBase, unittest.TestCase):
    """Run the shared codec checks against the ``euc_kr`` codec."""

    encoding = 'euc_kr'
    tstring = test_multibytecodec_support.load_teststring('euc_kr')

    # Rows are (source, error scheme, expected result); None means the call
    # has to raise UnicodeError.
    codectests = (
        # invalid bytes
        ("abc\x80\x80\xc1\xc4", "strict", None),
        ("abc\xc8", "strict", None),
        ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\uc894"),
        ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\uc894\ufffd"),
        ("abc\x80\x80\xc1\xc4", "ignore", u"abc\uc894"),
    )
class Test_JOHAB(test_multibytecodec_support.TestBase, unittest.TestCase):
    """Run the shared codec checks against the ``johab`` codec."""

    encoding = 'johab'
    tstring = test_multibytecodec_support.load_teststring('johab')

    # Rows are (source, error scheme, expected result); None means the call
    # has to raise UnicodeError.
    codectests = (
        # invalid bytes
        ("abc\x80\x80\xc1\xc4", "strict", None),
        ("abc\xc8", "strict", None),
        ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\ucd27"),
        ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\ucd27\ufffd"),
        ("abc\x80\x80\xc1\xc4", "ignore", u"abc\ucd27"),
    )
def test_main():
    """Collect the three Korean codec test cases into one suite and run it."""
    suite = unittest.TestSuite()
    for case in (Test_CP949, Test_EUCKR, Test_JOHAB):
        suite.addTest(unittest.makeSuite(case))
    test_support.run_suite(suite)


if __name__ == "__main__":
    test_main()
--- NEW FILE: test_codecencodings_tw.py ---
#!/usr/bin/env python
#
# test_codecencodings_tw.py
# Codec encoding tests for ROC encodings.
#
# $CJKCodecs: test_codecencodings_tw.py,v 1.1 2003/12/19 03:00:06 perky Exp $
from test import test_support
from test import test_multibytecodec_support
import unittest
class Test_Big5(test_multibytecodec_support.TestBase, unittest.TestCase):
    """Run the shared codec checks against the ``big5`` codec."""

    encoding = 'big5'
    tstring = test_multibytecodec_support.load_teststring('big5')

    # Rows are (source, error scheme, expected result); None means the call
    # has to raise UnicodeError.
    codectests = (
        # invalid bytes
        ("abc\x80\x80\xc1\xc4", "strict", None),
        ("abc\xc8", "strict", None),
        ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\u8b10"),
        ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\u8b10\ufffd"),
        ("abc\x80\x80\xc1\xc4", "ignore", u"abc\u8b10"),
    )
def test_main():
    """Build a suite from the Big5 test case and run it."""
    big5_tests = unittest.makeSuite(Test_Big5)
    suite = unittest.TestSuite()
    suite.addTest(big5_tests)
    test_support.run_suite(suite)


if __name__ == "__main__":
    test_main()
--- NEW FILE: test_codecmaps_cn.py ---
#!/usr/bin/env python
#
# test_codecmaps_cn.py
# Codec mapping tests for PRC encodings
#
# $CJKCodecs: test_codecmaps_cn.py,v 1.2 2004/01/17 12:47:19 perky Exp $
from test import test_support
from test import test_multibytecodec_support
import unittest
class TestGB2312Map(test_multibytecodec_support.TestBase_Mapping,
                    unittest.TestCase):
    """Check the ``gb2312`` codec against the EUC-CN.TXT mapping table."""

    encoding = 'gb2312'
    # Local table name, and the URL quoted in the skip message when the
    # table is missing.
    mapfilename = 'EUC-CN.TXT'
    mapfileurl = 'http://people.freebsd.org/~perky/i18n/EUC-CN.TXT'
class TestGBKMap(test_multibytecodec_support.TestBase_Mapping,
                 unittest.TestCase):
    """Check the ``gbk`` codec against the CP936.TXT mapping table."""

    encoding = 'gbk'
    # Local table name, and the URL quoted in the skip message when the
    # table is missing.
    mapfilename = 'CP936.TXT'
    mapfileurl = ('http://www.unicode.org/Public/MAPPINGS/VENDORS/'
                  'MICSFT/WINDOWS/CP936.TXT')
def test_main():
    """Collect the PRC mapping test cases into one suite and run it."""
    suite = unittest.TestSuite()
    for case in (TestGB2312Map, TestGBKMap):
        suite.addTest(unittest.makeSuite(case))
    test_support.run_suite(suite)


# Runs at import time: locates the mapping tables and sets this module's
# ``skip_expected`` flag.
test_multibytecodec_support.register_skip_expected(TestGB2312Map, TestGBKMap)

if __name__ == "__main__":
    test_main()
--- NEW FILE: test_codecmaps_jp.py ---
#!/usr/bin/env python
#
# test_codecmaps_jp.py
# Codec mapping tests for Japanese encodings
#
# $CJKCodecs: test_codecmaps_jp.py,v 1.2 2004/01/17 12:47:19 perky Exp $
from test import test_support
from test import test_multibytecodec_support
import unittest
class TestCP932Map(test_multibytecodec_support.TestBase_Mapping,
                   unittest.TestCase):
    """Check the ``cp932`` codec against the CP932.TXT mapping table."""

    encoding = 'cp932'
    mapfilename = 'CP932.TXT'
    mapfileurl = ('http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/'
                  'WINDOWS/CP932.TXT')

    # Extra (encoded, unicode) pairs checked by test_mapping_supplemental
    # in addition to the table file.
    supmaps = [
        ('\x80', u'\u0080'),
        ('\xa0', u'\uf8f0'),
        ('\xfd', u'\uf8f1'),
        ('\xfe', u'\uf8f2'),
        ('\xff', u'\uf8f3'),
    ]
    # Single bytes 0xA1..0xDF pair with the code point 0xFEC0 higher.
    supmaps.extend([(chr(i), unichr(i+0xfec0)) for i in range(0xa1, 0xe0)])
class TestEUCJPCOMPATMap(test_multibytecodec_support.TestBase_Mapping,
                         unittest.TestCase):
    """Check the ``euc_jp`` codec against the EUC-JP.TXT mapping table."""

    encoding = 'euc_jp'
    # Local table name, and the URL quoted in the skip message when the
    # table is missing.
    mapfilename = 'EUC-JP.TXT'
    mapfileurl = 'http://people.freebsd.org/~perky/i18n/EUC-JP.TXT'
class TestSJISCOMPATMap(test_multibytecodec_support.TestBase_Mapping,
                        unittest.TestCase):
    """Check the ``shift_jis`` codec against the SHIFTJIS.TXT mapping table."""

    encoding = 'shift_jis'
    mapfilename = 'SHIFTJIS.TXT'
    mapfileurl = ('http://www.unicode.org/Public/MAPPINGS/OBSOLETE'
                  '/EASTASIA/JIS/SHIFTJIS.TXT')

    # (encoded, unicode) pairs for which the encode assertion is skipped.
    pass_enctest = [
        ('\x81_', u'\\'),
    ]
    # (encoded, unicode) pairs for which the decode assertion is skipped.
    pass_dectest = [
        ('\\', u'\xa5'),
        ('~', u'\u203e'),
        ('\x81_', u'\\'),
    ]
class TestSJISSTRICTMap(test_multibytecodec_support.TestBase_Mapping,
                        unittest.TestCase):
    """Check ``shift_jis_strict`` against the SHIFTJIS.TXT mapping table."""

    encoding = 'shift_jis_strict'
    # Local table name, and the URL quoted in the skip message when the
    # table is missing.
    mapfilename = 'SHIFTJIS.TXT'
    mapfileurl = ('http://www.unicode.org/Public/MAPPINGS/OBSOLETE'
                  '/EASTASIA/JIS/SHIFTJIS.TXT')
class TestEUCJISX0213Map(test_multibytecodec_support.TestBase_Mapping,
                         unittest.TestCase):
    """Check ``euc_jisx0213`` against the EUC-JISX0213.TXT mapping table."""

    encoding = 'euc_jisx0213'
    # Local table name, and the URL quoted in the skip message when the
    # table is missing.
    mapfilename = 'EUC-JISX0213.TXT'
    mapfileurl = 'http://people.freebsd.org/~perky/i18n/EUC-JISX0213.TXT'
class TestSJISX0213Map(test_multibytecodec_support.TestBase_Mapping,
                       unittest.TestCase):
    """Check ``shift_jisx0213`` against the SHIFT_JISX0213.TXT mapping table."""

    encoding = 'shift_jisx0213'
    # Local table name, and the URL quoted in the skip message when the
    # table is missing.
    mapfilename = 'SHIFT_JISX0213.TXT'
    mapfileurl = 'http://people.freebsd.org/~perky/i18n/SHIFT_JISX0213.TXT'
def test_main():
    """Collect the Japanese mapping test cases into one suite and run it.

    ``TestSJISSTRICTMap`` is added only when the support module's
    ``__cjkcodecs__`` flag is true.
    """
    cases = [TestCP932Map, TestEUCJPCOMPATMap, TestSJISCOMPATMap]
    if test_multibytecodec_support.__cjkcodecs__:
        cases.append(TestSJISSTRICTMap)
    cases.extend([TestEUCJISX0213Map, TestSJISX0213Map])

    suite = unittest.TestSuite()
    for case in cases:
        suite.addTest(unittest.makeSuite(case))
    test_support.run_suite(suite)


# Runs at import time: locates the mapping tables and sets this module's
# ``skip_expected`` flag.
test_multibytecodec_support.register_skip_expected(
    TestCP932Map,
    TestEUCJPCOMPATMap,
    TestSJISCOMPATMap,
    TestEUCJISX0213Map,
    TestSJISX0213Map)

if __name__ == "__main__":
    test_main()
--- NEW FILE: test_codecmaps_kr.py ---
#!/usr/bin/env python
#
# test_codecmaps_kr.py
# Codec mapping tests for ROK encodings
#
# $CJKCodecs: test_codecmaps_kr.py,v 1.2 2004/01/17 12:47:19 perky Exp $
from test import test_support
from test import test_multibytecodec_support
import unittest
class TestCP949Map(test_multibytecodec_support.TestBase_Mapping,
                   unittest.TestCase):
    """Check the ``cp949`` codec against the CP949.TXT mapping table."""

    encoding = 'cp949'
    # Local table name, and the URL quoted in the skip message when the
    # table is missing.
    mapfilename = 'CP949.TXT'
    mapfileurl = ('http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT'
                  '/WINDOWS/CP949.TXT')
class TestEUCKRMap(test_multibytecodec_support.TestBase_Mapping,
                   unittest.TestCase):
    """Check the ``euc_kr`` codec against the EUC-KR.TXT mapping table."""

    encoding = 'euc_kr'
    # Local table name, and the URL quoted in the skip message when the
    # table is missing.
    mapfilename = 'EUC-KR.TXT'
    mapfileurl = 'http://people.freebsd.org/~perky/i18n/EUC-KR.TXT'
class TestJOHABMap(test_multibytecodec_support.TestBase_Mapping,
                   unittest.TestCase):
    """Check the ``johab`` codec against the JOHAB.TXT mapping table."""

    encoding = 'johab'
    mapfilename = 'JOHAB.TXT'
    mapfileurl = ('http://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/'
                  'KSC/JOHAB.TXT')

    # The KS X 1001 standard assigns 0x5c to WON SIGN.  However, in the
    # early 90s -- the only era in which johab was widely used -- most
    # software implemented it as REVERSE SOLIDUS.  So the standard is
    # ignored here: the table's backslash/WON SIGN pair is skipped in both
    # directions.
    pass_enctest = [('\\', u'\u20a9')]
    pass_dectest = [('\\', u'\u20a9')]
def test_main():
    """Collect the Korean mapping test cases into one suite and run it."""
    suite = unittest.TestSuite()
    for case in (TestCP949Map, TestEUCKRMap, TestJOHABMap):
        suite.addTest(unittest.makeSuite(case))
    test_support.run_suite(suite)


# Runs at import time: locates the mapping tables and sets this module's
# ``skip_expected`` flag.
test_multibytecodec_support.register_skip_expected(
    TestCP949Map, TestEUCKRMap, TestJOHABMap)

if __name__ == "__main__":
    test_main()
--- NEW FILE: test_codecmaps_tw.py ---
#!/usr/bin/env python
#
# test_codecmaps_tw.py
# Codec mapping tests for ROC encodings
#
# $CJKCodecs: test_codecmaps_tw.py,v 1.2 2004/01/17 12:47:19 perky Exp $
from test import test_support
from test import test_multibytecodec_support
import unittest
class TestBIG5Map(test_multibytecodec_support.TestBase_Mapping,
                  unittest.TestCase):
    """Check the ``big5`` codec against the BIG5.TXT mapping table."""

    encoding = 'big5'
    # Local table name, and the URL quoted in the skip message when the
    # table is missing.
    mapfilename = 'BIG5.TXT'
    mapfileurl = ('http://www.unicode.org/Public/MAPPINGS/OBSOLETE/'
                  'EASTASIA/OTHER/BIG5.TXT')
class TestCP950Map(test_multibytecodec_support.TestBase_Mapping,
                   unittest.TestCase):
    """Check the ``cp950`` codec against the CP950.TXT mapping table."""

    encoding = 'cp950'
    mapfilename = 'CP950.TXT'
    mapfileurl = ('http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/'
                  'WINDOWS/CP950.TXT')

    # (encoded, unicode) pairs for which the encode assertion is skipped.
    pass_enctest = [
        ('\xa2\xcc', u'\u5341'),
        ('\xa2\xce', u'\u5345'),
    ]
def test_main():
    """Collect the ROC mapping test cases into one suite and run it."""
    suite = unittest.TestSuite()
    for case in (TestBIG5Map, TestCP950Map):
        suite.addTest(unittest.makeSuite(case))
    test_support.run_suite(suite)


# Runs at import time: locates the mapping tables and sets this module's
# ``skip_expected`` flag.
test_multibytecodec_support.register_skip_expected(TestBIG5Map, TestCP950Map)

if __name__ == "__main__":
    test_main()
--- NEW FILE: test_multibytecodec.py ---
#!/usr/bin/env python
#
# test_multibytecodec.py
# Unit test for multibytecodec itself
#
# $CJKCodecs: test_multibytecodec.py,v 1.5 2004/01/06 02:26:28 perky Exp $
from test import test_support
from test import test_multibytecodec_support
import unittest, StringIO, codecs
class Test_StreamWriter(unittest.TestCase):
    """Stream-writer checks for surrogate pairs that arrive in pieces.

    The surrogate tests are only defined on builds where a non-BMP
    character occupies two code units.
    """

    if len(u'\U00012345') == 2: # UCS2
        def test_gb18030(self):
            """Write split surrogate pairs through the gb18030 writer."""
            astral = u'\U00012345'
            high, low = astral[0], astral[1]
            sink = StringIO.StringIO()
            writer = codecs.lookup('gb18030')[3](sink)

            writer.write(u'123')
            self.assertEqual(sink.getvalue(), '123')
            writer.write(astral)
            self.assertEqual(sink.getvalue(), '123\x907\x959')

            # A lone high surrogate must not produce any output yet.
            writer.write(high)
            self.assertEqual(sink.getvalue(), '123\x907\x959')

            writer.write(low + astral + u'\uac00\u00ac')
            flushed = '123\x907\x959\x907\x959\x907\x959\x827\xcf5\x810\x851'
            self.assertEqual(sink.getvalue(), flushed)

            # With a high surrogate pending, reset() has to raise and the
            # output has to stay unchanged.
            writer.write(high)
            self.assertEqual(sink.getvalue(), flushed)
            self.assertRaises(UnicodeError, writer.reset)
            self.assertEqual(sink.getvalue(), flushed)

        # standard utf-8 codecs has broken StreamReader
        if test_multibytecodec_support.__cjkcodecs__:
            def test_utf_8(self):
                """Write split surrogate pairs through the utf-8 writer."""
                astral = u'\U00012345'
                high, low = astral[0], astral[1]
                sink = StringIO.StringIO()
                writer = codecs.lookup('utf-8')[3](sink)

                writer.write(u'123')
                self.assertEqual(sink.getvalue(), '123')
                writer.write(astral)
                self.assertEqual(sink.getvalue(), '123\xf0\x92\x8d\x85')

                # A lone high surrogate must not produce any output yet.
                writer.write(high)
                self.assertEqual(sink.getvalue(), '123\xf0\x92\x8d\x85')

                writer.write(low + astral + u'\uac00\u00ac')
                flushed = ('123\xf0\x92\x8d\x85\xf0\x92\x8d\x85\xf0\x92\x8d\x85'
                           '\xea\xb0\x80\xc2\xac')
                self.assertEqual(sink.getvalue(), flushed)

                writer.write(high)
                self.assertEqual(sink.getvalue(), flushed)

                # reset() writes the pending high surrogate out on its own.
                writer.reset()
                after_reset = (
                    '123\xf0\x92\x8d\x85\xf0\x92\x8d\x85\xf0\x92\x8d\x85'
                    '\xea\xb0\x80\xc2\xac\xed\xa0\x88')
                self.assertEqual(sink.getvalue(), after_reset)

                # A low surrogate with nothing pending is encoded by itself.
                writer.write(low)
                after_low = (
                    '123\xf0\x92\x8d\x85\xf0\x92\x8d\x85\xf0\x92\x8d\x85'
                    '\xea\xb0\x80\xc2\xac\xed\xa0\x88\xed\xbd\x85')
                self.assertEqual(sink.getvalue(), after_low)

    else: # UCS4
        pass

    def test_nullcoding(self):
        """Empty strings must convert to empty strings in both directions."""
        self.assertEqual(''.decode('utf-8'), u'')
        self.assertEqual(unicode('', 'utf-8'), u'')
        self.assertEqual(u''.encode('utf-8'), '')

    def test_str_decode(self):
        """Calling encode() on a plain ASCII str must return it unchanged."""
        self.assertEqual('abcd'.encode('utf-8'), 'abcd')
def test_main():
    """Build a suite from Test_StreamWriter and run it."""
    writer_tests = unittest.makeSuite(Test_StreamWriter)
    suite = unittest.TestSuite()
    suite.addTest(writer_tests)
    test_support.run_suite(suite)


if __name__ == "__main__":
    test_main()
--- NEW FILE: test_multibytecodec_support.py ---
#!/usr/bin/env python
#
# test_multibytecodec_support.py
# Common Unittest Routines for CJK codecs
#
# $CJKCodecs: test_multibytecodec_support.py,v 1.5 2004/01/17 12:47:19 perky Exp $
import sys, codecs, os.path
import unittest
from test import test_support
from StringIO import StringIO
__cjkcodecs__ = 0 # define this as 0 for python
class TestBase:
 encoding = '' # codec name
 codec = None # codec tuple (with 4 elements)
 tstring = '' # string to test StreamReader
 codectests = None # must set. codec test tuple
 roundtriptest = 1 # set if roundtrip is possible with unicode
 has_iso10646 = 0 # set if this encoding contains whole iso10646 map
 xmlcharnametest = None # string to test xmlcharrefreplace
 def setUp(self):
 if self.codec is None:
 self.codec = codecs.lookup(self.encoding)
 self.encode, self.decode, self.reader, self.writer = self.codec
 def test_chunkcoding(self):
 for native, utf8 in zip(*[StringIO(f).readlines()
 for f in self.tstring]):
 u = self.decode(native)[0]
 self.assertEqual(u, utf8.decode('utf-8'))
 if self.roundtriptest:
 self.assertEqual(native, self.encode(u)[0])
 def test_errorhandle(self):
 for source, scheme, expected in self.codectests:
 if type(source) == type(''):
 func = self.decode
 else:
 func = self.encode
 if expected:
 result = func(source, scheme)[0]
 self.assertEqual(result, expected)
 else:
 self.assertRaises(UnicodeError, func, source, scheme)
 if sys.hexversion >= 0x02030000:
 def test_xmlcharrefreplace(self):
 if self.has_iso10646:
 return
 s = u"\u0b13\u0b23\u0b60 nd eggs"
 self.assertEqual(
 self.encode(s, "xmlcharrefreplace")[0],
 "&#2835;&#2851;&#2912; nd eggs"
 )
 def test_customreplace(self):
 if self.has_iso10646:
 return
 import htmlentitydefs
 names = {}
 for (key, value) in htmlentitydefs.entitydefs.items():
 if len(value)==1:
 names[value.decode('latin-1')] = self.decode(key)[0]
 else:
 names[unichr(int(value[2:-1]))] = self.decode(key)[0]
 def xmlcharnamereplace(exc):
 if not isinstance(exc, UnicodeEncodeError):
 raise TypeError("don't know how to handle %r" % exc)
 l = []
 for c in exc.object[exc.start:exc.end]:
 try:
 l.append(u"&%s;" % names[c])
 except KeyError:
 l.append(u"&#%d;" % ord(c))
 return (u"".join(l), exc.end)
 codecs.register_error(
 "test.xmlcharnamereplace", xmlcharnamereplace)
 if self.xmlcharnametest:
 sin, sout = self.xmlcharnametest
 else:
 sin = u"\xab\u211c\xbb = \u2329\u1234\u232a"
 sout = "&laquo;&real;&raquo; = &lang;&#4660;&rang;"
 self.assertEqual(self.encode(sin,
 "test.xmlcharnamereplace")[0], sout)
 def test_streamreader(self):
 UTF8Writer = codecs.getwriter('utf-8')
 for name in ["read", "readline", "readlines"]:
 for sizehint in [None, -1] + range(1, 33) + \
 [64, 128, 256, 512, 1024]:
 istream = self.reader(StringIO(self.tstring[0]))
 ostream = UTF8Writer(StringIO())
 func = getattr(istream, name)
 while 1:
 data = func(sizehint)
 if not data:
 break
 if name == "readlines":
 ostream.writelines(data)
 else:
 ostream.write(data)
 self.assertEqual(ostream.getvalue(), self.tstring[1])
 def test_streamwriter(self):
 if __cjkcodecs__:
 readfuncs = ('read', 'readline', 'readlines')
 else:
 # standard utf8 codec has broken readline and readlines.
 readfuncs = ('read',)
 UTF8Reader = codecs.getreader('utf-8')
 for name in readfuncs:
 for sizehint in [None] + range(1, 33) + \
 [64, 128, 256, 512, 1024]:
 istream = UTF8Reader(StringIO(self.tstring[1]))
 ostream = self.writer(StringIO())
 func = getattr(istream, name)
 while 1:
 if sizehint is not None:
 data = func(sizehint)
 else:
 data = func()
 if not data:
 break
 if name == "readlines":
 ostream.writelines(data)
 else:
 ostream.write(data)
 self.assertEqual(ostream.getvalue(), self.tstring[0])
if len(u'\U00012345') == 2: # ucs2 build
    # On this build a non-BMP character occupies two code units, so the
    # module shadows the builtins with surrogate-aware versions.
    _unichr = unichr
    def unichr(v):
        """Return the string for code point *v*, as a surrogate pair if needed."""
        if v < 0x10000:
            return _unichr(v)
        offset = v - 0x10000
        high = _unichr(0xd800 + (offset >> 10))
        low = _unichr(0xdc00 + (offset & 0x3ff))
        return high + low

    _ord = ord
    def ord(c):
        """Return the code point of *c*, combining a two-unit surrogate pair."""
        if len(c) != 2:
            return _ord(c)
        high_bits = (_ord(c[0]) - 0xd800) << 10
        low_bits = _ord(c[1]) - 0xdc00
        return 0x10000 + high_bits + low_bits
class TestBase_Mapping(unittest.TestCase):
 pass_enctest = []
 pass_dectest = []
 supmaps = []
 def __init__(self, *args, **kw):
 unittest.TestCase.__init__(self, *args, **kw)
 if not os.path.exists(self.mapfilename):
 raise test_support.TestSkipped('%s not found, download from %s' %
 (self.mapfilename, self.mapfileurl))
 def test_mapping_file(self):
 unichrs = lambda s: u''.join(map(unichr, map(eval, s.split('+'))))
 urt_wa = {}
 for line in open(self.mapfilename):
 if not line:
 break
 data = line.split('#')[0].strip().split()
 if len(data) != 2:
 continue
 csetval = eval(data[0])
 if csetval <= 0x7F:
 csetch = chr(csetval & 0xff)
 elif csetval >= 0x1000000:
 csetch = chr(csetval >> 24) + chr((csetval >> 16) & 0xff) + \
 chr((csetval >> 8) & 0xff) + chr(csetval & 0xff)
 elif csetval >= 0x10000:
 csetch = chr(csetval >> 16) + \
 chr((csetval >> 8) & 0xff) + chr(csetval & 0xff)
 elif csetval >= 0x100:
 csetch = chr(csetval >> 8) + chr(csetval & 0xff)
 else:
 continue
 unich = unichrs(data[1])
 if ord(unich) == 0xfffd or urt_wa.has_key(unich):
 continue
 urt_wa[unich] = csetch
 self._testpoint(csetch, unich)
 def test_mapping_supplemental(self):
 for mapping in self.supmaps:
 self._testpoint(*mapping)
 def _testpoint(self, csetch, unich):
 if (csetch, unich) not in self.pass_enctest:
 self.assertEqual(unich.encode(self.encoding), csetch)
 if (csetch, unich) not in self.pass_dectest:
 self.assertEqual(unicode(csetch, self.encoding), unich)
def load_teststring(encoding):
    """Return the (encoded text, UTF-8 text) sample pair for *encoding*.

    When ``__cjkcodecs__`` is true the pair is read from
    ``sampletexts/<encoding>.txt`` and ``sampletexts/<encoding>.utf8``;
    otherwise it is looked up in ``test.cjkencodings_test.teststring``.
    """
    if __cjkcodecs__:
        basename = os.path.join('sampletexts', encoding)
        texts = []
        for suffix in ('.txt', '.utf8'):
            sample = open(basename + suffix)
            # Close each file explicitly so the handle is not leaked.
            try:
                texts.append(sample.read())
            finally:
                sample.close()
        etxt, utxt = texts
        return (etxt, utxt)
    else:
        from test import cjkencodings_test
        return cjkencodings_test.teststring[encoding]
def register_skip_expected(*cases):
    """Locate each case's mapping file and set ``skip_expected``.

    Every class in *cases* has its ``mapfilename`` looked up in the current
    and then the parent directory, and rewritten to the path that exists.
    The first case whose file is found in neither place sets
    ``skip_expected = True`` on that case's module and ends the search.
    If every file is found, ``skip_expected = False`` is set on the module
    of the last case.
    """
    search_dirs = [os.path.curdir, os.path.pardir]
    for case in cases: # len(cases) must be 1 at least.
        located = None
        for directory in search_dirs:
            candidate = os.path.join(directory, case.mapfilename)
            if os.path.exists(candidate):
                located = candidate
                break
        if located is None:
            sys.modules[case.__module__].skip_expected = True
            return
        case.mapfilename = located
    sys.modules[case.__module__].skip_expected = False
Index: regrtest.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/test/regrtest.py,v
retrieving revision 1.150
retrieving revision 1.151
diff -C2 -d -r1.150 -r1.151
*** regrtest.py	20 Nov 2003 22:11:29 -0000	1.150
--- regrtest.py	17 Jan 2004 14:29:28 -0000	1.151
***************
*** 550,553 ****
--- 550,557 ----
 # Controlled by test_timeout.skip_expected. Requires the network
 # resource and a socket module.
+ # test_codecmaps_*
+ # Whether a skip is expected here depends on whether a large test
+ # input file has been downloaded. test_codecmaps_*.skip_expected
+ # controls that.
 
 _expectations = {
***************
*** 566,570 ****
 test_dbm
 test_dl
- test_email_codecs
 test_fcntl
 test_fork1
--- 570,573 ----
***************
*** 599,603 ****
 test_curses
 test_dl
- test_email_codecs
 test_gl
 test_imgfile
--- 602,605 ----
***************
*** 624,628 ****
 test_dbm
 test_dl
- test_email_codecs
 test_fcntl
 test_fork1
--- 626,629 ----
***************
*** 779,783 ****
 test_curses
 test_dl
- test_email_codecs
 test_gdbm
 test_gl
--- 780,783 ----
***************
*** 804,808 ****
 test_curses
 test_dbm
- test_email_codecs
 test_gdbm
 test_gl
--- 804,807 ----
***************
*** 851,855 ****
 test_curses
 test_dl
- test_email_codecs
 test_gdbm
 test_gl
--- 850,853 ----
***************
*** 877,881 ****
 test_curses
 test_dbm
- test_email_codecs
 test_gl
 test_imgfile
--- 875,878 ----
***************
*** 902,906 ****
 test_curses
 test_dl
- test_email_codecs
 test_gl
 test_imgfile
--- 899,902 ----
***************
*** 926,930 ****
 test_cd
 test_cl
- test_email_codecs
 test_gl
 test_imgfile
--- 922,925 ----
***************
*** 956,959 ****
--- 951,956 ----
 from test import test_socket_ssl
 from test import test_timeout
+ from test import test_codecmaps_cn, test_codecmaps_jp
+ from test import test_codecmaps_kr, test_codecmaps_tw
 
 self.valid = False
***************
*** 974,977 ****
--- 971,978 ----
 self.expected.add('test_timeout')
 
+ for cc in ('cn', 'jp', 'kr', 'tw'):
+ if eval('test_codecmaps_' + cc).skip_expected:
+ self.expected.add('test_codecmaps_' + cc)
+ 
 if not sys.platform in ("mac", "darwin"):
 MAC_ONLY = ["test_macostools", "test_macfs", "test_aepack",