Return to Answer

Post Timeline

Rephrased mention of base64

Source Link

edited Feb 2, 2013 at 11:52

rkday

edited Feb 2, 2013 at 11:52

rkday

1.1k
8
12

ensure_ascii=False really only defers the issue to the decoding stage:

>>> dict2 = {'LeafTemps': '\xff\xff\xff\xff',}
>>> json1 = json.dumps(dict2, ensure_ascii=False)
>>> print(json1)
{"LeafTemps": "����"}
>>> json.loads(json1)
Traceback (most recent call last):
 File "<stdin>", line 1, in <module>
 File "/usr/lib/python2.7/json/__init__.py", line 328, in loads
 return _default_decoder.decode(s)
 File "/usr/lib/python2.7/json/decoder.py", line 365, in decode
 obj, end = self.raw_decode(s, idx=_w(s, 0).end())
 File "/usr/lib/python2.7/json/decoder.py", line 381, in raw_decode
 obj, end = self.scan_once(s, idx)
UnicodeDecodeError: 'utf8' codec can't decode byte 0xff in position 0: invalid start byte

Ultimately you can't store raw bytes in a JSON document, so you'll want to use some means of unambiguously encoding those bytes into a sequence of arbitrary bytes as an ASCII string - such as base64.

>>> import json
>>> from base64 import b64encode, b64decode
>>> my_dict = {'LeafTemps': '\xff\xff\xff\xff',} 
>>> my_dict['LeafTemps'] = b64encode(my_dict['LeafTemps'])
>>> json.dumps(my_dict)
'{"LeafTemps": "/////w=="}'
>>> json.loads(json.dumps(my_dict))
{u'LeafTemps': u'/////w=='}
>>> new_dict = json.loads(json.dumps(my_dict))
>>> new_dict['LeafTemps'] = b64decode(new_dict['LeafTemps'])
>>> print new_dict
{u'LeafTemps': '\xff\xff\xff\xff'}

ensure_ascii=False really only defers the issue to the decoding stage:

>>> dict2 = {'LeafTemps': '\xff\xff\xff\xff',}
>>> json1 = json.dumps(dict2, ensure_ascii=False)
>>> print(json1)
{"LeafTemps": "����"}
>>> json.loads(json1)
Traceback (most recent call last):
 File "<stdin>", line 1, in <module>
 File "/usr/lib/python2.7/json/__init__.py", line 328, in loads
 return _default_decoder.decode(s)
 File "/usr/lib/python2.7/json/decoder.py", line 365, in decode
 obj, end = self.raw_decode(s, idx=_w(s, 0).end())
 File "/usr/lib/python2.7/json/decoder.py", line 381, in raw_decode
 obj, end = self.scan_once(s, idx)
UnicodeDecodeError: 'utf8' codec can't decode byte 0xff in position 0: invalid start byte

Ultimately you can't store raw bytes in a JSON document, so you'll want to use some means of encoding those bytes into a string - such as base64.

>>> import json
>>> from base64 import b64encode, b64decode
>>> my_dict = {'LeafTemps': '\xff\xff\xff\xff',} 
>>> my_dict['LeafTemps'] = b64encode(my_dict['LeafTemps'])
>>> json.dumps(my_dict)
'{"LeafTemps": "/////w=="}'
>>> json.loads(json.dumps(my_dict))
{u'LeafTemps': u'/////w=='}
>>> new_dict = json.loads(json.dumps(my_dict))
>>> new_dict['LeafTemps'] = b64decode(new_dict['LeafTemps'])
>>> print new_dict
{u'LeafTemps': '\xff\xff\xff\xff'}

ensure_ascii=False really only defers the issue to the decoding stage:

>>> dict2 = {'LeafTemps': '\xff\xff\xff\xff',}
>>> json1 = json.dumps(dict2, ensure_ascii=False)
>>> print(json1)
{"LeafTemps": "����"}
>>> json.loads(json1)
Traceback (most recent call last):
 File "<stdin>", line 1, in <module>
 File "/usr/lib/python2.7/json/__init__.py", line 328, in loads
 return _default_decoder.decode(s)
 File "/usr/lib/python2.7/json/decoder.py", line 365, in decode
 obj, end = self.raw_decode(s, idx=_w(s, 0).end())
 File "/usr/lib/python2.7/json/decoder.py", line 381, in raw_decode
 obj, end = self.scan_once(s, idx)
UnicodeDecodeError: 'utf8' codec can't decode byte 0xff in position 0: invalid start byte

Ultimately you can't store raw bytes in a JSON document, so you'll want to use some means of unambiguously encoding a sequence of arbitrary bytes as an ASCII string - such as base64.

>>> import json
>>> from base64 import b64encode, b64decode
>>> my_dict = {'LeafTemps': '\xff\xff\xff\xff',} 
>>> my_dict['LeafTemps'] = b64encode(my_dict['LeafTemps'])
>>> json.dumps(my_dict)
'{"LeafTemps": "/////w=="}'
>>> json.loads(json.dumps(my_dict))
{u'LeafTemps': u'/////w=='}
>>> new_dict = json.loads(json.dumps(my_dict))
>>> new_dict['LeafTemps'] = b64decode(new_dict['LeafTemps'])
>>> print new_dict
{u'LeafTemps': '\xff\xff\xff\xff'}

Source Link

answered Feb 2, 2013 at 11:36

rkday

answered Feb 2, 2013 at 11:36

rkday

1.1k
8
12

ensure_ascii=False really only defers the issue to the decoding stage:

>>> dict2 = {'LeafTemps': '\xff\xff\xff\xff',}
>>> json1 = json.dumps(dict2, ensure_ascii=False)
>>> print(json1)
{"LeafTemps": "����"}
>>> json.loads(json1)
Traceback (most recent call last):
 File "<stdin>", line 1, in <module>
 File "/usr/lib/python2.7/json/__init__.py", line 328, in loads
 return _default_decoder.decode(s)
 File "/usr/lib/python2.7/json/decoder.py", line 365, in decode
 obj, end = self.raw_decode(s, idx=_w(s, 0).end())
 File "/usr/lib/python2.7/json/decoder.py", line 381, in raw_decode
 obj, end = self.scan_once(s, idx)
UnicodeDecodeError: 'utf8' codec can't decode byte 0xff in position 0: invalid start byte

Ultimately you can't store raw bytes in a JSON document, so you'll want to use some means of encoding those bytes into a string - such as base64.

>>> import json
>>> from base64 import b64encode, b64decode
>>> my_dict = {'LeafTemps': '\xff\xff\xff\xff',} 
>>> my_dict['LeafTemps'] = b64encode(my_dict['LeafTemps'])
>>> json.dumps(my_dict)
'{"LeafTemps": "/////w=="}'
>>> json.loads(json.dumps(my_dict))
{u'LeafTemps': u'/////w=='}
>>> new_dict = json.loads(json.dumps(my_dict))
>>> new_dict['LeafTemps'] = b64decode(new_dict['LeafTemps'])
>>> print new_dict
{u'LeafTemps': '\xff\xff\xff\xff'}

lang-py

CollectivesTM on Stack Overflow