I have the following JSON, which I'm trying to parse.
>>> string1
u'{"content":{"search_highlight":{},"message_exchanged":{"messagesOnlyToViewee":true,"messagesOnlyToViewer":true},"Certifications":{"certsMpr":{},"empty":{}},"lix_treasury_callout":"B","network_overview":{"img_overview_locked":"http://s.c.lnkd.licdn.com/scds/common/u/img/pic/pic_network_overview_locked_178x276.png","lix_showDetail":"control"},"Projects":{"empty":{},"projectsMpr":{}},"lix_discovery_order":"control","Volunteering":{"volunteer":{},"empty":{}},"lix_treasury_upload":"B","connections":{},"view_tracking":{},"Badge":{"badges":{},"empty":{}},"Patents":{"patentsMpr":{},"empty":{}},"Publications":{"empty":{},"pubsMpr":{}},"Summary":{"summary":{"deferImg":true,"hasSummaryOrSpecialties":false,"visible":true,"showSummarySection":false,"associatedWith":{}},"empty":{}},"Notes":{},"frontierajaxform__text_plain__there_were":"There were one or more errors in your submission. Please correct the marked fields below.","ContactInfo":{"distance":{"distance":\\u002d1,"numberOfConnections":2},"contact_info":{"deferImg":true,"showTwitter":true,"visible":true}},"Following":{"follow_channels":{},"follow_school":{},"follow_people":{"count":0,"viewee":{"id":29841231},"influencerSeeMore":\\u002d7,"influencers":[]},"follow":{"i18n_following_section_label":"Following","i18n_currently_following":"Following","isCondensed":true,"companyFolloweeCount":1,"industryFolloweeCount":1,"i18n_unfollow":"Unfollow","industryFollowees":[{"link_industry":"/today/insurance?trk=prof\\u002dfollowing\\u002dindustry\\u002dicon","universalName":"insurance","id":42,"i18n_x_followers":"93,628 
followers","ind_follow":"/lite/follow?type=INDUSTRY&id=42&csrfToken=ajax%3A1584468784299534813&goback=%2Enpv_29841231_*1_*1_NAME*4SEARCH_AcEA_*1_en*4US_*1_*1_*1_123452511375704563981_2_7_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1","canonicalName":"Insurance","fmt_following_count":"93,628","ind_unfollow":"/lite/unfollow?type=INDUSTRY&id=42&csrfToken=ajax%3A1584468784299534813&goback=%2Enpv_29841231_*1_*1_NAME*4SEARCH_AcEA_*1_en*4US_*1_*1_*1_123452511375704563981_2_7_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1","isShared":false}],"i18n_follow":"Follow","i18n_see_less":"See less","companyFollowees":[{"link_biz":"/company/metlife?trk=prof\\u002dfollowing\\u002dcompany\\u002dlogo","universalName":"metlife","id":2213,"logo":"http://m.c.lnkd.licdn.com/media/p/2/000/021/14e/05da35a.png","canonicalName":"MetLife","biz_follow":"/company/follow/submit?id=2213&csrfToken=ajax%3A1584468784299534813&goback=%2Enpv_29841231_*1_*1_NAME*4SEARCH_AcEA_*1_en*4US_*1_*1_*1_123452511375704563981_2_7_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1","ind_lookup":"Insurance","isShared":false,"logoId":"/p/2/000/021/14e/05da35a.png"}],"i18n_news":"News","isFollowing":true,"lix_profile_showChannels":"control"}},"BasicInfo":{"empty":{},"upsell":{"deferImg":true,"visible":true},"basic_info":{"showTopCardDetail":true,"visible":true,"phoneticname":"","i18n__Industry":"Industry","industry_pivot":"/search?search=&industry=42&sortCriteria=R&keepFacets=true&trk=prof\\u002d0\\u002dovw\\u002dindustry","find_others_region":"Find other members in Mumbai Area, India","headline_highlight":"Manager at Metlife","i18n__find_others_in_industry":"Find other members in this industry","i18n_Edit":"Edit"}}}}'
But it fails to load when I run:
>>> import re
>>> import json
>>> nw = json.loads(string1)
Traceback (most recent call last):
File "<pyshell#33>", line 1, in <module>
nw = json.loads(string1)
File "C:\Python27\lib\json\__init__.py", line 338, in loads
return _default_decoder.decode(s)
File "C:\Python27\lib\json\decoder.py", line 365, in decode
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
File "C:\Python27\lib\json\decoder.py", line 383, in raw_decode
raise ValueError("No JSON object could be decoded")
ValueError: No JSON object could be decoded
Thanks
asked Aug 13, 2013 at 11:14
user1946217
1,763 reputation, 6 gold badges, 31 silver badges, 41 bronze badges
-
\o/ Yay, you found an actual JSON comment that actually breaks! :-P – Martijn Pieters, commented Aug 13, 2013 at 11:20
1 Answer 1
There are escaped characters in your input string; dashes have been replaced by \u002d escape sequences, even outside of string values (for example, "distance":\u002d1). That works okay in JavaScript, but it is not valid in JSON outside of a string.
You'll have to repair these first:
>>> json.loads(string1.replace(r'\u002d', '-'))
{u'content': {u'message_exchanged': {u'messagesOnlyToViewee': True, u'messagesOnlyToViewer': True}, u'view_tracking': {}, u'Certifications': {u'certsMpr': {}, u'empty': {}}, u'BasicInfo': {u'upsell': {u'deferImg': True, u'visible': True}, u'empty': {}, u'basic_info': {u'i18n__Industry': u'Industry', u'industry_pivot': u'/search?search=&industry=42&sortCriteria=R&keepFacets=true&trk=prof-0-ovw-industry', u'find_others_region': u'Find other members in Mumbai Area, India', u'headline_highlight': u'Manager at Metlife', u'showTopCardDetail': True, u'i18n_Edit': u'Edit', u'visible': True, u'i18n__find_others_in_industry': u'Find other members in this industry', u'phoneticname': u''}}, u'Volunteering': {u'empty': {}, u'volunteer': {}}, u'frontierajaxform__text_plain__there_were': u'There were one or more errors in your submission. Please correct the marked fields below.', u'lix_treasury_callout': u'B', u'Badge': {u'empty': {}, u'badges': {}}, u'Notes': {}, u'lix_treasury_upload': u'B', u'Summary': {u'empty': {}, u'summary': {u'deferImg': True, u'hasSummaryOrSpecialties': False, u'associatedWith': {}, u'showSummarySection': False, u'visible': True}}, u'connections': {}, u'search_highlight': {}, u'Patents': {u'empty': {}, u'patentsMpr': {}}, u'lix_discovery_order': u'control', u'Following': {u'follow': {u'i18n_see_less': u'See less', u'isFollowing': True, u'i18n_following_section_label': u'Following', u'lix_profile_showChannels': u'control', u'companyFolloweeCount': 1, u'industryFollowees': [{u'ind_unfollow': u'/lite/unfollow?type=INDUSTRY&id=42&csrfToken=ajax%3A1584468784299534813&goback=%2Enpv_29841231_*1_*1_NAME*4SEARCH_AcEA_*1_en*4US_*1_*1_*1_123452511375704563981_2_7_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1', u'canonicalName': u'Insurance', u'link_industry': u'/today/insurance?trk=prof-following-industry-icon', u'i18n_x_followers': u'93,628 followers', u'ind_follow': 
u'/lite/follow?type=INDUSTRY&id=42&csrfToken=ajax%3A1584468784299534813&goback=%2Enpv_29841231_*1_*1_NAME*4SEARCH_AcEA_*1_en*4US_*1_*1_*1_123452511375704563981_2_7_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1', u'universalName': u'insurance', u'id': 42, u'fmt_following_count': u'93,628', u'isShared': False}], u'i18n_unfollow': u'Unfollow', u'i18n_follow': u'Follow', u'i18n_news': u'News', u'industryFolloweeCount': 1, u'isCondensed': True, u'companyFollowees': [{u'biz_follow': u'/company/follow/submit?id=2213&csrfToken=ajax%3A1584468784299534813&goback=%2Enpv_29841231_*1_*1_NAME*4SEARCH_AcEA_*1_en*4US_*1_*1_*1_123452511375704563981_2_7_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1', u'canonicalName': u'MetLife', u'ind_lookup': u'Insurance', u'logoId': u'/p/2/000/021/14e/05da35a.png', u'logo': u'http://m.c.lnkd.licdn.com/media/p/2/000/021/14e/05da35a.png', u'universalName': u'metlife', u'id': 2213, u'link_biz': u'/company/metlife?trk=prof-following-company-logo', u'isShared': False}], u'i18n_currently_following': u'Following'}, u'follow_people': {u'count': 0, u'influencerSeeMore': -7, u'influencers': [], u'viewee': {u'id': 29841231}}, u'follow_school': {}, u'follow_channels': {}}, u'ContactInfo': {u'distance': {u'distance': -1, u'numberOfConnections': 2}, u'contact_info': {u'deferImg': True, u'showTwitter': True, u'visible': True}}, u'network_overview': {u'lix_showDetail': u'control', u'img_overview_locked': u'http://s.c.lnkd.licdn.com/scds/common/u/img/pic/pic_network_overview_locked_178x276.png'}, u'Projects': {u'projectsMpr': {}, u'empty': {}}, u'Publications': {u'pubsMpr': {}, u'empty': {}}}}
Whoever generated your HTML comments wanted to prevent - characters from being interpreted as part of the end-of-comment --> character sequence, so they escaped the dashes using JavaScript Unicode escape sequences.
answered Aug 13, 2013 at 11:19
Martijn Pieters
1.1m reputation, 326 gold badges, 4.2k silver badges, 3.5k bronze badges
Sign up to request clarification or add additional context in comments.
3 Comments
user1946217
Thanks for the answer. I would like to know if there is any way to handle such characters more dynamically, because I also see escapes like '\u2605' in my data.
Martijn Pieters
If those escape codes are inside JSON strings (within quotes) then they are legal values in your JSON. They are only a problem when not part of a string.
Martijn Pieters
The \u2605 character is a ★ star character, for example. That would never show up outside of a string.