I made the following snippet. It finds unique keys based on their values. For all keys \$s\$ that are contained in another key \$D\$ with the same value, key \$s\$ is discarded and key \$D\$ is returned.
# Build ``newdict`` from ``mydict``: whenever key1 occurs inside key2 and both
# map to equal values, key1 is dropped in favour of key2.  Every key also
# matches itself (``key1 in key1``), which is how unmatched keys get added.
#
# NOTE: the outcome depends on the iteration order of ``mydict``.  If a longer
# key is visited before a key it contains, the shorter key is re-added by its
# own self-match, e.g. {'aa': 1, 'a': 1} yields {'aa': 1, 'a': 1}.
newdict = {}
for key1, value1 in mydict.items():
    for key2, value2 in mydict.items():
        if key1 in key2 and value1 == value2:
            # key2 supersedes key1 (or is key1 itself).
            newdict.pop(key1, None)
            newdict[key2] = value2
print(newdict)
>>> mydict = {'d': 10, 'k': 10, 'n': 10, 'p': 10, 'j': 10, 'e': 10, 'q': 10, 'f': 10, 'z': 10, 'a': 10, 'i': 10, 'm': 10, 'o': 10, 'dk': 10, 'kn': 10, 'np': 10, 'pj': 10, 'je': 10, 'ek': 10, 'kq': 10, 'qf': 10, 'fp': 10, 'pd': 10, 'nz': 10, 'zq': 10, 'qj': 10, 'ja': 9, 'ap': 10, 'df': 10, 'fn': 10, 'nd': 10, 'ff': 10, 'fj': 10, 'jn': 10, 'nq': 10, 'qd': 10, 'di': 10, 'if': 10, 'nj': 10, 'jm': 10, 'mm': 10, 'ma': 10, 'af': 10, 'fm': 10, 'mf': 10, 'fi': 10, 'id': 10, 'dd': 10, 'da': 10, 'an': 10, 'dq': 10, 'qa': 10, 'jj': 10, 'jd': 10, 'dp': 10, 'pq': 10, 'de': 10, 'ei': 10, 'ik': 10, 'kp': 10, 'pa': 10, 'ad': 10, 'en': 10, 'nn': 10, 'pi': 10, 'ii': 10, 'io': 10, 'od': 10, 'do': 10, 'oz': 10, 'zp': 10, 'eq': 10, 'fd': 10, 'qz': 10, 'am': 10, 'mq': 9, 'fe': 10, 'dn': 10, 'ne': 10, 'ea': 10, 'no': 9, 'oi': 10, 'mi': 10, 'om': 10, 'md': 10, 'dj': 10, 'jf': 10, 'nk': 10, 'ka': 10, 'aa': 10, 'ak': 10, 'kf': 10, 'of': 10, 'pz': 10, 'po': 10, 'oa': 10, 'ee': 10, 'qe': 10, 'eo': 10, 'oo': 10, 'oq': 10, 'qq': 10, 'qk': 10, 'kd': 10, 'nm': 10, 'az': 10, 'zk': 10, 'pe': 10, 'fz': 10, 'ze': 10, 'ef': 10, 'fo': 10, 'dm': 10, 'mk': 10, 'kk': 10, 'kj': 9, 'jk': 10, 'qo': 10, 'qn': 10, 'na': 10, 'ai': 10, 'iq': 10, 'aq': 10, 'fk': 10, 'jp': 10, 'za': 10, 'mp': 10, 'qp': 9, 'pm': 10, 'pn': 10, 'ko': 9, 'op': 10, 'dz': 10, 'ed': 10, 'em': 10, 'jo': 10, 'mz': 10, 'zo': 9, 'km': 10, 'mj': 10, 'ke': 10, 'ej': 10, 'zm': 10, 'kz': 10, 'oj': 10, 'qi': 9, 'mn': 10, 'in': 10, 'iz': 10, 'zz': 10, 'zj': 9, 'pf': 10, 'fq': 10, 'qm': 10, 'me': 10, 'nf': 10, 'ie': 9, 'aj': 10, 'fa': 10, 'ni': 10, 'ae': 10, 'zd': 10, 'zn': 10, 'ia': 9, 'ao': 10, 'ok': 10, 'oe': 10, 'ij': 9, 'zi': 10, 'ji': 9, 'ep': 9, 'ki': 10, 'zf': 10, 'im': 9, 'ip': 10, 'mo': 10, 'ez': 10, 'jz': 9, 'on': 10, 'pk': 10, 'jq': 10, 'pp': 9, 'dkn': 5, 'npj': 4, 'pje': 4, 'jek': 4, 'kqf': 5, 'qfp': 4, 'fpd': 7, 'pdk': 4, 'qja': 5, 'jap': 4, 'apd': 4, 'pdf': 9, 'dfn': 9, 'fnd': 10, 'ndf': 7, 'dff': 8, 'ffj': 6, 'fjn': 7, 'jnq': 5, 
'nqd': 6, 'qdi': 8, 'dif': 9, 'maf': 4, 'afm': 5, 'fmf': 5, 'mfi': 7, 'fid': 9, 'idd': 8, 'dda': 10, 'dan': 6, 'anp': 4, 'npd': 6, 'pdq': 4, 'qap': 5, 'dqj': 5, 'jjd': 5, 'jdp': 4, 'dpq': 6, 'pqd': 6, 'qde': 8, 'dei': 6, 'ikp': 5, 'pad': 6, 'adq': 4, 'dqd': 10, 'den': 5, 'enn': 4, 'pii': 4, 'iod': 5, 'odo': 5, 'doz': 7, 'deq': 7, 'qfd': 6, 'fdq': 7, 'dqz': 8, 'zpa': 7, 'eid': 7, 'idn': 8, 'dne': 5, 'nea': 4, 'oid': 7, 'ddd': 10, 'dam': 5, 'omd': 6, 'mdj': 6, 'djf': 6, 'jfn': 5, 'fnk': 7, 'aak': 4, 'akf': 5, 'kfd': 7, 'fdf': 10, 'dfp': 10, 'qdd': 8, 'ddo': 10, 'dof': 8, 'ofd': 10, 'fdp': 10, 'dpz': 7, 'poa': 6, 'oad': 8, 'ade': 6, 'dee': 6, 'ead': 8, 'adj': 5, 'djd': 10, 'jdq': 5, 'dqe': 6, 'qka': 5, 'akd': 6, 'kdq': 7, 'qdj': 4, 'jdn': 4, 'dnm': 6, 'nmd': 4, 'mda': 8, 'daz': 6, 'kqd': 6, 'qdp': 4, 'dpe': 7, 'ean': 4, 'njd': 5, 'jdf': 9, 'dfz': 6, 'fze': 6, 'fom': 5, 'mdp': 6, 'dpd': 8, 'pdm': 6, 'mkd': 4, 'kdk': 6, 'dkk': 8, 'kkj': 4, 'kjf': 4, 'jfj': 5, 'fjk': 6, 'jkd': 8, 'kdo': 5, 'dod': 6, 'odq': 6, 'dqo': 7, 'qod': 7, 'odf': 8, 'dfi': 8, 'fif': 6, 'iff': 7, 'aiq': 5, 'nzp': 5, 'zpd': 6, 'pdd': 9, 'ddf': 10, 'ffk': 6, 'fkj': 4, 'pid': 4, 'ddn': 10, 'dnz': 5, 'zaf': 6, 'afn': 5, 'fnm': 4, 'pqn': 5, 'qnp': 4, 'pmq': 4, 'pnp': 4, 'dko': 5, 'kof': 4, 'ofo': 4, 'fop': 5, 'opd': 4, 'fnq': 4, 'knm': 4, 'nmk': 5, 'pdz': 4, 'dze': 5, 'zed': 7, 'eda': 6, 'daa': 6, 'fde': 6, 'dem': 6, 'emq': 4, 'mqd': 4, 'djo': 5, 'opa': 5, 'paf': 4, 'afe': 4, 'eed': 6, 'edk': 4, 'dkd': 10, 'kda': 5, 'oif': 5, 'qda': 4, 'kmm': 4, 'ejf': 5, 'odz': 4, 'dzm': 6, 'zmf': 5, 'mfd': 7, 'fdd': 10, 'dfj': 8, 'fjj': 5, 'jjk': 6, 'kzo': 4, 'ojf': 4, 'jff': 4, 'ffd': 10, 'fdm': 8, 'dma': 5, 'maq': 4, 'qif': 5, 'ifo': 5, 'ofn': 6, 'fne': 5, 'ned': 6, 'ede': 8, 'ded': 10, 'edm': 8, 'dmn': 6, 'qin': 4, 'inp': 4, 'pnd': 7, 'ndi': 5, 'diz': 6, 'izz': 4, 'zjf': 6, 'jfz': 4, 'kjd': 6, 'jdm': 4, 'kde': 6, 'edj': 7, 'dfk': 7, 'fkd': 8, 'kdd': 10, 'dfd': 10, 'dfe': 7, 'fef': 7, 'eff': 6, 'dmp': 6, 'pef': 7, 
'efj': 5, 'jnd': 5, 'ifd': 7, 'fda': 9, 'daq': 7, 'qqd': 5, 'jkj': 5, 'kja': 4, 'jad': 7, 'add': 10, 'ddq': 9, 'qdm': 7, 'dmd': 10, 'mdz': 5, 'fnz': 4, 'zpf': 4, 'pff': 7, 'ffq': 5, 'fqm': 4, 'ejd': 5, 'dmz': 6, 'anf': 6, 'nfd': 9, 'fdi': 8, 'noz': 4, 'jfq': 5, 'fqo': 5, 'iem': 5, 'emd': 6, 'dzk': 7, 'zkd': 6, 'daj': 6, 'ajn': 5, 'jnf': 4, 'nfa': 4, 'amd': 4, 'mdn': 7, 'dnq': 6, 'qdf': 8, 'dad': 7, 'adz': 8, 'edd': 8, 'doa': 9, 'adn': 4, 'dni': 6, 'efp': 4, 'ekd': 4, 'doq': 5, 'diq': 6, 'iqz': 4, 'qzz': 5, 'zdp': 7, 'pzz': 5, 'zdn': 5, 'dnn': 7, 'pzk': 4, 'keq': 5, 'end': 4, 'ndm': 5, 'iaa': 4, 'ajm': 5, 'jmd': 6, 'mdm': 5, 'mpd': 5, 'pde': 6, 'dea': 9, 'eak': 4, 'akz': 4, 'zjo': 5, 'jde': 5, 'enq': 5, 'nqf': 6, 'ifa': 4, 'qdq': 9, 'dqi': 7, 'qid': 4, 'idf': 7, 'fnp': 7, 'npq': 4, 'dpm': 7, 'mde': 8, 'eqo': 4, 'qoo': 4, 'oom': 5, 'odn': 4, 'enf': 5, 'nfi': 5, 'fik': 7, 'ikd': 4, 'kdp': 7, 'dpi': 6, 'ijd': 5, 'zid': 6, 'idq': 9, 'jji': 7, 'ifp': 6, 'fpz': 7, 'pzd': 5, 'zdd': 8, 'dde': 7, 'dep': 6, 'epm': 4, 'odj': 5, 'ddm': 9, 'doi': 5, 'oqe': 6, 'aoq': 4, 'dfo': 9, 'foz': 4, 'zof': 4, 'ofq': 4, 'fqe': 4, 'qef': 6, 'efk': 5, 'dnf': 8, 'nfe': 5, 'fed': 8, 'edq': 6, 'dqp': 7, 'pzf': 5, 'zfz': 6, 'zod': 5, 'oda': 6, 'dao': 8, 'aod': 9, 'dop': 10, 'dkf': 9, 'kfp': 6, 'fpi': 6, 'aio': 4, 'odd': 9, 'dnp': 6, 'edp': 4, 'piq': 5, 'pjd': 6, 'fkn': 8, 'ifq': 6, 'fqz': 6, 'qzm': 5, 'zmd': 8, 'dkm': 8, 'kmd': 7, 'nei': 4, 'pfk': 4, 'fke': 7, 'knn': 5, 'nnd': 5, 'ndd': 9, 'ddp': 9, 'mom': 4, 'zqm': 4, 'qmd': 5, 'mdf': 5, 'ffz': 7, 'edn': 4, 'odm': 5, 'fpo': 6, 'oqd': 5, 'qdk': 9, 'kdi': 6, 'din': 7, 'qfa': 4, 'faq': 4, 'aqm': 4, 'mnn': 6, 'nnq': 5, 'qdo': 6, 'odi': 7, 'die': 5, 'ezd': 6, 'zdo': 7, 'ofz': 6, 'fzd': 7, 'zdq': 5, 'azd': 5, 'dpa': 7, 'pme': 5, 'mep': 4, 'dqf': 7, 'jzq': 4, 'qzj': 4, 'zjd': 6, 'djj': 6, 'jef': 7, 'efd': 9, 'fdz': 9, 'dzn': 5, 'nia': 4, 'iad': 5, 'zmo': 5, 'moo': 4, 'oon': 4, 'pdj': 5, 'djp': 7, 'pod': 7, 'fdn': 9, 'jpf': 5, 'pfd': 7, 'dpk': 7, 'pke': 
6, 'ekk': 4, 'kkf': 8, 'kff': 5, 'fff': 7, 'fon': 6, 'qpd': 4, 'djq': 6, 'qqo': 4, 'qon': 4, 'zfn': 5, 'nke': 4, 'ked': 6, 'eod': 4, 'ode': 7, 'dez': 7, 'ezi': 4, 'imf': 6, 'mpn': 7, 'oqk': 4, 'jfd': 8, 'ifm': 5, 'fdj': 10, 'ffm': 9, 'zzk': 5, 'zkf': 5, 'kfk': 4, 'fka': 4, 'kad': 6, 'adm': 6, 'dmf': 9, 'mff': 6, 'ffn': 5, 'ffe': 6, 'efn': 4, 'nip': 4, 'ipm': 4, 'afd': 10, 'dfm': 8, 'med': 6, 'edf': 7, 'dzd': 8, 'ddk': 9, 'dkj': 4, 'dik': 7, 'ikm': 4, 'kmn': 4, 'jid': 4, 'ido': 6, 'off': 6, 'ffa': 7, 'faa': 4, 'adp': 6, 'mze': 4, 'edz': 4, 'zda': 6, 'aop': 5, 'opo': 4, 'eqd': 6, 'ppe': 4, 'pej': 4, 'jaz': 4, 'epa': 4, 'emj': 4, 'npo': 4, 'poz': 4, 'aaf': 4, 'afk': 4, 'knd': 6, 'dna': 4, 'nqq': 4, 'dnj': 4, 'aff': 5, 'qei': 5, 'idm': 7, 'okp': 6, 'qff': 6, 'epn': 4, 'qjf': 5, 'did': 8, 'odp': 5, 'dpj': 6, 'qkm': 5, 'moj': 5, 'ojd': 5, 'edo': 7, 'dok': 6, 'oki': 4, 'fjd': 8, 'nmz': 4, 'nfn': 4, 'mak': 5, 'dzp': 5, 'kno': 4, 'ozd': 8, 'ddz': 9, 'dzf': 7, 'dej': 5, 'iqf': 4, 'iin': 4, 'ind': 6, 'ndp': 4, 'kzd': 5, 'zdf': 8, 'onf': 5, 'fqd': 8, 'dij': 5, 'mpf': 6, 'ffi': 5, 'ife': 9, 'doo': 7, 'ooe': 4, 'nif': 7, 'ifi': 5, 'ide': 6, 'kdf': 8, 'zfm': 4, 'fmd': 6, 'oai': 4, 'aif': 4, 'ifj': 6, 'fja': 4, 'jai': 5, 'qnd': 6, 'fod': 6, 'fae': 6, 'nkf': 6, 'zno': 5, 'qkd': 5, 'dqn': 5, 'nek': 4, 'kak': 5, 'kid': 6, 'idj': 8, 'dji': 6, 'pni': 4, 'nid': 6, 'idz': 5, 'fmn': 4, 'fpn': 5, 'nad': 8, 'mnd': 6, 'dpp': 6, 'ppd': 6, 'dkp': 4, 'pmj': 5, 'jkq': 4, 'qfk': 4, 'zqd': 7, 'zfj': 6, 'fjm': 4, 'ien': 4, 'nod': 5, 'fzp': 4, 'ina': 5, 'jdz': 5, 'zdi': 8, 'dia': 6, 'ndq': 5, 'fdk': 9, 'kmq': 5, 'jof': 4, 'pfq': 4, 'fqi': 4, 'njz': 4, 'zme': 6, 'mef': 5, 'efi': 4, 'fqf': 5, 'epd': 4, 'pda': 7, 'dmo': 6, 'mod': 6, 'zdm': 4, 'dmm': 4, 'mmo': 5, 'mon': 4, 'oni': 4, 'jdd': 8, 'ddi': 7, 'ffp': 6, 'fjf': 8, 'jfm': 6, 'odk': 4, 'dkz': 8, 'jmf': 4, 'aed': 7, 'dkq': 5, 'qfz': 5, 'mko': 4, 'okn': 5, 'dpo': 6, 'dnd': 8, 'ndo': 7, 'don': 5, 'aan': 5, 'fkq': 4, 'def': 6, 'fnn': 5, 'nnf': 6, 
'pjk': 4, 'mfa': 6, 'fao': 5, 'nao': 5, 'aoi': 5, 'naj': 5, 'ied': 7, 'jzo': 4, 'fen': 5, 'iif': 5, 'jnn': 7, 'nne': 6, 'fqa': 5, 'apf': 5, 'aei': 5, 'idp': 7, 'zji': 4, 'jqf': 5, 'eai': 4, 'ijj': 4, 'iao': 6, 'deo': 4, 'ooz': 4, 'ozm': 4, 'oin': 4, 'ndz': 6, 'fje': 4, 'fdo': 6, 'iqd': 6, 'pfj': 4, 'dqm': 6, 'fan': 5, 'zfo': 5, 'ooj': 4, 'fof': 7, 'epj': 4, 'jdi': 7, 'pdo': 8, 'edi': 6, 'ika': 4, 'ajf': 5, 'jfp': 6, 'fpk': 8, 'pka': 4, 'ook': 4, 'pfa': 5, 'fad': 8, 'nqn': 5, 'aqd': 6, 'aee': 4, 'ema': 4, 'fej': 4, 'nfp': 6, 'njf': 5, 'fmo': 4, 'oef': 4, 'qdz': 5, 'zmp': 5, 'ejm': 4, 'jmj': 5, 'mjf': 4, 'jfe': 4, 'dfq': 7, 'pmp': 4, 'ozf': 4, 'zff': 6, 'fmk': 5, 'dae': 7, 'oed': 4, 'zjn': 4, 'kif': 4, 'zna': 5, 'fzf': 5, 'zfd': 7, 'nef': 5, 'efq': 4, 'fqk': 4, 'pmf': 4, 'mfe': 4, 'ond': 7, 'dpn': 6, 'pnf': 6, 'nfz': 4, 'pnk': 5, 'nkd': 6, 'fmm': 5, 'ipd': 5, 'ado': 6, 'oaf': 5, 'qzi': 4, 'adk': 6, 'dqq': 4, 'ada': 5, 'azn': 4, 'znf': 5, 'fii': 6, 'pfo': 5, 'kpf': 4, 'fkk': 4, 'kkd': 5, 'dqk': 4, 'ijk': 5, 'mnz': 5, 'kpd': 6, 'dai': 5, 'aip': 4, 'efa': 6, 'pdi': 5, 'zem': 4, 'emf': 4, 'dza': 5, 'dio': 6, 'iof': 4, 'omi': 4, 'mid': 6, 'daf': 8, 'afp': 4, 'fpe': 6, 'efz': 5, 'ekp': 4, 'pdp': 7, 'zif': 6, 'kjk': 4, 'koo': 4, 'kfe': 5, 'ddj': 6, 'dja': 5, 'mdd': 5, 'djn': 6, 'kem': 4, 'pdn': 5, 'ndn': 4, 'nmf': 4, 'okd': 4, 'kdn': 5, 'znm': 4, 'afi': 7, 'fie': 5, 'eij': 4, 'inn': 4, 'nna': 5, 'nan': 4, 'kqp': 5, 'ndk': 5, 'kmp': 5, 'pmz': 4, 'mzf': 4, 'dfa': 7, 'qmz': 4, 'mzd': 4, 'zfi': 4, 'iid': 4, 'dka': 5, 'qof': 4, 'kpe': 4, 'pkf': 6, 'kqa': 4, 'qaj': 5, 'mfj': 5, 'dki': 4, 'foj': 4, 'knj': 4, 'nzj': 6, 'jpn': 4, 'aie': 5, 'ief': 4, 'qak': 4, 'oof': 5, 'ofi': 4, 'fiq': 4, 'qdn': 6, 'pnz': 4, 'nze': 4, 'zfp': 4, 'nzf': 4, 'nff': 5, 'mfq': 4, 'fqn': 4, 'mdi': 6, 'amm': 6, 'izd': 7, 'kzf': 5, 'jda': 7, 'amo': 4, 'fmi': 6, 'ikf': 4, 'nmn': 5, 'nzd': 6, 'mad': 4, 'adi': 4, 'dip': 7, 'mkk': 5, 'qnn': 4, 'pik': 4, 'ipo': 4, 'nde': 4, 'emp': 4, 'mpk': 4, 'qmj': 5, 'nnk': 5, 
'nkm': 4, 'qfm': 5, 'dom': 5, 'ini': 4, 'adf': 5, 'faf': 6, 'djm': 5, 'fai': 4, 'aid': 5, 'idi': 4, 'ajd': 5, 'jdo': 5, 'kpo': 4, 'pfi': 4, 'fmp': 4, 'zqa': 5, 'qoe': 4, 'ikk': 4, 'jqd': 4, 'kmf': 4, 'ofm': 4, 'eii': 4, 'iip': 4, 'iqe': 4, 'qea': 5, 'and': 4, 'mqq': 4, 'kfn': 4, 'mei': 5, 'oja': 4, 'jam': 4, 'mpm': 4, 'iej': 4, 'kkn': 4, 'zde': 4, 'eie': 4, 'fzn': 4, 'zdk': 4, 'eaf': 5, 'fjq': 4, 'mfn': 5, 'mzm': 5, 'qaf': 4, 'nda': 5, 'dpf': 6, 'aoa': 4, 'afq': 4, 'qoj': 4, 'eje': 4, 'ppz': 4, 'znd': 5, 'pjn': 4, 'opn': 4, 'fiz': 4, 'qmf': 4, 'qed': 4, 'kmi': 4, 'aad': 4, 'fkm': 4, 'mdq': 5, 'mjd': 4, 'dke': 4, 'dap': 4, 'pjf': 6, 'mop': 5, 'mpj': 5, 'ofa': 4, 'fem': 4, 'eof': 5, 'pkd': 4, 'pkq': 4, 'qpk': 4, 'dfnd': 5, 'fndf': 4, 'iddd': 5, 'ddda': 6, 'poad': 5, 'mdpd': 4, 'fjkd': 4, 'dfif': 4, 'zpdd': 4, 'ddff': 4, 'dddn': 5, 'dzed': 4, 'fddf': 6, 'djdf': 4, 'fkdd': 4, 'ddfd': 5, 'dfdf': 6, 'fdfe': 4, 'difd': 4, 'dzkd': 4, 'qdfd': 4, 'fdfd': 6, 'dfda': 4, 'doad': 4, 'ijdf': 4, 'dqdd': 6, 'dnfe': 4, 'daod': 5, 'dddm': 5, 'qdqd': 4, 'zddd': 5, 'dddd': 8, 'dddp': 5, 'fdfj': 4, 'jefd': 4, 'fdzn': 4, 'dfdn': 5, 'fdnf': 5, 'fdda': 4, 'dpke': 4, 'mafd': 4, 'dffd': 5, 'dzdo': 4, 'idmd': 4, 'dedd': 4, 'ifed': 4, 'dddk': 5, 'ddkd': 4, 'dkdf': 4, 'dfdd': 5, 'fddp': 5, 'ddpk': 5, 'ndfo': 4, 'efdd': 5, 'nddd': 6, 'dppd': 4, 'dadd': 4, 'ddmo': 4, 'ddqd': 4, 'dqdf': 4, 'ffmd': 4, 'mfan': 4, 'dfdk': 4, 'fdkd': 6, 'oddd': 4, 'didd': 4, 'ddaf': 6, 'ffdd': 4, 'dkmp': 4, 'fdid': 4, 'idnd': 4, 'didp': 5, 'addz': 5, 'kzfd': 4, 'dffm': 4, 'pdfd': 4, 'dddf': 4, 'ofdd': 6, 'fmdp': 4, 'ddkk': 4, 'dndd': 4, 'ddkf': 6, 'fjdf': 4, 'dfpk': 5, 'ndad': 4, 'fddd': 5, 'idfd': 4, 'dfdfd': 4, 'ndddd': 4}
>>> ...
{'kn': 10, 'np': 10, 'pj': 10, 'je': 10, 'ek': 10, 'kq': 10, 'qf': 10, 'pd': 10, 'nz': 10, 'zq': 10, 'qj': 10, 'ja': 9, 'ap': 10, 'fj': 10, 'jn': 10, 'nq': 10, 'di': 10, 'if': 10, 'nj': 10, 'jm': 10, 'mm': 10, 'ma': 10, 'fm': 10, 'mf': 10, 'fi': 10, 'id': 10, 'an': 10, 'qa': 10, 'jj': 10, 'pq': 10, 'ei': 10, 'ik': 10, 'kp': 10, 'pa': 10, 'en': 10, 'nn': 10, 'pi': 10, 'ii': 10, 'io': 10, 'od': 10, 'oz': 10, 'zp': 10, 'eq': 10, 'qz': 10, 'am': 10, 'mq': 9, 'fe': 10, 'ne': 10, 'ea': 10, 'no': 9, 'oi': 10, 'mi': 10, 'om': 10, 'jf': 10, 'nk': 10, 'ka': 10, 'aa': 10, 'ak': 10, 'kf': 10, 'pz': 10, 'po': 10, 'oa': 10, 'ee': 10, 'qe': 10, 'eo': 10, 'oo': 10, 'oq': 10, 'qq': 10, 'qk': 10, 'nm': 10, 'az': 10, 'zk': 10, 'pe': 10, 'fz': 10, 'ze': 10, 'ef': 10, 'fo': 10, 'mk': 10, 'kk': 10, 'kj': 9, 'jk': 10, 'qo': 10, 'qn': 10, 'na': 10, 'ai': 10, 'iq': 10, 'aq': 10, 'fk': 10, 'jp': 10, 'za': 10, 'mp': 10, 'qp': 9, 'pm': 10, 'pn': 10, 'ko': 9, 'dz': 10, 'em': 10, 'jo': 10, 'mz': 10, 'zo': 9, 'km': 10, 'mj': 10, 'ke': 10, 'ej': 10, 'zm': 10, 'kz': 10, 'oj': 10, 'qi': 9, 'mn': 10, 'in': 10, 'iz': 10, 'zz': 10, 'zj': 9, 'pf': 10, 'fq': 10, 'qm': 10, 'me': 10, 'nf': 10, 'ie': 9, 'aj': 10, 'fa': 10, 'ni': 10, 'ae': 10, 'zd': 10, 'zn': 10, 'ia': 9, 'ao': 10, 'ok': 10, 'oe': 10, 'ij': 9, 'zi': 10, 'ji': 9, 'ep': 9, 'ki': 10, 'zf': 10, 'im': 9, 'ip': 10, 'mo': 10, 'ez': 10, 'jz': 9, 'on': 10, 'pk': 10, 'jq': 10, 'pp': 9, 'dkn': 5, 'npj': 4, 'pje': 4, 'jek': 4, 'kqf': 5, 'qfp': 4, 'fpd': 7, 'pdk': 4, 'qja': 5, 'jap': 4, 'apd': 4, 'pdf': 9, 'dfn': 9, 'fnd': 10, 'ndf': 7, 'dff': 8, 'ffj': 6, 'fjn': 7, 'jnq': 5, 'nqd': 6, 'qdi': 8, 'dif': 9, 'afm': 5, 'fmf': 5, 'mfi': 7, 'fid': 9, 'idd': 8, 'dda': 10, 'dan': 6, 'anp': 4, 'npd': 6, 'pdq': 4, 'qap': 5, 'dqj': 5, 'jjd': 5, 'jdp': 4, 'dpq': 6, 'pqd': 6, 'qde': 8, 'dei': 6, 'ikp': 5, 'pad': 6, 'adq': 4, 'dqd': 10, 'den': 5, 'enn': 4, 'pii': 4, 'iod': 5, 'odo': 5, 'doz': 7, 'deq': 7, 'qfd': 6, 'fdq': 7, 'dqz': 8, 'zpa': 7, 'eid': 7, 'idn': 8, 
'dne': 5, 'nea': 4, 'oid': 7, 'ddd': 10, 'dam': 5, 'omd': 6, 'mdj': 6, 'djf': 6, 'jfn': 5, 'fnk': 7, 'aak': 4, 'akf': 5, 'kfd': 7, 'fdf': 10, 'dfp': 10, 'qdd': 8, 'ddo': 10, 'dof': 8, 'ofd': 10, 'fdp': 10, 'dpz': 7, 'poa': 6, 'oad': 8, 'ade': 6, 'dee': 6, 'ead': 8, 'adj': 5, 'djd': 10, 'jdq': 5, 'dqe': 6, 'qka': 5, 'akd': 6, 'kdq': 7, 'qdj': 4, 'jdn': 4, 'dnm': 6, 'nmd': 4, 'mda': 8, 'daz': 6, 'kqd': 6, 'qdp': 4, 'dpe': 7, 'ean': 4, 'njd': 5, 'jdf': 9, 'dfz': 6, 'fze': 6, 'fom': 5, 'mdp': 6, 'dpd': 8, 'pdm': 6, 'mkd': 4, 'kdk': 6, 'dkk': 8, 'kkj': 4, 'kjf': 4, 'jfj': 5, 'fjk': 6, 'jkd': 8, 'kdo': 5, 'dod': 6, 'odq': 6, 'dqo': 7, 'qod': 7, 'odf': 8, 'dfi': 8, 'fif': 6, 'iff': 7, 'aiq': 5, 'nzp': 5, 'zpd': 6, 'pdd': 9, 'ddf': 10, 'ffk': 6, 'fkj': 4, 'pid': 4, 'ddn': 10, 'dnz': 5, 'zaf': 6, 'afn': 5, 'fnm': 4, 'pqn': 5, 'qnp': 4, 'pmq': 4, 'pnp': 4, 'dko': 5, 'kof': 4, 'ofo': 4, 'fop': 5, 'opd': 4, 'fnq': 4, 'knm': 4, 'nmk': 5, 'pdz': 4, 'dze': 5, 'zed': 7, 'eda': 6, 'daa': 6, 'fde': 6, 'dem': 6, 'emq': 4, 'mqd': 4, 'djo': 5, 'opa': 5, 'paf': 4, 'afe': 4, 'eed': 6, 'edk': 4, 'dkd': 10, 'kda': 5, 'oif': 5, 'qda': 4, 'kmm': 4, 'ejf': 5, 'odz': 4, 'dzm': 6, 'zmf': 5, 'mfd': 7, 'fdd': 10, 'dfj': 8, 'fjj': 5, 'jjk': 6, 'kzo': 4, 'ojf': 4, 'jff': 4, 'ffd': 10, 'fdm': 8, 'dma': 5, 'maq': 4, 'qif': 5, 'ifo': 5, 'ofn': 6, 'fne': 5, 'ned': 6, 'ede': 8, 'ded': 10, 'edm': 8, 'dmn': 6, 'qin': 4, 'inp': 4, 'pnd': 7, 'ndi': 5, 'diz': 6, 'izz': 4, 'zjf': 6, 'jfz': 4, 'kjd': 6, 'jdm': 4, 'kde': 6, 'edj': 7, 'dfk': 7, 'fkd': 8, 'kdd': 10, 'dfd': 10, 'dfe': 7, 'fef': 7, 'eff': 6, 'dmp': 6, 'pef': 7, 'efj': 5, 'jnd': 5, 'ifd': 7, 'fda': 9, 'daq': 7, 'qqd': 5, 'jkj': 5, 'kja': 4, 'jad': 7, 'add': 10, 'ddq': 9, 'qdm': 7, 'dmd': 10, 'mdz': 5, 'fnz': 4, 'zpf': 4, 'pff': 7, 'ffq': 5, 'fqm': 4, 'ejd': 5, 'dmz': 6, 'anf': 6, 'nfd': 9, 'fdi': 8, 'noz': 4, 'jfq': 5, 'fqo': 5, 'iem': 5, 'emd': 6, 'dzk': 7, 'zkd': 6, 'daj': 6, 'ajn': 5, 'jnf': 4, 'nfa': 4, 'amd': 4, 'mdn': 7, 'dnq': 6, 'qdf': 8, 
'dad': 7, 'adz': 8, 'edd': 8, 'doa': 9, 'adn': 4, 'dni': 6, 'efp': 4, 'ekd': 4, 'doq': 5, 'diq': 6, 'iqz': 4, 'qzz': 5, 'zdp': 7, 'pzz': 5, 'zdn': 5, 'dnn': 7, 'pzk': 4, 'keq': 5, 'end': 4, 'ndm': 5, 'iaa': 4, 'ajm': 5, 'jmd': 6, 'mdm': 5, 'mpd': 5, 'pde': 6, 'dea': 9, 'eak': 4, 'akz': 4, 'zjo': 5, 'jde': 5, 'enq': 5, 'nqf': 6, 'ifa': 4, 'qdq': 9, 'dqi': 7, 'qid': 4, 'idf': 7, 'fnp': 7, 'npq': 4, 'dpm': 7, 'mde': 8, 'eqo': 4, 'qoo': 4, 'oom': 5, 'odn': 4, 'enf': 5, 'nfi': 5, 'fik': 7, 'ikd': 4, 'kdp': 7, 'dpi': 6, 'ijd': 5, 'zid': 6, 'idq': 9, 'jji': 7, 'ifp': 6, 'fpz': 7, 'pzd': 5, 'zdd': 8, 'dde': 7, 'dep': 6, 'epm': 4, 'odj': 5, 'ddm': 9, 'doi': 5, 'oqe': 6, 'aoq': 4, 'dfo': 9, 'foz': 4, 'zof': 4, 'ofq': 4, 'fqe': 4, 'qef': 6, 'efk': 5, 'dnf': 8, 'nfe': 5, 'fed': 8, 'edq': 6, 'dqp': 7, 'pzf': 5, 'zfz': 6, 'zod': 5, 'oda': 6, 'dao': 8, 'aod': 9, 'dop': 10, 'dkf': 9, 'kfp': 6, 'fpi': 6, 'aio': 4, 'odd': 9, 'dnp': 6, 'edp': 4, 'piq': 5, 'pjd': 6, 'fkn': 8, 'ifq': 6, 'fqz': 6, 'qzm': 5, 'zmd': 8, 'dkm': 8, 'kmd': 7, 'nei': 4, 'pfk': 4, 'fke': 7, 'knn': 5, 'nnd': 5, 'ndd': 9, 'ddp': 9, 'mom': 4, 'zqm': 4, 'qmd': 5, 'mdf': 5, 'ffz': 7, 'edn': 4, 'odm': 5, 'fpo': 6, 'oqd': 5, 'qdk': 9, 'kdi': 6, 'din': 7, 'qfa': 4, 'faq': 4, 'aqm': 4, 'mnn': 6, 'nnq': 5, 'qdo': 6, 'odi': 7, 'die': 5, 'ezd': 6, 'zdo': 7, 'ofz': 6, 'fzd': 7, 'zdq': 5, 'azd': 5, 'dpa': 7, 'pme': 5, 'mep': 4, 'dqf': 7, 'jzq': 4, 'qzj': 4, 'zjd': 6, 'djj': 6, 'jef': 7, 'efd': 9, 'fdz': 9, 'dzn': 5, 'nia': 4, 'iad': 5, 'zmo': 5, 'moo': 4, 'oon': 4, 'pdj': 5, 'djp': 7, 'pod': 7, 'fdn': 9, 'jpf': 5, 'pfd': 7, 'dpk': 7, 'pke': 6, 'ekk': 4, 'kkf': 8, 'kff': 5, 'fff': 7, 'fon': 6, 'qpd': 4, 'djq': 6, 'qqo': 4, 'qon': 4, 'zfn': 5, 'nke': 4, 'ked': 6, 'eod': 4, 'ode': 7, 'dez': 7, 'ezi': 4, 'imf': 6, 'mpn': 7, 'oqk': 4, 'jfd': 8, 'ifm': 5, 'fdj': 10, 'ffm': 9, 'zzk': 5, 'zkf': 5, 'kfk': 4, 'fka': 4, 'kad': 6, 'adm': 6, 'dmf': 9, 'mff': 6, 'ffn': 5, 'ffe': 6, 'efn': 4, 'nip': 4, 'ipm': 4, 'afd': 10, 'dfm': 8, 'med': 
6, 'edf': 7, 'dzd': 8, 'ddk': 9, 'dkj': 4, 'dik': 7, 'ikm': 4, 'kmn': 4, 'jid': 4, 'ido': 6, 'off': 6, 'ffa': 7, 'faa': 4, 'adp': 6, 'mze': 4, 'edz': 4, 'zda': 6, 'aop': 5, 'opo': 4, 'eqd': 6, 'ppe': 4, 'pej': 4, 'jaz': 4, 'epa': 4, 'emj': 4, 'npo': 4, 'poz': 4, 'aaf': 4, 'afk': 4, 'knd': 6, 'dna': 4, 'nqq': 4, 'dnj': 4, 'aff': 5, 'qei': 5, 'idm': 7, 'okp': 6, 'qff': 6, 'epn': 4, 'qjf': 5, 'did': 8, 'odp': 5, 'dpj': 6, 'qkm': 5, 'moj': 5, 'ojd': 5, 'edo': 7, 'dok': 6, 'oki': 4, 'fjd': 8, 'nmz': 4, 'nfn': 4, 'mak': 5, 'dzp': 5, 'kno': 4, 'ozd': 8, 'ddz': 9, 'dzf': 7, 'dej': 5, 'iqf': 4, 'iin': 4, 'ind': 6, 'ndp': 4, 'kzd': 5, 'zdf': 8, 'onf': 5, 'fqd': 8, 'dij': 5, 'mpf': 6, 'ffi': 5, 'ife': 9, 'doo': 7, 'ooe': 4, 'nif': 7, 'ifi': 5, 'ide': 6, 'kdf': 8, 'zfm': 4, 'fmd': 6, 'oai': 4, 'aif': 4, 'ifj': 6, 'fja': 4, 'jai': 5, 'qnd': 6, 'fod': 6, 'fae': 6, 'nkf': 6, 'zno': 5, 'qkd': 5, 'dqn': 5, 'nek': 4, 'kak': 5, 'kid': 6, 'idj': 8, 'dji': 6, 'pni': 4, 'nid': 6, 'idz': 5, 'fmn': 4, 'fpn': 5, 'nad': 8, 'mnd': 6, 'dpp': 6, 'ppd': 6, 'dkp': 4, 'pmj': 5, 'jkq': 4, 'qfk': 4, 'zqd': 7, 'zfj': 6, 'fjm': 4, 'ien': 4, 'nod': 5, 'fzp': 4, 'ina': 5, 'jdz': 5, 'zdi': 8, 'dia': 6, 'ndq': 5, 'fdk': 9, 'kmq': 5, 'jof': 4, 'pfq': 4, 'fqi': 4, 'njz': 4, 'zme': 6, 'mef': 5, 'efi': 4, 'fqf': 5, 'epd': 4, 'pda': 7, 'dmo': 6, 'mod': 6, 'zdm': 4, 'dmm': 4, 'mmo': 5, 'mon': 4, 'oni': 4, 'jdd': 8, 'ddi': 7, 'ffp': 6, 'fjf': 8, 'jfm': 6, 'odk': 4, 'dkz': 8, 'jmf': 4, 'aed': 7, 'dkq': 5, 'qfz': 5, 'mko': 4, 'okn': 5, 'dpo': 6, 'dnd': 8, 'ndo': 7, 'don': 5, 'aan': 5, 'fkq': 4, 'def': 6, 'fnn': 5, 'nnf': 6, 'pjk': 4, 'mfa': 6, 'fao': 5, 'nao': 5, 'aoi': 5, 'naj': 5, 'ied': 7, 'jzo': 4, 'fen': 5, 'iif': 5, 'jnn': 7, 'nne': 6, 'fqa': 5, 'apf': 5, 'aei': 5, 'idp': 7, 'zji': 4, 'jqf': 5, 'eai': 4, 'ijj': 4, 'iao': 6, 'deo': 4, 'ooz': 4, 'ozm': 4, 'oin': 4, 'ndz': 6, 'fje': 4, 'fdo': 6, 'iqd': 6, 'pfj': 4, 'dqm': 6, 'fan': 5, 'zfo': 5, 'ooj': 4, 'fof': 7, 'epj': 4, 'jdi': 7, 'pdo': 8, 'edi': 6, 'ika': 
4, 'ajf': 5, 'jfp': 6, 'fpk': 8, 'pka': 4, 'ook': 4, 'pfa': 5, 'fad': 8, 'nqn': 5, 'aqd': 6, 'aee': 4, 'ema': 4, 'fej': 4, 'nfp': 6, 'njf': 5, 'fmo': 4, 'oef': 4, 'qdz': 5, 'zmp': 5, 'ejm': 4, 'jmj': 5, 'mjf': 4, 'jfe': 4, 'dfq': 7, 'pmp': 4, 'ozf': 4, 'zff': 6, 'fmk': 5, 'dae': 7, 'oed': 4, 'zjn': 4, 'kif': 4, 'zna': 5, 'fzf': 5, 'zfd': 7, 'nef': 5, 'efq': 4, 'fqk': 4, 'pmf': 4, 'mfe': 4, 'ond': 7, 'dpn': 6, 'pnf': 6, 'nfz': 4, 'pnk': 5, 'nkd': 6, 'fmm': 5, 'ipd': 5, 'ado': 6, 'oaf': 5, 'qzi': 4, 'adk': 6, 'dqq': 4, 'ada': 5, 'azn': 4, 'znf': 5, 'fii': 6, 'pfo': 5, 'kpf': 4, 'fkk': 4, 'kkd': 5, 'dqk': 4, 'ijk': 5, 'mnz': 5, 'kpd': 6, 'dai': 5, 'aip': 4, 'efa': 6, 'pdi': 5, 'zem': 4, 'emf': 4, 'dza': 5, 'dio': 6, 'iof': 4, 'omi': 4, 'mid': 6, 'daf': 8, 'afp': 4, 'fpe': 6, 'efz': 5, 'ekp': 4, 'pdp': 7, 'zif': 6, 'kjk': 4, 'koo': 4, 'kfe': 5, 'ddj': 6, 'dja': 5, 'mdd': 5, 'djn': 6, 'kem': 4, 'pdn': 5, 'ndn': 4, 'nmf': 4, 'okd': 4, 'kdn': 5, 'znm': 4, 'afi': 7, 'fie': 5, 'eij': 4, 'inn': 4, 'nna': 5, 'nan': 4, 'kqp': 5, 'ndk': 5, 'kmp': 5, 'pmz': 4, 'mzf': 4, 'dfa': 7, 'qmz': 4, 'mzd': 4, 'zfi': 4, 'iid': 4, 'dka': 5, 'qof': 4, 'kpe': 4, 'pkf': 6, 'kqa': 4, 'qaj': 5, 'mfj': 5, 'dki': 4, 'foj': 4, 'knj': 4, 'nzj': 6, 'jpn': 4, 'aie': 5, 'ief': 4, 'qak': 4, 'oof': 5, 'ofi': 4, 'fiq': 4, 'qdn': 6, 'pnz': 4, 'nze': 4, 'zfp': 4, 'nzf': 4, 'nff': 5, 'mfq': 4, 'fqn': 4, 'mdi': 6, 'amm': 6, 'izd': 7, 'kzf': 5, 'jda': 7, 'amo': 4, 'fmi': 6, 'ikf': 4, 'nmn': 5, 'nzd': 6, 'mad': 4, 'adi': 4, 'dip': 7, 'mkk': 5, 'qnn': 4, 'pik': 4, 'ipo': 4, 'nde': 4, 'emp': 4, 'mpk': 4, 'qmj': 5, 'nnk': 5, 'nkm': 4, 'qfm': 5, 'dom': 5, 'ini': 4, 'adf': 5, 'faf': 6, 'djm': 5, 'fai': 4, 'aid': 5, 'idi': 4, 'ajd': 5, 'jdo': 5, 'kpo': 4, 'pfi': 4, 'fmp': 4, 'zqa': 5, 'qoe': 4, 'ikk': 4, 'jqd': 4, 'kmf': 4, 'ofm': 4, 'eii': 4, 'iip': 4, 'iqe': 4, 'qea': 5, 'and': 4, 'mqq': 4, 'kfn': 4, 'mei': 5, 'oja': 4, 'jam': 4, 'mpm': 4, 'iej': 4, 'kkn': 4, 'zde': 4, 'eie': 4, 'fzn': 4, 'zdk': 4, 'eaf': 5, 'fjq': 
4, 'mfn': 5, 'mzm': 5, 'qaf': 4, 'nda': 5, 'dpf': 6, 'aoa': 4, 'afq': 4, 'qoj': 4, 'eje': 4, 'ppz': 4, 'znd': 5, 'pjn': 4, 'opn': 4, 'fiz': 4, 'qmf': 4, 'qed': 4, 'kmi': 4, 'aad': 4, 'fkm': 4, 'mdq': 5, 'mjd': 4, 'dke': 4, 'dap': 4, 'pjf': 6, 'mop': 5, 'mpj': 5, 'ofa': 4, 'fem': 4, 'eof': 5, 'pkd': 4, 'pkq': 4, 'qpk': 4, 'dfnd': 5, 'fndf': 4, 'iddd': 5, 'ddda': 6, 'poad': 5, 'mdpd': 4, 'fjkd': 4, 'dfif': 4, 'zpdd': 4, 'ddff': 4, 'dddn': 5, 'dzed': 4, 'fddf': 6, 'djdf': 4, 'fkdd': 4, 'ddfd': 5, 'dfdf': 6, 'fdfe': 4, 'difd': 4, 'dzkd': 4, 'qdfd': 4, 'fdfd': 6, 'dfda': 4, 'doad': 4, 'ijdf': 4, 'dqdd': 6, 'dnfe': 4, 'daod': 5, 'dddm': 5, 'qdqd': 4, 'zddd': 5, 'dddd': 8, 'dddp': 5, 'fdfj': 4, 'jefd': 4, 'fdzn': 4, 'dfdn': 5, 'fdnf': 5, 'fdda': 4, 'dpke': 4, 'mafd': 4, 'dffd': 5, 'dzdo': 4, 'idmd': 4, 'dedd': 4, 'ifed': 4, 'dddk': 5, 'ddkd': 4, 'dkdf': 4, 'dfdd': 5, 'fddp': 5, 'ddpk': 5, 'ndfo': 4, 'efdd': 5, 'nddd': 6, 'dppd': 4, 'dadd': 4, 'ddmo': 4, 'ddqd': 4, 'dqdf': 4, 'ffmd': 4, 'mfan': 4, 'dfdk': 4, 'fdkd': 6, 'oddd': 4, 'didd': 4, 'ddaf': 6, 'ffdd': 4, 'dkmp': 4, 'fdid': 4, 'idnd': 4, 'didp': 5, 'addz': 5, 'kzfd': 4, 'dffm': 4, 'pdfd': 4, 'dddf': 4, 'ofdd': 6, 'fmdp': 4, 'ddkk': 4, 'dndd': 4, 'ddkf': 6, 'fjdf': 4, 'dfpk': 5, 'ndad': 4, 'fddd': 5, 'idfd': 4, 'dfdfd': 4, 'ndddd': 4}
I would like an implementation in Python 3 that is faster and uses less memory.
2 Answers 2
O(n * d^2) solution
The original code runs in O(n^2) time. This code runs in O(n d^2) where n is the length of mydict
and d is the length of a key. The final code is at the bottom of the post. Here is how I got there:
First, I had the same idea as @HeapOverflow, but then decided to try another approach. For any key, value in mydict
, we can create all the key's substrings and build a dictionary mapping (substring, value) to a list of keys that have that substring and value.
from collections import defaultdict
# Map every (substring, value) pair to the list of keys that contain that
# substring and carry that value.
d = defaultdict(list)
for key, value in mydict.items():
    # Enumerate all substrings of ``key``: every width from 1 up to the full
    # key length, at every start offset that fits.
    for width in range(1, len(key) + 1):
        for start in range(len(key) + 1 - width):
            subkey = key[start:start + width]
            pair = (subkey, value)
            d[pair].append(key)
The outer loop runs O(n) times and the inner two loops run O(d^2) times for O(nd^2) complexity. The desired result is then those substring, value pairs that can only be generated from one key, value pair.
# Keep a (substring, value) pair only when exactly one key produced it and
# that substring is the producing key itself.
newdict = {
    subkey: value
    for (subkey, value), keys in d.items()
    if len(keys) == 1 and subkey == keys[0]
}
For the sample data in the problem, this runs in about 8 ms compared to 85 ms for the original code. However, it uses a lot of memory to store the lists of keys in the dict d
. We can reduce the memory by noting that newdict
only depends on whether the list of keys has one, or more than one element. The new code only appends a key to the list if the list has fewer than two elements.
# Same substring index as before, but each list is capped at two entries:
# the final filter only needs to know whether a pair was produced by exactly
# one key or by more than one.
d = defaultdict(list)
for key, value in mydict.items():
    for width in range(1, len(key) + 1):
        for start in range(len(key) + 1 - width):
            subkey = key[start:start + width]
            keys = d[(subkey, value)]
            if len(keys) < 2:  #### changed code
                keys.append(key)

# Keep a pair only when a single key produced it and the substring is that
# key itself.
newdict = {
    subkey: value
    for (subkey, value), keys in d.items()
    if len(keys) == 1 and subkey == keys[0]
}
This reduces memory use by about 60% and runs in about 7.5 ms.
The next step is to recognize that d
has two kinds of entries, those whose value has a length of 1 and those that have a length > 1. Basically, two sets. So let's try using sets: unique
holds the unique key-value pairs; subsumed
holds subkey-value pairs that would be subsumed by a unique key-value pair.
# ``unique`` holds the (key, value) pairs still considered unique;
# ``subsumed`` holds every (proper substring, value) pair of a key seen so far.
subsumed = set()
unique = set()
for key, value in mydict.items():
    # The sets store (key, value) tuples, so membership must be tested with
    # the pair; testing the bare key never matches.  A pair that is already
    # subsumed can be skipped entirely: all of its substrings were recorded
    # when the containing key was processed.
    if (key, value) in subsumed:
        continue
    unique.add((key, value))
    # Widths stop below len(key), so only proper substrings are generated.
    for width in range(1, len(key)):
        for start in range(len(key) + 1 - width):
            pair = (key[start:start + width], value)
            subsumed.add(pair)
            unique.discard(pair)
newdict = dict(unique)
This runs in about 6 ms.
Final code
Lastly, dict.items()
returns a view. If the values are hashable, then dict.items()
can be used like a set and the code can be simplified to:
# Collect every (proper substring, value) pair of every key, then remove
# those pairs from the items of ``mydict``.  ``dict.items()`` supports set
# difference when the values are hashable.
subsumed = set()
for key, value in mydict.items():
    # Widths stop below len(key), so a key never subsumes itself.
    for width in range(1, len(key)):
        for start in range(len(key) + 1 - width):
            subsumed.add((key[start:start + width], value))
newdict = dict(mydict.items() - subsumed)
This runs in about 5 ms.
Bug in original code
There is a probable bug in both the original code, in that the results depend on the order of iteration of the key-value pairs in mydict.items()
. For Python >= 3.7 the order is the same as the insertion order. For earlier versions, the order is undefined or random.
For example, for both codes:
mydict = {'a':1, 'aa':1} ==> results in newdict = {'aa':1}.
But
mydict = {'aa':1, 'a':1} ==> results in newdict ={'aa':1, 'a':1}.
-
\$\begingroup\$ Yeah, that's what I commented under the question already. And it's one reason I didn't try further improvements, intentionally keeping the logic as is because the textual task description isn't quite clear to me and there's no guarantee (or anything said) about the input order. But I think our codes are correct as long as the keys are sorted by increasing length, which they are in the example data (which should be representative of the real data). \$\endgroup\$Kelly Bundy– Kelly Bundy2020年09月24日 08:13:46 +00:00Commented Sep 24, 2020 at 8:13
-
\$\begingroup\$ @HeapOverflow For Python >= 3.7, the codes are okay as long as the keys are inserted in order of increasing length. For Python < 3.7 the order of iteration is arbitrary so there may be errors. \$\endgroup\$RootTwo– RootTwo2020年09月24日 09:12:06 +00:00Commented Sep 24, 2020 at 9:12
-
\$\begingroup\$ Ah right, we've had 3.7 for so long now that I usually don't even think about that anymore :-P \$\endgroup\$Kelly Bundy– Kelly Bundy2020年09月24日 09:21:25 +00:00Commented Sep 24, 2020 at 9:21
-
\$\begingroup\$ If we consider the string length in the complexity, then I think we all get another factor d. Copying a substring and computing a string hash aren't O(1). \$\endgroup\$Kelly Bundy– Kelly Bundy2020年09月24日 10:06:27 +00:00Commented Sep 24, 2020 at 10:06
-
\$\begingroup\$ @HeapOverflow, sorry I didn't see you comment about the bug. As for complexity, I was just pointing out that my inner loops are a function of key length, not the number of keys. So the code might not be good for processing a short dictionary with long keys. Another way to look at it is O(n) with the scale factor being a function of d^2. \$\endgroup\$RootTwo– RootTwo2020年09月24日 18:31:37 +00:00Commented Sep 24, 2020 at 18:31
You try all pairs and besides comparing the keys, you check whether their values are equal. It's faster to only try pairs whose values are equal, and you can do this by first categorizing by value.
def filtered(mydict):
    """Return the entries of *mydict* whose key is not subsumed by another key.

    A key is subsumed when a different key that maps to an equal value
    contains it (``key in other``).  The result does not depend on the
    iteration order of *mydict*: ``{'aa': 1, 'a': 1}`` and
    ``{'a': 1, 'aa': 1}`` both give ``{'aa': 1}``.

    Args:
        mydict: Mapping whose keys support ``in`` against each other
            (e.g. strings) and whose values are hashable.

    Returns:
        A new dict with the surviving ``key: value`` entries, in the order
        they appear in *mydict*.
    """
    # Categorize the items by value so only same-valued keys are compared.
    items_for_value = {}
    for item in mydict.items():
        items_for_value.setdefault(item[1], []).append(item)

    newdict = {}
    for key1, value1 in mydict.items():
        # ``key1 != key2`` skips the self-match; every key contains itself.
        is_subsumed = any(
            key1 != key2 and key1 in key2
            for key2, _ in items_for_value[value1]
        )
        if not is_subsumed:
            newdict[key1] = value1
    return newdict
It's about five times as fast as your original on that larger dictionary (0.7 seconds vs 3.5 seconds).
Of course this uses more memory, not less.
Alternative version, categorizing just the keys, which is a bit faster and takes less extra space (but assumes it doesn't matter which of two equal values we use):
def filtered(mydict):
    """Return the entries of *mydict* whose key is not subsumed by another key.

    A key is subsumed when a different key that maps to an equal value
    contains it (``key in other``).  Only the keys are grouped by value,
    which needs less extra space than grouping whole items.  The result does
    not depend on the iteration order of *mydict*.

    Args:
        mydict: Mapping whose keys support ``in`` against each other
            (e.g. strings) and whose values are hashable.

    Returns:
        A new dict with the surviving ``key: value`` entries, in the order
        they appear in *mydict*; each key keeps its own value object.
    """
    # Categorize the keys by value so only same-valued keys are compared.
    keys_for_value = {}
    for key, value in mydict.items():
        keys_for_value.setdefault(value, []).append(key)

    newdict = {}
    for key1, value in mydict.items():
        # ``key1 != key2`` skips the self-match; every key contains itself.
        is_subsumed = any(
            key1 != key2 and key1 in key2
            for key2 in keys_for_value[value]
        )
        if not is_subsumed:
            newdict[key1] = value
    return newdict
-
\$\begingroup\$ Sorry, forgot that the item tuples might get created just for me. Deleted the sentence. \$\endgroup\$Kelly Bundy– Kelly Bundy2020年09月23日 16:06:10 +00:00Commented Sep 23, 2020 at 16:06
-
\$\begingroup\$ @Peilonrayz It would be not that much more if I stored only keys instead of items, right? Far less than double then. I had actually started with that, but then worried about "equal but not identical" objects (although if they're just small ints, that should be equivalent). \$\endgroup\$Kelly Bundy– Kelly Bundy2020年09月23日 16:15:50 +00:00Commented Sep 23, 2020 at 16:15
-
\$\begingroup\$ The updated form would still double in space worst case - say you have
{'a': 0, 'b': 1}
then you'd make{0: ['a'], 1: ['b']}
As you can see this contains the same amount of keys as values. We can also see you'll always double the keys,{'a': 0, 'b': 0}
to{0: ['a', 'b']}
. So worst case it's still double, but yes you've reduced the amount of memory. \$\endgroup\$2020年09月23日 16:49:11 +00:00Commented Sep 23, 2020 at 16:49 -
\$\begingroup\$ @Peilonrayz Not sure what you mean with worst case, but I just tried,
mydict
took 52,982 bytes total and my keys_for_value
added 5,472 bytes total, that's only 10.3% more. \$\endgroup\$Kelly Bundy– Kelly Bundy2020年09月23日 18:21:23 +00:00Commented Sep 23, 2020 at 18:21 -
\$\begingroup\$ It's unclear what you mean by "added" like your function took that much more in total or
keys_for_value
was that big? Additionally the OP's code isn't the worst case. \$\endgroup\$2020年09月23日 18:25:09 +00:00Commented Sep 23, 2020 at 18:25
Explore related questions
See similar questions with these tags.
{'a': 0, 'ab': 0}
into{'ab': 0}
but turn{'ab': 0, 'a': 0}
into{'ab': 0, 'a': 0}
. Is that really correct? \$\endgroup\${'a': 0, 'ab': 0} and {'ab': 0, 'a': 0}
will be into{'ab': 0}
, as a is in ab. Does this make a difference? \$\endgroup\$