|
| 1 | +#!/usr/bin/env python |
| 2 | +# -*- coding: utf-8 -*- |
| 3 | +import os, csv, subprocess, json |
| 4 | + |
# Per-platform statistics keyed by platform name; populated while files.csv
# is read further down.
platforms = {}

# Maps a file extension (without the leading dot) to the language label that
# is counted for it. Extensions listed here are never passed to cloc.
exts = {
    # no meaningful source language
    '': 'NA',
    'zip': 'NA',
    # web / markup
    'asp': 'ASP',
    'htm': 'HTML',
    'html': 'HTML',
    'js': 'JavaScript',
    'php': 'PHP',
    # compiled languages
    'c': 'C',
    'cpp': 'C++',
    'delphi': 'Delphi',
    # everything Java-related is counted as Java
    'jar': 'Java',
    'java': 'Java',
    'jsp': 'Java',
    # scripting languages
    'pl': 'Perl',
    'pm': 'Perl',
    'py': 'Python',
    'rb': 'Ruby',
    'sh': 'Shell',
    # data / plain text
    'sql': 'SQL',
    'txt': 'Text',
}
| 28 | + |
# Tally every data row of files.csv into `platforms`: number of entries per
# platform, per exploit type, and per language (looked up from the file
# extension in `exts`).
#
# cloc's stderr is meant to be collected in cloc-messages.txt. Opening the log
# with a context manager guarantees it is closed even when reading or
# unpacking a CSV row raises.
with open('cloc-messages.txt', 'w') as ferr:
    # NOTE: binary mode is what the Python 2 csv module expects; on Python 3
    # the csv module needs a text-mode file opened with newline=''.
    with open('files.csv', 'rb') as f:
        reader = csv.reader(f)
        headers = next(reader)  # consume the header row (Python 2.6+ and 3)

        # Each row must have exactly these eight columns; anything else
        # raises ValueError during unpacking. Names avoid shadowing the
        # builtins id/file/type.
        for _id, path, description, date, author, platform, kind, port in reader:
            if platform not in platforms:
                platforms[platform] = {'name': platform, 'value': 0, 'types': {}, 'code': {}}
            stats = platforms[platform]
            stats['value'] += 1
            stats['types'][kind] = stats['types'].get(kind, 0) + 1

            # don't cloc on known file types
            fext = os.path.splitext(path)[-1].lstrip('.')
            if fext in exts:
                language = exts[fext]
                stats['code'][language] = stats['code'].get(language, 0) + 1
                continue

            # NOTE(review): this unconditional `continue` makes everything
            # below unreachable, so files with an extension missing from
            # `exts` are not counted under 'code' at all. Presumably the cloc
            # pass was switched off on purpose -- remove this line to
            # re-enable it.
            continue

            # call cloc to compute a summary of the script
            print('Call cloc with %s' % path)
            try:
                # Kept in its own name so the imported `csv` module is not
                # rebound to cloc's output.
                cloc_out = subprocess.check_output(
                    ['cloc', '--csv', '--quiet', path], stderr=ferr)
            except subprocess.CalledProcessError:
                print('Call to cloc produced an error with %s' % path)
                continue

            if not cloc_out:
                print('Clock did not yield a result for %s' % path)
                continue

            # save last line of CSV output containing data in vars
            files, language, blank, comment, code = (
                cloc_out.strip().split('\n')[-1].split(','))
            stats['code'][language] = stats['code'].get(language, 0) + 1
| 67 | + |
| 68 | +# make data d3 friendly |
def sortdict(d):
    """Convert a ``{name: count}`` mapping into a list sorted by count.

    Args:
        d: mapping from a label to a sortable value (here: occurrence counts).

    Returns:
        A list of ``{'name': key, 'value': value}`` dicts, ordered by
        ``'value'`` from largest to smallest. An empty mapping yields ``[]``.
    """
    entries = [{'name': k, 'value': v} for k, v in d.items()]
    # Sort on the 'value' key explicitly. Indexing `.values()` fails on
    # Python 3 (dict views are not subscriptable) and on Python 2 relies on
    # arbitrary dict ordering to pick the right field.
    return sorted(entries, key=lambda entry: entry['value'], reverse=True)
| 72 | + |
# Order the platform records by ascending entry count, then replace each
# record's 'types' and 'code' tallies with their sorted list form.
pfs = sorted(platforms.values(), key=lambda record: record['value'])
for p in pfs:
    # The records are mutated in place, so `pfs` already holds the result.
    p['types'] = sortdict(p['types'])
    p['code'] = sortdict(p['code'])

# Serialise the final list for the visualisation.
with open('exploit-db-platforms.json', 'w') as f:
    json.dump(pfs, f)
| 81 | + |
0 commit comments