User:Commander Keane/wikidump2mapdata.py
Appearance
From Wikipedia, the free encyclopedia
"""
wikidump2mapdata.py

A Python script to create a Wikimedia Commons Data: file for mapframe display
from a wikitext list of UK grid references (with bold headings in the list).
Markers have blank titles and descriptions with the heading and grid reference.

Input: .txt file
Output: commons_data.json
"""

import json
import re

import OSGridConverter

output_filename = "commons_data.json"
wiki_dump_filename = "grid-refs-from-wiki.txt"
decimal_places = 6  # Number of decimal places for lat/lon
grid_ref_regex = r'^.*([A-Z]{2} [0-9]{4}).*$'  # Captures format "SK 1234"
subheading_regex = r"^.*'''(.*)'''.*$"  # Captures within triple quotes
subheading = 'default_subheading'  # Heading used for grid refs seen before any bold heading
grid_refs_from_wiki = []  # Not read anywhere in this script; retained as-is
data = {}  # Maps heading -> list of [grid_ref, lat, lon]


def uk_grid_to_coords(grid_ref):
    """
    Convert UK grid reference (e.g., "SK 0805") to decimal coordinates (lat, lon).

    Args:
        grid_ref: String in format "XX 0805" where XX is two letters and
            0805 is 4 digits (2 easting digits followed by 2 northing digits)

    Returns:
        tuple: (latitude, longitude) in decimal degrees, each rounded to
        ``decimal_places``. The original author documents these as WGS84;
        that depends on OSGridConverter's default datum.
    """
    # Normalise "sk 0805" / "SK 0805" to "SK0805"
    grid_ref = grid_ref.replace(" ", "").upper()
    letters = grid_ref[:2]
    easting = grid_ref[2:4]
    northing = grid_ref[4:6]

    # Pad easting and northing to 3 figures each, turning the 4-figure
    # reference "SK0805" into the 6-figure reference "SK080050". Appending
    # zeros keeps the same origin point for the square as the 4-figure form.
    full_grid_ref = f"{letters}{easting}0{northing}0"

    # Convert using OSGridConverter
    coords = OSGridConverter.grid2latlong(full_grid_ref)
    lat = round(coords.latitude, decimal_places)
    lon = round(coords.longitude, decimal_places)
    return lat, lon


# Compile once rather than on every line of the input file.
subheading_pattern = re.compile(subheading_regex)
grid_ref_pattern = re.compile(grid_ref_regex)

# Wikitext is read as UTF-8 explicitly instead of relying on the platform's
# locale encoding.
with open(wiki_dump_filename, 'r', encoding='utf-8') as f:
    for line in f:
        line = line.strip()

        match_subheading = subheading_pattern.match(line)
        if match_subheading:
            subheading = match_subheading.group(1)
            # setdefault (not assignment) so a heading that appears twice
            # does not discard the markers already collected under it.
            data.setdefault(subheading, [])

        # A line may hold both a heading and a grid reference, so this is
        # checked independently of the heading match above.
        match_grid_ref = grid_ref_pattern.match(line)
        if match_grid_ref:
            grid_ref = match_grid_ref.group(1)
            lat, lon = uk_grid_to_coords(grid_ref)
            # setdefault also covers grid references that occur before any
            # heading, which previously raised KeyError on the default heading.
            data.setdefault(subheading, []).append([grid_ref, lat, lon])

# Create GeoJSON output for Wikimedia Commons
marker_color = "#555555"
features = []
for heading, items in data.items():
    for grid_ref, lat, lon in items:
        features.append({
            "type": "Feature",
            "properties": {
                "marker-color": marker_color,
                "title": "",
                "description": f"{heading}, grid reference: {grid_ref}",
                "marker-size": "small",
            },
            # GeoJSON positions are ordered longitude first, then latitude.
            "geometry": {"type": "Point", "coordinates": [lon, lat]},
        })

commons_output = {"type": "FeatureCollection", "features": features}

# Serialise with the json module so that quotes, backslashes and other special
# characters in headings are escaped correctly (hand-built strings were not).
with open(output_filename, 'w', encoding='utf-8') as f:
    json.dump(commons_output, f, ensure_ascii=False, indent=4)
    f.write("\n")