Python script to convert scripts to png files

Question 1

I have written a Python script that converts scripts to png files, entirely by myself without anybody's help.

A picture is worth more than a thousand words so I will let the picture do the descriptions.

Code

import numpy as np
from io import BytesIO
from pathlib import Path
from PIL import Image
from pygments import highlight
from pygments.formatters import ImageFormatter
from pygments.lexers import get_lexer_by_name
from pygments.style import Style
from pygments.token import Name, Comment, String, Punctuation, Number, Operator, Literal, Keyword
class MyStyle(Style):
    """Dark, navy-backed Pygments style for rendering source code.

    Each top-level token family carries its own foreground colour and the
    shared ``#00122a`` background; sub-tokens only override font weight,
    slant and foreground.
    """

    # Background of the whole rendered block. Declaring it here lets the
    # formatter start from the intended colour rather than the default one,
    # so the output no longer depends on pixels being repainted afterwards.
    background_color = '#00122a'

    styles = {
        Keyword: 'bold #07487f bg:#00122a',
        Name: '#7b68ee bg:#00122a',
        Name.Builtin: 'italic #fb4570',
        Name.Class: 'bold italic #ffd700',
        Name.Constant: 'bold',
        Name.Decorator: 'bold italic',
        Name.Exception: 'bold italic #ff005d',
        Name.Function: 'bold italic #134dbc',
        Name.Namespace: 'bold italic #ffa000',
        Name.Variable.Class: 'bold',
        Name.Variable.Global: 'bold',
        Literal: '#00ff80 bg:#00122a',
        String: '#20ff40 bg:#00122a',
        String.Escape: 'italic #d06000',
        Number: '#00c0ff bg:#00122a',
        Operator: 'bold #408020 bg:#00122a',
        Operator.Word: 'bold #0000f0',
        Punctuation: '#ff00c0 bg:#00122a',
        Comment: 'italic #800080 bg:#00122a',
    }
def script2png(filepath, language='python'):
    """Render a source file as a syntax-highlighted PNG image.

    Args:
        filepath: Location of the script, as a ``str`` or a path-like
            object. The file is decoded as UTF-8.
        language: Pygments lexer name used to tokenize the file.

    The image is saved as ``<filepath>.png``: the suffix is appended to the
    full input name, not substituted for the existing extension.
    """
    # Drop trailing whitespace on every line before rendering.
    source_lines = Path(filepath).read_text(encoding='utf8').splitlines()
    code = '\n'.join(line.rstrip() for line in source_lines)
    formatter = ImageFormatter(
        style=MyStyle, full=True,
        font_name='Source Code Pro', font_size=16,
        line_number_bg='#003b6f', line_number_fg='#ff69b4',
        line_number_bold=True, line_pad=3,
    )
    rendered = highlight(code, get_lexer_by_name(language), formatter)
    # Close the in-memory buffer and the decoded image deterministically;
    # np.array() copies the pixel data, so it outlives both.
    with BytesIO(rendered) as buffer, Image.open(buffer) as picture:
        pixels = np.array(picture)
    # Repaint every pure-white pixel with the dark background colour.
    pixels[np.all(pixels == (255, 255, 255), axis=-1)] = (0, 18, 42)
    # An f-string (rather than ``+``) also accepts pathlib.Path arguments.
    Image.fromarray(pixels).save(f'{filepath}.png')

How is the code? This is my first time writing something like this, and I really want to know how to improve it.

Question 2

You're importing a lot of symbols from pygments.token. It is probably cleaner to alias that import and clean up your namespace a bit.

I don't see why you're jumping through so many hoops when splitting and joining your text. Why not just read the entire file as a string and leave it be?

pygments is poorly designed. It has a lot of internal inconsistencies, such as some Style members being respected by some formatter subclasses and not others. You should be moving as much styling information to your style class as possible. pygments has a deeply confused interpretation of line_number_bg: it can be (and often is) expressed as Style.line_number_background_color, but the latter is only respected in CSS mode. Still, it is better to define it on your class and manually pass it to the ImageFormatter constructor, which is broken and does not respect the style.

Do not use Numpy or Pillow and do not replace your background by colour. Instead, define Style.background_color.

Both your BytesIO and your Image should be put in a with for guaranteed closure.

Unless you need to do something strange and non-standard, it is more convenient to remove your language argument and replace get_lexer_by_name with get_lexer_for_filename.

If you absolutely require a .png you should be passing that format explicitly to the formatter. Otherwise, a reasonable implementation would be a script2img that allows the ImageFormatter to use its default format, whatever that may be; does not hard-code the .png suffix; and instead pulls that from formatter.image_format.

Suggested

from pathlib import Path
from pygments import highlight
from pygments.formatters import ImageFormatter
from pygments.lexers import get_lexer_for_filename
from pygments.style import Style
import pygments.token as tokens
class MyStyle(Style):
 """Dark colour scheme for Pygments, with all colour values kept on the class."""
 # Shared background colour; also interpolated into the per-token
 # ``bg:`` settings of the top-level token families below.
 background_color = '#00122a'
 # Only applies to HtmlFormatter
 # (script2img therefore passes both line-number colours to the
 # ImageFormatter constructor explicitly).
 line_number_background_color = '#003b6f'
 line_number_color = '#ff69b4'
 # Token type -> style string (font modifiers, foreground, optional bg).
 styles = {
 tokens.Keyword: f'bold #07487f bg:{background_color}',
 tokens.Name: f'#7b68ee bg:{background_color}',
 tokens.Name.Builtin: 'italic #fb4570',
 tokens.Name.Class: 'bold italic #ffd700',
 tokens.Name.Constant: 'bold',
 tokens.Name.Decorator: 'bold italic',
 tokens.Name.Exception: 'bold italic #ff005d',
 tokens.Name.Function: 'bold italic #134dbc',
 tokens.Name.Namespace: 'bold italic #ffa000',
 tokens.Name.Variable.Class: 'bold',
 tokens.Name.Variable.Global: 'bold',
 tokens.Literal: f'#00ff80 bg:{background_color}',
 tokens.String: f'#20ff40 bg:{background_color}',
 tokens.String.Escape: 'italic #d06000',
 tokens.Number: f'#00c0ff bg:{background_color}',
 tokens.Operator: f'bold #408020 bg:{background_color}',
 tokens.Operator.Word: 'bold #0000f0',
 tokens.Punctuation: f'#ff00c0 bg:{background_color}',
 tokens.Comment: f'italic #800080 bg:{background_color}',
 }
def script2img(filepath: Path) -> None:
    """Render a source file to a syntax-highlighted image.

    The lexer is chosen from the file name. The image is written to the
    input path with its suffix replaced by the formatter's image format.

    Args:
        filepath: Source file to render; decoded as UTF-8.
    """
    formatter = ImageFormatter(
        style=MyStyle, full=True, line_pad=3,
        font_name='Source Code Pro', font_size=16,
        line_number_bold=True,
        # Line-number colours are handed over explicitly from the style.
        line_number_fg=MyStyle.line_number_color,
        line_number_bg=MyStyle.line_number_background_color,
    )
    buffer = highlight(
        # Explicit encoding: the default for read_text() depends on the
        # platform locale and can mis-decode non-ASCII sources.
        code=filepath.read_text(encoding='utf-8'),
        lexer=get_lexer_for_filename(filepath),
        formatter=formatter,
    )
    out_path = filepath.with_suffix('.' + formatter.image_format)
    out_path.write_bytes(buffer)
if __name__ == '__main__':
    # Script entry point: render the sample file given by a relative path.
    sample = Path('275768.py')
    script2img(sample)

Output

Rendering its own source,

rendered python source

This works equally well (still without passing a language) when inferring Java:

rendered java source

Caching configuration

If you're rendering multiple files at a time, you may want to make a class version that caches the instantiated formatter:

from pathlib import Path
from pygments import highlight
from pygments.formatters import ImageFormatter
from pygments.lexers import get_lexer_for_filename
from pygments.style import Style
import pygments.token as tokens
class MyStyle(Style):
 """Dark colour scheme for Pygments, with all colour values kept on the class."""
 # Shared background colour; also interpolated into the per-token
 # ``bg:`` settings of the top-level token families below.
 background_color = '#00122a'
 # Only applies to HtmlFormatter
 # (Highlighter therefore passes both line-number colours to the
 # ImageFormatter constructor explicitly).
 line_number_background_color = '#003b6f'
 line_number_color = '#ff69b4'
 # Token type -> style string (font modifiers, foreground, optional bg).
 styles = {
 tokens.Keyword: f'bold #07487f bg:{background_color}',
 tokens.Name: f'#7b68ee bg:{background_color}',
 tokens.Name.Builtin: 'italic #fb4570',
 tokens.Name.Class: 'bold italic #ffd700',
 tokens.Name.Constant: 'bold',
 tokens.Name.Decorator: 'bold italic',
 tokens.Name.Exception: 'bold italic #ff005d',
 tokens.Name.Function: 'bold italic #134dbc',
 tokens.Name.Namespace: 'bold italic #ffa000',
 tokens.Name.Variable.Class: 'bold',
 tokens.Name.Variable.Global: 'bold',
 tokens.Literal: f'#00ff80 bg:{background_color}',
 tokens.String: f'#20ff40 bg:{background_color}',
 tokens.String.Escape: 'italic #d06000',
 tokens.Number: f'#00c0ff bg:{background_color}',
 tokens.Operator: f'bold #408020 bg:{background_color}',
 tokens.Operator.Word: 'bold #0000f0',
 tokens.Punctuation: f'#ff00c0 bg:{background_color}',
 tokens.Comment: f'italic #800080 bg:{background_color}',
 }
class Highlighter:
    """Renders source files to images, reusing one configured formatter."""

    def __init__(self) -> None:
        """Build the formatter once and derive the output file suffix."""
        self.formatter = ImageFormatter(
            style=MyStyle, full=True, line_pad=3,
            font_name='Source Code Pro', font_size=16,
            line_number_bold=True,
            # Line-number colours are handed over explicitly from the style.
            line_number_fg=MyStyle.line_number_color,
            line_number_bg=MyStyle.line_number_background_color,
        )
        self.extension = '.' + self.formatter.image_format

    def script_to_image(self, filepath: Path) -> Path:
        """Render ``filepath`` and return the path of the written image.

        The lexer is chosen from the file name. The image is written to the
        input path with its suffix replaced by ``self.extension``.

        Args:
            filepath: Source file to render; decoded as UTF-8.

        Returns:
            Path of the image file that was written.
        """
        buffer = highlight(
            # Explicit encoding: the default for read_text() depends on the
            # platform locale and can mis-decode non-ASCII sources.
            code=filepath.read_text(encoding='utf-8'),
            lexer=get_lexer_for_filename(filepath),
            formatter=self.formatter,
        )
        out_path = filepath.with_suffix(self.extension)
        out_path.write_bytes(buffer)
        return out_path
if __name__ == '__main__':
    # Script entry point: render the sample file given by a relative path.
    renderer = Highlighter()
    renderer.script_to_image(Path('275768.py'))

Question 3

Good answer, but I can't accept it just yet. You know, gotta wait for 24 hours so people around the globe can have a chance to see the question and post an answer.

Question 4

About splitting and joining the text: it is to remove trailing whitespace. I sometimes write code in Windows Terminal to see what the output will be, and then copy-paste the code into Visual Studio Code. I use Alt+Drag to select the code, which leaves trailing spaces. I guess they would make the output sub-optimal, so I want to remove them.

Question 5

The only effect will be a broadened right margin. I consider that harmless, but if you really care about it, you could follow your original approach or do something like re.sub(r' *\n', '\n', code).

Reinderien 71.3k5 gold badges76 silver badges257 bronze badges · Accepted Answer · 2022-04-15 14:53:08Z

You're importing a lot of symbols from pygments.token. It is probably cleaner to alias that import and clean up your namespace a bit.

I don't see why you're jumping through so many hoops when splitting and joining your text. Why not just read the entire file as a string and leave it be?

pygments is poorly designed. It has a lot of internal inconsistencies, such as some Style members being respected by some formatter subclasses and not others. You should be moving as much styling information to your style class as possible. pygments has a deeply confused interpretation of line_number_bg: it can be (and often is) expressed as Style.line_number_background_color, but the latter is only respected in CSS mode. Still, it is better to define it on your class and manually pass it to the ImageFormatter constructor, which is broken and does not respect the style.

Do not use Numpy or Pillow and do not replace your background by colour. Instead, define Style.background_color.

Both your BytesIO and your Image should be put in a with for guaranteed closure.

Unless you need to do something strange and non-standard, it is more convenient to remove your language argument and replace get_lexer_by_name with get_lexer_for_filename.

If you absolutely require a .png you should be passing that format explicitly to the formatter. Otherwise, a reasonable implementation would be a script2img that allows the ImageFormatter to use its default format, whatever that may be; does not hard-code the .png suffix; and instead pulls that from formatter.image_format.

Suggested

from pathlib import Path
from pygments import highlight
from pygments.formatters import ImageFormatter
from pygments.lexers import get_lexer_for_filename
from pygments.style import Style
import pygments.token as tokens
class MyStyle(Style):
 """Dark colour scheme for Pygments, with all colour values kept on the class."""
 # Shared background colour; also interpolated into the per-token
 # ``bg:`` settings of the top-level token families below.
 background_color = '#00122a'
 # Only applies to HtmlFormatter
 # (script2img therefore passes both line-number colours to the
 # ImageFormatter constructor explicitly).
 line_number_background_color = '#003b6f'
 line_number_color = '#ff69b4'
 # Token type -> style string (font modifiers, foreground, optional bg).
 styles = {
 tokens.Keyword: f'bold #07487f bg:{background_color}',
 tokens.Name: f'#7b68ee bg:{background_color}',
 tokens.Name.Builtin: 'italic #fb4570',
 tokens.Name.Class: 'bold italic #ffd700',
 tokens.Name.Constant: 'bold',
 tokens.Name.Decorator: 'bold italic',
 tokens.Name.Exception: 'bold italic #ff005d',
 tokens.Name.Function: 'bold italic #134dbc',
 tokens.Name.Namespace: 'bold italic #ffa000',
 tokens.Name.Variable.Class: 'bold',
 tokens.Name.Variable.Global: 'bold',
 tokens.Literal: f'#00ff80 bg:{background_color}',
 tokens.String: f'#20ff40 bg:{background_color}',
 tokens.String.Escape: 'italic #d06000',
 tokens.Number: f'#00c0ff bg:{background_color}',
 tokens.Operator: f'bold #408020 bg:{background_color}',
 tokens.Operator.Word: 'bold #0000f0',
 tokens.Punctuation: f'#ff00c0 bg:{background_color}',
 tokens.Comment: f'italic #800080 bg:{background_color}',
 }
def script2img(filepath: Path) -> None:
    """Render a source file to a syntax-highlighted image.

    The lexer is chosen from the file name. The image is written to the
    input path with its suffix replaced by the formatter's image format.

    Args:
        filepath: Source file to render; decoded as UTF-8.
    """
    formatter = ImageFormatter(
        style=MyStyle, full=True, line_pad=3,
        font_name='Source Code Pro', font_size=16,
        line_number_bold=True,
        # Line-number colours are handed over explicitly from the style.
        line_number_fg=MyStyle.line_number_color,
        line_number_bg=MyStyle.line_number_background_color,
    )
    buffer = highlight(
        # Explicit encoding: the default for read_text() depends on the
        # platform locale and can mis-decode non-ASCII sources.
        code=filepath.read_text(encoding='utf-8'),
        lexer=get_lexer_for_filename(filepath),
        formatter=formatter,
    )
    out_path = filepath.with_suffix('.' + formatter.image_format)
    out_path.write_bytes(buffer)
if __name__ == '__main__':
    # Script entry point: render the sample file given by a relative path.
    sample = Path('275768.py')
    script2img(sample)

Output

Rendering its own source,

rendered python source

This works equally well (still without passing a language) when inferring Java:

rendered java source

Caching configuration

If you're rendering multiple files at a time, you may want to make a class version that caches the instantiated formatter:

from pathlib import Path
from pygments import highlight
from pygments.formatters import ImageFormatter
from pygments.lexers import get_lexer_for_filename
from pygments.style import Style
import pygments.token as tokens
class MyStyle(Style):
 """Dark colour scheme for Pygments, with all colour values kept on the class."""
 # Shared background colour; also interpolated into the per-token
 # ``bg:`` settings of the top-level token families below.
 background_color = '#00122a'
 # Only applies to HtmlFormatter
 # (Highlighter therefore passes both line-number colours to the
 # ImageFormatter constructor explicitly).
 line_number_background_color = '#003b6f'
 line_number_color = '#ff69b4'
 # Token type -> style string (font modifiers, foreground, optional bg).
 styles = {
 tokens.Keyword: f'bold #07487f bg:{background_color}',
 tokens.Name: f'#7b68ee bg:{background_color}',
 tokens.Name.Builtin: 'italic #fb4570',
 tokens.Name.Class: 'bold italic #ffd700',
 tokens.Name.Constant: 'bold',
 tokens.Name.Decorator: 'bold italic',
 tokens.Name.Exception: 'bold italic #ff005d',
 tokens.Name.Function: 'bold italic #134dbc',
 tokens.Name.Namespace: 'bold italic #ffa000',
 tokens.Name.Variable.Class: 'bold',
 tokens.Name.Variable.Global: 'bold',
 tokens.Literal: f'#00ff80 bg:{background_color}',
 tokens.String: f'#20ff40 bg:{background_color}',
 tokens.String.Escape: 'italic #d06000',
 tokens.Number: f'#00c0ff bg:{background_color}',
 tokens.Operator: f'bold #408020 bg:{background_color}',
 tokens.Operator.Word: 'bold #0000f0',
 tokens.Punctuation: f'#ff00c0 bg:{background_color}',
 tokens.Comment: f'italic #800080 bg:{background_color}',
 }
class Highlighter:
    """Renders source files to images, reusing one configured formatter."""

    def __init__(self) -> None:
        """Build the formatter once and derive the output file suffix."""
        self.formatter = ImageFormatter(
            style=MyStyle, full=True, line_pad=3,
            font_name='Source Code Pro', font_size=16,
            line_number_bold=True,
            # Line-number colours are handed over explicitly from the style.
            line_number_fg=MyStyle.line_number_color,
            line_number_bg=MyStyle.line_number_background_color,
        )
        self.extension = '.' + self.formatter.image_format

    def script_to_image(self, filepath: Path) -> Path:
        """Render ``filepath`` and return the path of the written image.

        The lexer is chosen from the file name. The image is written to the
        input path with its suffix replaced by ``self.extension``.

        Args:
            filepath: Source file to render; decoded as UTF-8.

        Returns:
            Path of the image file that was written.
        """
        buffer = highlight(
            # Explicit encoding: the default for read_text() depends on the
            # platform locale and can mis-decode non-ASCII sources.
            code=filepath.read_text(encoding='utf-8'),
            lexer=get_lexer_for_filename(filepath),
            formatter=self.formatter,
        )
        out_path = filepath.with_suffix(self.extension)
        out_path.write_bytes(buffer)
        return out_path
if __name__ == '__main__':
    # Script entry point: render the sample file given by a relative path.
    renderer = Highlighter()
    renderer.script_to_image(Path('275768.py'))

Good answer, but I can't accept it just yet. You know, gotta wait for 24 hours so people around the globe can have a chance to see the question and post an answer.
About splitting and joining the text: it is to remove trailing whitespace. I sometimes write code in Windows Terminal to see what the output will be, and then copy-paste the code into Visual Studio Code. I use Alt+Drag to select the code, which leaves trailing spaces. I guess they would make the output sub-optimal, so I want to remove them.
The only effect will be a broadened right margin. I consider that harmless, but if you really care about it, you could follow your original approach or do something like re.sub(r' *\n', '\n', code).

Stack Exchange Network

Python script to convert scripts to png files

Code

1 Answer 1

Suggested

Output

Caching configuration

You must log in to answer this question.

Hot Network Questions

Python script to convert scripts to png files

Code

1 Answer 1

Suggested

Output

Caching configuration

You must log in to answer this question.

Related

Hot Network Questions