I've made a simple in-browser syntax highlighter using JQuery. It's pretty simple, it just wraps keywords, integers, and such in <span>
elements with color styles.
"use strict";
/**
* Replaces any JS code with
* highlighted JS code.
*/
function colorJS(element) {
$(element).html(
$(element).text()
.replace(/\bfunction\b/g, '<span style="color:blue">function</span>')
.replace(/\bvar\b/g, '<span style="color:blue">var</span>')
.replace(/\bwhile\b/g, '<span style="color:blue">while</span>')
.replace(/\bdo\b/g, '<span style="color:blue">do</span>')
.replace(/\bfor\b/g, '<span style="color:blue">for</span>')
.replace(/\bif\b/g, '<span style="color:blue">if</span>')
.replace(/\belse\b/g, '<span style="color:blue">else</span>')
.replace(/\btry\b/g, '<span style="color:blue">try</span>')
.replace(/\bcatch\b/g, '<span style="color:blue">catch</span>')
.replace(/\bthrow\b/g, '<span style="color:blue">throw</span>')
.replace(/\bswitch\b/g, '<span style="color:blue">switch</span>')
.replace(/\bcase\b/g, '<span style="color:blue">case</span>')
.replace(/\bin\b/g, '<span style="color:blue">in</span>')
.replace(/\breturn\b/g, '<span style="color:blue">return</span>')
.replace(/\bnull\b/g, '<span style="color:LightGreen">null</span>')
.replace(/\bthis\b/g, '<span style="color:LightGreen">this</span>')
.replace(/0/g, '<span style="color:blue">0</span>')
.replace(/1/g, '<span style="color:blue">1</span>')
.replace(/2/g, '<span style="color:blue">2</span>')
.replace(/3/g, '<span style="color:blue">3</span>')
.replace(/4/g, '<span style="color:blue">4</span>')
.replace(/5/g, '<span style="color:blue">5</span>')
.replace(/6/g, '<span style="color:blue">6</span>')
.replace(/7/g, '<span style="color:blue">7</span>')
.replace(/8/g, '<span style="color:blue">8</span>')
.replace(/9/g, '<span style="color:blue">9</span>')
);
}
/**
* Replaces any Python code with
* highlighted python code.
*/
function colorPy(element) {
$(element).html(
$(element).text()
.replace(/\bprint\b/g, '<span style="color:Gold">print</span>')
.replace(/\bdef\b/g, '<span style="color:Gold">def</span>')
.replace(/\bpass\b/g, '<span style="color:Gold">pass</span>')
.replace(/\bclass\b/g, '<span style="color:Gold">class</span>')
.replace(/\bself\b/g, '<span style="color:Gold">self</span>')
.replace(/\bif\b/g, '<span style="color:Gold">if</span>')
.replace(/\belif\b/g, '<span style="color:Gold">elif</span>')
.replace(/\belse\b/g, '<span style="color:Gold">else</span>')
.replace(/\bglobal\b/g, '<span style="color:Gold">global</span>')
.replace(/\bimport\b/g, '<span style="color:Gold">import</span>')
.replace(/\bfrom\b/g, '<span style="color:Gold">from</span>')
.replace(/\btry\b/g, '<span style="color:Gold">try</span>')
.replace(/\bexcept\b/g, '<span style="color:Gold">except</span>')
.replace(/\bfinally\b/g, '<span style="color:Gold">finally</span>')
.replace(/\breturn\b/g, '<span style="color:Gold">return</span>')
.replace(/\bfor\b/g, '<span style="color:Gold">for</span>')
.replace(/\bin\b/g, '<span style="color:Gold">in</span>')
.replace(/\bis\b/g, '<span style="color:Gold">is</span>')
.replace(/\bbreak\b/g, '<span style="color:Gold">break</span>')
.replace(/\bcontinue\b/g, '<span style="color:Gold">continue</span>')
.replace(/\bassert\b/g, '<span style="color:Gold">assert</span>')
.replace(/\bas\b/g, '<span style="color:Gold">as</span>')
.replace(/\bwith\b/g, '<span style="color:Gold">with</span>')
.replace(/\byield\b/g, '<span style="color:Gold">yield</span>')
.replace(/\blambda\b/g, '<span style="color:Gold">lambda</span>')
.replace(/\band\b/g, '<span style="color:Gold">and</span>')
.replace(/\bor\b/g, '<span style="color:Gold">or</span>')
.replace(/\bnot\b/g, '<span style="color:Gold">not</span>')
.replace(/\bdel\b/g, '<span style="color:Gold">del</span>')
.replace(/\bexec\b/g, '<span style="color:Gold">exec</span>')
.replace(/\bNone\b/g, '<span style="color:Purple">None</span>')
.replace(/\bTrue\b/g, '<span style="color:Purple">True</span>')
.replace(/\bFalse\b/g, '<span style="color:Purple">False</span>')
.replace(/\bobject\b/g, '<span style="color:Purple">object</span>')
.replace(/\bint\b/g, '<span style="color:Purple">int</span>')
.replace(/\bstr\b/g, '<span style="color:Purple">str</span>')
.replace(/\bhex\b/g, '<span style="color:Purple">hex</span>')
.replace(/\bbin\b/g, '<span style="color:Purple">bin</span>')
.replace(/\bhash\b/g, '<span style="color:Purple">hash</span>')
.replace(/\brange\b/g, '<span style="color:Purple">range</span>')
.replace(/0/g, '<span style="color:blue">0</span>')
.replace(/1/g, '<span style="color:blue">1</span>')
.replace(/2/g, '<span style="color:blue">2</span>')
.replace(/3/g, '<span style="color:blue">3</span>')
.replace(/4/g, '<span style="color:blue">4</span>')
.replace(/5/g, '<span style="color:blue">5</span>')
.replace(/6/g, '<span style="color:blue">6</span>')
.replace(/7/g, '<span style="color:blue">7</span>')
.replace(/8/g, '<span style="color:blue">8</span>')
.replace(/9/g, '<span style="color:blue">9</span>')
);
}
/**
* Run a highlighting function
* here for the specific type
* of code you want to highlight.
*/
$(document).ready(function() {
colorJS('.code-js');
colorPy('.code-py');
});
<html>
<head>
<meta charset="utf-8" />
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>
<title>Test</title>
</head>
<body>
<p>JavaScript code</p>
<pre class="code-js">
var a = 10 * 2;
var b = null;
var c = 'ergo';
function func() {
return null && this;
}
if(a) {
}
else if(!a) {
}
else {
}
while(a) {
}
do {
} while(a);
for(var n = 0; n <= 10; ++n) {
}
switch(a) {
case 1:
console.log(f);
}
for(var n in [1, 2, 3]) {
}
try {
}
catch(error) {
}
</pre>
<p>Python code.</p>
<pre class="code-py">
from random import randint
print "Hello world."
x = str(10)
f = lambda x: hash(x) * 2
assert int(bin(x))
del x
exec("__import__('hello')")
a = not None
b = True and False
c = 1 or False
with a as 10:
pass
def function():
global x
return x
class Test(object):
def __init__(self):
pass
if True:
pass
elif True:
pass
else:
pass
try:
pass
except Exception:
pass
finally:
pass
for n in range(100):
continue
break
if None is None:
pass
</pre>
</body>
</html>
This is obviously not quite complete, and it's very monolithic code, but any suggestions are welcome.
For those interested, I've completely revamped this with a new, extensible version that you can find here.
2 Answers 2
To make it more maintainable and reduce duplication, I'd list the keywords etc. in arrays, and use classes for the styling.
That fits nicely as an object with class names as the keys, and an array of strings as values, e.g.:
var syntax = {
keyword: ["function", "var", "while", ... ],
globals: ["null", "this", "undefined"],
literal: ["\\d+"] // numbers
};
Note the double-escaping. To match digits the simplest regex is just \d+
, but it still has to be expressed as a string rather than a regex literal, which means escaping the backslash with another backslash (\\d
).
Each array can then be joined into a branching regex like so:
new RegExp("\\b(" + array.join("|") + ")\\b", "g")
which, for the globals
array above will equal the following regex literal:
/\b(null|this|undefined)\b/g
... which can then be used for replacing parts of the string.
Last trick is to use replace()
with a callback as the 2nd argument. The callback receives each capture group as an argument (i.e. 1st argument is capture 0 - the whole match - while 1, 2, 3, etc. refers to explicit groups, if any) and its return value is used as the replacement string. Thus you avoid hardcoding the replacement string.
By the way, I'd also wrap text in <code>
elements instead of spans, as it's more semantically fitting.
Here's an example (just for the JS code):
"use strict";
function highlight(text, syntax) {
var className, regex, list;
for(className in syntax) {
if(!syntax.hasOwnProperty(className)) continue;
list = syntax[className].join("|");
regex = new RegExp("\\b(" + list + ")\\b", "g");
text = text.replace(regex, function (_, string) {
return '<code class="' + className + '">' + string + '</code>';
});
}
return text;
}
function colorJS(element) {
var text, html;
text = $(element).text();
html = highlight(text, {
keyword: ["function", "var", "while", "do", "for", "if", "else", "try", "catch", "throw", "switch", "case", "in", "return"],
globals: ["null", "this"],
literal: ["\\d+"]
});
$(element).html(html);
}
$(document).ready(function() {
colorJS('.code-js');
});
.code-js code.keyword { color: blue }
.code-js code.globals { color: lightgreen }
.code-js code.literal { color: green }
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>
<p>JavaScript code</p>
<pre class="code-js">
var a = 10 * 2;
var b = null;
var c = 'ergo';
function func() {
return null && this;
}
if(a) {
}
else if(!a) {
}
else {
}
while(a) {
}
do {
} while(a);
for(var n = 0; n <= 10; ++n) {
}
switch(a) {
case 1:
console.log(f);
}
for(var n in [1, 2, 3]) {
}
try {
}
catch(error) {
}
</pre>
-
1\$\begingroup\$ I trully hate that light green. But here is a regex for matching string literals:
((["'])(?:[^2円\n]|\\2円)*2円)
. You probably will want to change the2円
into3円
. But it works pretty well! You can test it on regex101.com/r/fZ9cR2/1. \$\endgroup\$Ismael Miguel– Ismael Miguel2015年05月11日 08:37:59 +00:00Commented May 11, 2015 at 8:37 -
1\$\begingroup\$ @IsmaelMiguel Wouldn't
((".*")|('.*'))
work as well? \$\endgroup\$Ethan Bierlein– Ethan Bierlein2015年05月11日 13:04:59 +00:00Commented May 11, 2015 at 13:04 -
\$\begingroup\$ I have no idea, I didn't tested that. But most likely it will! \$\endgroup\$Ismael Miguel– Ismael Miguel2015年05月11日 13:07:49 +00:00Commented May 11, 2015 at 13:07
-
1\$\begingroup\$ @EthanBierlein It'll work for the simplest strings, but will fail if A) there are two strings on the same line (it'll match the first quote on the line, and the last one), or B) if there are escaped quotes in a string. E.g.
'here\'s a string'
will only match'here\'
. \$\endgroup\$Flambino– Flambino2015年05月11日 13:49:35 +00:00Commented May 11, 2015 at 13:49
Rather than one regex for each digit, it would be better to recognize [0-9]+
as a number to be wrapped in a single contiguous <span>
. Better yet, you should recognize more than just positive integers, but also numbers like -123.45e3
.
Instead of hard-coding the colours, mark the span with a class and use CSS.
In the end, though, I'm not convinced that this is a good approach. If you naively look for keywords, then keywords that happen to appear inside string literals or regular expression literals will result in false matches. Since one of the goals of syntax highlighting is to help you spot silly syntax errors, misleading syntax highlighting is worse than no highlighting at all.