I've written a custom Live Search & Highlight function in vanilla JS. It is working and does what I expect it to.
The issue is that the more items I add to the page content to search, the slower the search/highlight function becomes. I'm looking for speed/efficiency improvements so that it can handle larger data sets without lag.
Here's the main function in question (I have similar functions for different data sets with different filters and layouts):
function portsliveSearch() {
// get all card elements
var cards = document.querySelectorAll('table.type-port');
// get ports checkboxes
var protocols = document.getElementsByClassName('protocolcheckbox');
// get the search query input
var search_query = document.getElementById("portssearchbox").value;
search_query = escapeHtml(search_query);
//split query on spaces populate to an array
var search_query_array = [];
if (search_query.length !== 0) {
search_query_array = search_query.trim().toLowerCase().split(/\s+/);
}
var searchableContent = [];
// let's handle the protocols toggle first
var active_protocols_array = [];
// first let's see what's active
for (n = 0; n < protocols.length; n++) {
if (protocols[n].checked === true) {
active_protocols_array.push(protocols[n].value);
}
}
var all_checkbox = document.getElementById('protocolsallcheckbox');
// if the active item count does not equal the total item count
if (active_protocols_array.length !== protocols.length) {
// are all of them checked EXCEPT for "all"?
if (active_protocols_array.length === protocols.length-1) {
if (!active_protocols_array.includes("all")) {
// let's enable the "all toggle"
all_checkbox.checked = true;
active_protocols_array.push("all");
// else, it means all is selected, but something else is not
} else {
// let's disable the all toggle
all_checkbox.checked = false;
active_protocols_array.splice(0, 1);
}
}
}
// now if the active item count is 1, and the value is "all"
if (active_protocols_array.length === 1) {
if (active_protocols_array[0] === "all") {
// let's disable the all toggle
all_checkbox.checked = false;
active_protocols_array.pop();
}
}
// if the protocols are empty
if (active_protocols_array.length === 0) {
for (var i = 0; i < cards.length; i++) {
// hide the card
cards[i].classList.add("is-hidden");
}
// else if all protocols are checked and there's no search query
} else if (active_protocols_array.length === protocols.length && search_query_array.length === 0) {
for (var i = 0; i < cards.length; i++) {
// show the card
cards[i].classList.remove("is-hidden");
// get all searchable content areas in card
searchableContent = cards[i].getElementsByClassName("searchable-port");
// remove all current mark tags
for (var l = 0; l < searchableContent.length; l++) {
searchableContent[l].innerHTML = displayHtml(searchableContent[l].innerHTML).replaceAll("<mark>","").replaceAll("</mark>","");
}
}
// else if there's items in the search query, or if there are any protocols deselectedselected
} else if (search_query_array.length !== 0 || active_protocols_array.length !== protocols.length) {
// loop through all cards
for (var i = 0; i < cards.length; i++) {
// let's thin out the search by only processing cards that match the selected ports
var include_card = false;
for (n = 0; n < protocols.length; n++) {
if (protocols[n].checked) {
if (cards[i].classList.contains('protocol-'+protocols[n].value) ) {
include_card = true;
break;
}
}
}
if (include_card === true) {
// get all searchable content areas in card
searchableContent = cards[i].getElementsByClassName("searchable-port");
// remove all current mark tags
for (var l = 0; l < searchableContent.length; l++) {
searchableContent[l].innerHTML = displayHtml(searchableContent[l].innerHTML).replaceAll("<mark>","").replaceAll("</mark>","");
}
// set all string matches to false
var item_match = [];
for (var j = 0; j < search_query_array.length; j++) {
item_match[j] = false;
}
// loop through all searchable content
for (var k = 0; k < searchableContent.length; k++) {
// loop through search strings
for (var j = 0; j < search_query_array.length; j++) {
// set array index to true if there's a match
if (searchableContent[k].innerHTML.toLowerCase().includes(displayHtml(search_query_array[j]))) {
item_match[j] = true;
}
}
}
// now let's check if any search strings are false and hide the card if so
if (item_match.includes(false)) {
cards[i].classList.add("is-hidden");
} else {
cards[i].classList.remove("is-hidden");
// no let's mark the content in this card
for (var k = 0; k < searchableContent.length; k++) {
// loop through search terms input array
for (var j = 0; j < search_query_array.length; j++) {
// if search term is in the searchable content
if (searchableContent[k].innerHTML.toLowerCase().includes(displayHtml(search_query_array[j]))) {
//do_mark = true;
searchableContent[k].innerHTML = searchableContent[k].innerHTML.replace(new RegExp(displayHtml(search_query_array[j]), "gi"), (match) => {
return `<mark>${match}</mark>`;
});
}
}
}
}
// else protocol for this card is not included in search
} else {
// hide the card
cards[i].classList.add("is-hidden");
}
}
// else, no search string or no protocol filters
} else {
// loop through all cards
for (var i = 0; i < cards.length; i++) {
// get all searchable content areas in card
searchableContent = cards[i].getElementsByClassName("searchable-port");
// loop through searchable content within card
for (var l = 0; l < searchableContent.length; l++) {
// remove all current mark tags
searchableContent[l].innerHTML = displayHtml(searchableContent[l].innerHTML).replaceAll("<mark>","").replaceAll("</mark>","");
}
// set all cards to show
cards[i].classList.remove("is-hidden");
}
}
}
Here's a working JS Fiddle with about 1400 entries (3 searchable content areas per entry) in the dataset. Yes, it works, but notice how the first character or two or three of the search string has a lag until the visible content areas thins out a little. Same thing for deleting characters from the search string. The more characters you have (less content areas to search), the faster it is. Same thing with toggling the "All" checkbox. Disable all is quick but re-enabling all has a delay. Those are the things I'm looking to improve speed/efficiency for.
This is about the 3rd revision of the search/highlight function. The first ones were way too slow with dataset entries larger than about 500. I've broken down handling specific cases of empty search string and empty filters, empty search string but has filters, and has search string and filters to minimize the processing per loop. This version is much better than the previous ones. I want to add more entries (about 2,500 total), but need to speed up the processing of this live search/highlight function before I can justify doing it! Thank you in advance for any suggestions you can offer to optimize this search/highlight function.
2 Answers 2
Great job on making an easily reproducible example. Here are some suggestions:
- You can benchmark with something like
new Date().getTime()
. - Find the worst-case. Starting with all checkboxes, the search query
' '
takes up to1400 ms
. The slowest letter was'e'
, taking up to400 ms
. - To speed up the
' '
query, usefilter(x => x)
to prevent empty query strings. - Reduce duplicate computations by assigning results to variables. As an example, you don't need to unmark the
innerHTML
if you save a copy at the beginning. ++x
is theoretically faster thanx++
and has the same behavior in most cases.- Prefer
const
for expressing intent. Theoretically, it is also faster. - Declare functions before use to improve readability.
- Try to make names as consistent as possible (I prefer bucket case).
- Avoid using
l
as a variable because it looks like1
in some fonts. - Isolate functionality when you can. String splitting and marking don't need to touch the rest of the code. You can set all the
is-hidden
classes in one pass. The toggled checkboxes / search bar only overlap in that cards can be hidden by the toggle or hidden by the search. You can separate toggle / search to avoid duplicating effort; skipping search marks based on inactive checkboxes does not save time if those checkboxes become active later. - This may be controversial, but I prefer to avoid special cases (e.g. empty search bar, all toggled, none toggled). If you need even more performance, they might be justified.
Here is an implementation using the suggestions. You can paste it straight into the JavaScript portion of the Fiddle without any other modifications.
const entityMap = {
'&': '&',
'<': '<',
'>': '>',
'"': '"',
"'": ''',
'/': '/',
'`': '`',
'=': '='
};
function entityLookup(s) {
return entityMap[s];
}
function escapeHtml(string) {
return String(string).replace(/[&<>"'`=\/]/g, entityLookup);
}
function displayHtml(string) {
return String(string)
.replace(/&/g, '&')
.replace(/</g, '<')
.replace(/>/g, '>')
.replace(/"/g, '"')
.replace(/'/g, "'")
.replace(///g, '/')
.replace(/`/g, '`')
.replace(/&/g, '=');
}
function mark(string) {
return `<mark>${string}</mark>`;
}
// gets the non-empty parts (as lowercase) of a string
function get_nonempty_parts(string) {
const query = escapeHtml(string);
if (query.length < 1)
return [];
return query.trim().toLowerCase().split(/\s+/).filter(x => x);
}
// tries to mark a card's search areas for all given parts and returns
// whether all parts led to marked areas
// parts: an array of terms to be searched for
// areas = card.getElementsByClassName("searchable-port")
// clean = `${x.innerHTML}` for x in areas (unmarked copy)
// lower = x.toLowerCase() for x in clean (case-insensitive)
function all_parts_marked(parts, areas, clean, lower) {
// part_in_area[k][j] is whether a part (index k) is in an area (index j)
const part_in_area = new Array(parts.length);
for (let k = 0; k < parts.length; ++k)
{
part_in_area[k] = new Array(areas.length);
for (let j = 0; j < areas.length; ++j)
part_in_area[k][j] = lower[j].includes(parts[k]);
// if a part isn't in any area, not all parts led to marked areas
if (!part_in_area[k].includes(true))
return false;
}
// replace all inner areas with their clean versions
for (let j = 0; j < areas.length; ++j)
areas[j].innerHTML = clean[j];
for (let k = 0; k < parts.length; ++k) {
const reg = new RegExp(parts[k], "gi");
for (let j = 0; j < areas.length; ++j)
if (part_in_area[k][j])
areas[j].innerHTML = areas[j].innerHTML.replace(reg, mark);
}
return true;
}
// one-time document selectors for future use
const p_checks = document.getElementsByClassName('protocolcheckbox');
const all_p_check = document.getElementById('protocolsallcheckbox');
const search_box = document.getElementById("portssearchbox");
const cards = document.querySelectorAll('table.type-port');
// whether each card (index i) is hidden by toggle / search
// note: by default, each value is undefined (evaluates to false)
let hidden_by_toggle = new Array(cards.length);
let hidden_by_search = new Array(cards.length);
// for each card (index i), show it or hide it based on toggle / search
function show_or_hide_cards() {
for (let i = 0; i < cards.length; ++i) {
if (hidden_by_toggle[i] || hidden_by_search[i])
cards[i].classList.add("is-hidden");
else
cards[i].classList.remove("is-hidden");
}
}
// card_has_protocol[i][p] is whether card i contains protocol p
const card_has_protocol = new Array(cards.length);
for (let i = 0; i < cards.length; ++i) {
// note: by default, each value is undefined (evaluates to false)
card_has_protocol[i] = new Array(p_checks.length);
for (let p = 0; p < p_checks.length; ++p)
if (p_checks[p].value != "all") // ignore "all" (Toggle All)
card_has_protocol[i][p] = cards[i].classList.contains(
'protocol-' + p_checks[p].value);
}
// computes whether card i is hidden based on checkboxes
function card_is_hidden_by_toggle(i) {
for (let p = 0; p < p_checks.length; ++p)
if (p_checks[p].checked && card_has_protocol[i][p])
return false;
return true;
}
// precompute areas, clean (copy), lower (insensitive) for each card i
const cards_areas = new Array(cards.length);
const cards_clean = new Array(cards.length);
const cards_lower = new Array(cards.length);
for (let i = 0; i < cards.length; ++i) {
cards_areas[i] = cards[i].getElementsByClassName("searchable-port");
cards_clean[i] = new Array(cards_areas[i].length);
cards_lower[i] = new Array(cards_areas[i].length);
for (let j = 0; j < cards_areas[i].length; ++j)
{
cards_clean[i][j] = `${cards_areas[i][j].innerHTML}`;
cards_lower[i][j] = cards_clean[i][j].toLowerCase();
}
}
// --------------------
// LISTENERS AND TIMING
// --------------------
function portsLiveToggle(event) {
const time1 = new Date().getTime();
// count the number of active checkboxes
let check_count = 0;
for (let p = 0; p < p_checks.length; ++p)
if (p_checks[p].checked)
++check_count;
// set "Toggle All" based on count
// note: all_p_check is included in p_checks
if (all_p_check.checked) {
if (check_count < p_checks.length)
all_p_check.checked = false;
}
else {
if (check_count == p_checks.length - 1)
all_p_check.checked = true;
}
// get card visibility from checkboxes
for (let i = 0; i < cards.length; ++i)
hidden_by_toggle[i] = card_is_hidden_by_toggle(i);
show_or_hide_cards(); // is-hidden or not
const time2 = new Date().getTime();
console.log('toggle time: ' + (time2 - time1));
}
function portsliveSearch() {
const time1 = new Date().getTime();
// split the search box to form parts in HTML
let search_parts = get_nonempty_parts(search_box.value);
for (let k = 0; k < search_parts.length; ++k)
search_parts[k] = displayHtml(search_parts[k]);
// if not all search parts could be marked, hide the card
for (let i = 0; i < cards.length; ++i)
hidden_by_search[i] = !all_parts_marked(
search_parts, cards_areas[i], cards_clean[i], cards_lower[i]);
show_or_hide_cards(); // is-hidden or not
const time2 = new Date().getTime();
console.log('search time: ' + (time2 - time1));
}
for (let p = 0; p < p_checks.length; ++p)
p_checks[p].addEventListener("click", portsLiveToggle);
function protocoltoggle(source) {
for (let p = 0; p < p_checks.length; ++p)
p_checks[p].checked = source.checked;
portsLiveToggle("");
}
To the best of my knowledge, it behaves just like the original for non-empty string queries. The ' '
and 'e'
queries take up to 30 ms
, a >12x speedup. This also goes below the threshold of 100 ms
, which most users perceive as instantaneous. Hope this helps!
-
1\$\begingroup\$ WOW! This is a better response than I was expecting. I'm reviewing it now, testing, benchmarking, and will be rating this answer shortly. I've been coding for decades, but heavily in PHP (probably obvious with my code structure). I can do JS, but I'm not great at it, and learned a TON from your post above. Thank you!!!! \$\endgroup\$codejp3– codejp32024年09月18日 13:04:23 +00:00Commented Sep 18, 2024 at 13:04
-
\$\begingroup\$ Happy to help! I tried to strike a balance between performance and simplicity: caching lower vs calling toLowerCase, card_has_protocol vs recomputing every time, etc. The priorities / runtimes may be different for your use case / machine and justify different choices. A logical next step might be grouping all shared state per table - this would simplify handling multiple tables per page. You might also want to write some test cases (e.g. expected number of entries from given toggle / search inputs) to catch bugs. \$\endgroup\$Justin Chang– Justin Chang2024年09月18日 17:41:12 +00:00Commented Sep 18, 2024 at 17:41
-
\$\begingroup\$ Also, please note that I chose to specify array sizes in advance with "new Array(n)" due to preference / readability - I did not observe any effect on the benchmarks. \$\endgroup\$Justin Chang– Justin Chang2024年09月18日 18:06:54 +00:00Commented Sep 18, 2024 at 18:06
Variables
Use const/let instead of var. This is a better way to keep track of variables that are changed troughout the process or vice versa, does not change.
Create globals so it won't run everytime you enter your function, which already improves your speed as e.g. the checkboxes of protocol never change. The state does but not the count of the checkboxes. If you define this outside your function, it doesn't need to find those elements again and again.
Last but not least, variables in JS usually are camelCase and not snake_case. This will not improve your speed but it is an extra to make your code clear for other JS developers.
If structures
You have way too much if structures in your function. This makes it hard to read and sometimes it is even redundant to do.
You check on values which are true or false (or truthy and falsey) which is not necessary. This also does not improve speed, but less code equals to more readable code. For example:
if (protocols[n].checked === true) {
active_protocols_array.push(protocols[n].value);
}
Is actually just this
if (protocols[n].checked) {
active_protocols_array.push(protocols[n].value);
}
You also nest If's that can be linked together
if (active_protocols_array.length === 1) {
if (active_protocols_array[0] === "all") {
// let's disable the all toggle
all_checkbox.checked = false;
active_protocols_array.pop();
}
}
Could be just this
if (active_protocols_array.length && active_protocols_array[0] === "all") {
all_checkbox.checked = false;
active_protocols_array.pop();
}
Lastly, you use else if quit a lot. Consider replace this with switch cases
Functions
This is the place we can improve the most in both readability as well as performance. You now have one big function with all your logic in it. This is way too bloated and they should split up their functionalities.
For starters, your filters above should only get triggered when they change. So use a change event handler for this.
This is best shown in the full code provided below.
Debounce
A popular way to limit the request made when searching for something is to use a debounce function. This will wait until the user is done typing, so if you search for 'tcp' it will only search once instead of on 't', 'tc' and 'tcp'
function debounce(func, delay) {
let timer;
return function (...args) {
clearTimeout(timer);
timer = setTimeout(() => func.apply(this, args), delay);
};
}
Conclusion
Readability: Break up your logic into smaller, manageable chunks, making it easier to understand and modify. Use naming conventions to further improve readability.
Performance: Cache your DOM elements, as they aren't dynamic. Debouncing prevents excessive actions when typing.
Maintainability: Convert each function so it has a single responsibility, which simplifies debugging and further changes.
Final code
// Global constants
const cards = document.querySelectorAll('table.type-port');
const protocols = Array.from(document.getElementsByClassName('protocolcheckbox'));
const allCheckbox = document.getElementById('protocolsallcheckbox');
const searchBox = document.getElementById("portssearchbox");
// Utility: escape HTML to prevent XSS
function escapeHtml(text) {
const map = { '&': '&', '<': '<', '>': '>', '"': '"', "'": ''' };
return text.replace(/[&<>"']/g, function (m) { return map[m]; });
}
// Utility: restore display HTML
function displayHtml(text) {
return text; // Assuming this function restores escaped HTML
}
// Utility: Debounce for better performance when typing
function debounce(func, delay) {
let timer;
return function (...args) {
clearTimeout(timer);
timer = setTimeout(() => func.apply(this, args), delay);
};
}
// Utility: Mark search terms in content
function markSearchTerm(content, searchTerms) {
searchTerms.forEach(term => {
const regex = new RegExp(escapeHtml(term), "gi");
content.innerHTML = content.innerHTML.replace(regex, match => `<mark>${match}</mark>`);
});
}
// Utility: Clear previous highlights
function clearHighlights(content) {
content.innerHTML = displayHtml(content.innerHTML).replaceAll("<mark>", "").replaceAll("</mark>", "");
}
// Function to get active protocols
function getActiveProtocols() {
return protocols.filter(protocol => protocol.checked).map(protocol => protocol.value);
}
// Function to toggle "all" checkbox logic
function toggleAllCheckbox(activeProtocols) {
if (activeProtocols.length === protocols.length - 1 && !activeProtocols.includes("all")) {
allCheckbox.checked = true;
activeProtocols.push("all");
} else if (activeProtocols.includes("all") && activeProtocols.length > 1) {
allCheckbox.checked = false;
activeProtocols.splice(activeProtocols.indexOf("all"), 1);
}
}
// Function to apply filtering on cards based on active protocols and search query
function applyFilters() {
const searchQuery = escapeHtml(searchBox.value.trim().toLowerCase());
const searchTerms = searchQuery ? searchQuery.split(/\s+/) : [];
const activeProtocols = getActiveProtocols();
toggleAllCheckbox(activeProtocols);
cards.forEach(card => {
const searchableContent = Array.from(card.getElementsByClassName("searchable-port"));
const cardProtocol = protocols.find(protocol => card.classList.contains(`protocol-${protocol.value}`));
// Show/Hide card based on protocol filters
if (!activeProtocols.includes(cardProtocol.value) && !activeProtocols.includes("all")) {
card.classList.add("is-hidden");
return;
}
// Clear previous highlights
searchableContent.forEach(content => clearHighlights(content));
// Apply search term filters
if (searchTerms.length) {
const matches = searchTerms.every(term =>
searchableContent.some(content => content.innerHTML.toLowerCase().includes(term))
);
if (matches) {
card.classList.remove("is-hidden");
searchableContent.forEach(content => markSearchTerm(content, searchTerms));
} else {
card.classList.add("is-hidden");
}
} else {
card.classList.remove("is-hidden");
}
});
}
// Event listener for search box (debounced)
searchBox.addEventListener("input", debounce(applyFilters, 300));
// Event listeners for protocol checkboxes
protocols.forEach(protocol => {
protocol.addEventListener("change", applyFilters);
});
// Initialize filter logic on page load
applyFilters();
-
\$\begingroup\$ NOTE: I posted this after @JustinChang and I want to say, I did not benchmark anything. I'm leaving this answer though as debounce is an improvement you probably want to implement. \$\endgroup\$Wimanicesir– Wimanicesir2024年09月18日 14:36:22 +00:00Commented Sep 18, 2024 at 14:36
-
\$\begingroup\$ I think it's a helpful answer. Debounce and saving the number of active checkboxes are ideas worth exploring. A complementary idea I considered was a global variable for the number of active checkboxes per card, simplifying the determination of whether a card should be displayed based on the checkboxes. \$\endgroup\$Justin Chang– Justin Chang2024年09月18日 17:55:55 +00:00Commented Sep 18, 2024 at 17:55
-
\$\begingroup\$ Also another helpful/insightful response. I'm digging into this in a detailed manner today/tomorrow to come up with a an optimized revision. I will certainly be taking your advice into consideration as well. I'm used to only using camelCase for PHP OOP and snake_case for procedural/everything else. A habit I will work on breaking. Thank you for your time and detailed reply! \$\endgroup\$codejp3– codejp32024年09月19日 13:49:15 +00:00Commented Sep 19, 2024 at 13:49
Explore related questions
See similar questions with these tags.