Re-implementing Javascript's String.prototype.indexOf

Question 1

I had this idea of trying to re-implement the function String.prototype.indexOf in a way that could potentially be faster than the native function.

With some testing, I've doodled the following function in less than 1 hour:

function strpos(haystack, needle, offset)
{
 // Accessing <object>.length is slower than storing and
 // reading from a local variable
 var h_length = haystack.length;
 var n_length = needle.length;
 // Skips checking strings that have the same length (since they may be equal)
 // or strings where the needle is larger (a large needle won't fit in a small haystack)
 if(n_length >= h_length)
 {
 return needle === haystack ? 0 : -1;
 }
 // If the offset is too big, we will check outside the haystack
 // which means the string isn't there and we must quit
 if(offset >= h_length - n_length || offset < 0)
 {
 return -1;
 }
 // small performance boost
 var i_limit = h_length - n_length;
 next_char:
 for(var i = (offset || 0); i < i_limit; i++)
 {
 // Check if the first and last character from i are different
 // it still works for 1-character long needles
 if(needle[0] !== haystack[i] || needle[n_length - 1] !== haystack[i + n_length - 1])
 {
 continue;
 }
 // Check all characters inside needle, excluding the first and last
 for(var j = 1; j < n_length - 1; j++)
 {
 if(needle[j] !== haystack[i + j])
 {
 // The needle isn't in this position, so, check the next character
 continue next_char;
 }
 }
 // If you reached here, you found the needle
 return i;
 }
 // If you reached here, the haystack doesn't have the needle
 return -1;
}

The idea here was to skip checking as many characters as possible, and only checking the characters inside the needle if strictly necessary.

To test the speed of the function, I're ran the following code a few times:

console.time('indexOf');
'<haystack>'.indexOf('<needle>');
console.timeEnd('indexOf');
console.time('strpos');
strpos('<haystack>','<needle>');
console.timeEnd('strpos');

Based on what I've seen on Firefox 53.0.3(32-bit), on Windows 10, my implementation is around 5-20% faster most of the time.

You can try it bellow:

function strpos(haystack, needle, offset)
{
	// Accessing <object>.length is slower than storing and
	// reading from a local variable
	var h_length = haystack.length;
	var n_length = needle.length;
	
	// Skips checking strings that have the same length (since they may be equal)
	// or strings where the needle is larger (a large needle won't fit in a small haystack)
	if(n_length >= h_length)
	{
		return needle === haystack ? 0 : -1;
	}
	
	// If the offset is too big, we will check outside the haystack
	// which means the string isn't there and we must quit
	if(offset >= h_length - n_length || offset < 0)
	{
		return -1;
	}
	
	// small performance boost
	var i_limit = h_length - n_length;
	
	next_char:
	for(var i = (offset || 0); i < i_limit; i++)
	{
		// Check if the first and last character from i are different
		// it still works for 1-character long needles
		if(needle[0] !== haystack[i] || needle[n_length - 1] !== haystack[i + n_length - 1])
		{
			continue;
		}
		
		// Check all characters inside needle, excluding the first and last
		for(var j = 1; j < n_length - 1; j++)
		{
			if(needle[j] !== haystack[i + j])
			{
				// The needle isn't in this position, so, check the next character
				continue next_char;
			}
		}
		
		// If you reached here, you found the needle
		return i;
	}
	
	// If you reached here, the haystack doesn't have the needle
	return -1;
}
var haystack = prompt('Haystack:', '');
var needle = prompt('Needle:', '');
for(var i = 0; i < 10; i++)
{
	console.log('Iteration: ' + (i + 1));
	console.time('indexOf');
	var x = haystack.indexOf(needle);
	console.timeEnd('indexOf');
	console.log('position: ' + x);
	console.time('strpos');
	var y = strpos(haystack, needle);
	console.timeEnd('strpos');
	console.log('position: ' + y);
}

As far as I know, it is working as it should, returning -1 when expected.

Is there anything else I can improve on this code, regarding performance or readability? Or even a better way to do this, without that pesky continue next_char;?

Question 2

Your string matching runtime complexity seems to be in O(nm) with no preprocessing. Perhaps the built-in features an implementation with higher setup cost but lower runtime complexity for the search afterwards?

Question 3

@le_m I honestly don't know. But how did you got to that conclusion?

Question 4

[...] that could potentially be faster than the native function.

The native String.indexOf is fast, but comes with a certain constant setup cost that makes it slower than your implementation for very small strings. As soon as the input strings get longer however the built-in String.indexOf outperforms any other solution:

Small strings:

strpos 55,056,671
String.indexOf 23,497,940

Large strings:

String.indexOf 5,132,350
strpos 572,222

See https://jsperf.com/string-indexof-vs-strpos/1

The JavaScript specification doesn't mandate a specific implementation for String.indexOf, but I assume that the chosen string searching algorithm comes with higher setup cost but better runtime complexity or simply benefits from its native implementation speedup.

Question 5

I know it's a late answer, but I just stumbled over the question and I have a suggestion of how to get rid of the "pesky continue next_char;"

// Check all characters inside needle, excluding the first and last
for(var j = 1; j < n_length - 1; j++) {
 if(needle[j] !== haystack[i + j]) {
 // The needle isn't in this position, end the inner for loop
 break;
 }
}
if (j === n_length) {
// we looped through to the end of the needle, which means we found a match
 return i;
}

le_m le_m 1,94510 silver badges15 bronze badges · Accepted Answer · 2017-06-06 15:25:50Z

[...] that could potentially be faster than the native function.

The native String.indexOf is fast, but comes with a certain constant setup cost that makes it slower than your implementation for very small strings. As soon as the input strings get longer however the built-in String.indexOf outperforms any other solution:

Small strings:

strpos 55,056,671
String.indexOf 23,497,940

Large strings:

String.indexOf 5,132,350
strpos 572,222

See https://jsperf.com/string-indexof-vs-strpos/1

The JavaScript specification doesn't mandate a specific implementation for String.indexOf, but I assume that the chosen string searching algorithm comes with higher setup cost but better runtime complexity or simply benefits from its native implementation speedup.

Stack Exchange Network

Re-implementing Javascript's String.prototype.indexOf

2 Answers 2

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Hot Network Questions

Re-implementing Javascript's String.prototype.indexOf

2 Answers 2

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Related

Hot Network Questions