I made a string class to be used as a replacement for std::string
. It has all the find functions (find
, rfind
, find_first_of
, ...), basic iterators (just begin
and end
, no reverse iterators), almost all the operators (+
, +=
, ==
, []
, <<
), an insert, erase and substring function, c_str()
, and it is dynamic.
It is mostly faster than std::string
. It allocates the characters on the heap (malloc
, realloc
, free
), and it shouldn't have any undefined behaviour.
I have to know if the memory management is optimal or if some undefined behaviour is present.
String.h
#pragma once
#include <cstdlib>
#include <cstring>
#include <string>
class String
{
private:
char* str;
size_t length;
size_t capacity;
public:
typedef char* iterator;
const short int npos = -1;
String(int size = 0)
{
length = 0;
capacity = size;
str = (char*)malloc((capacity + 1) * sizeof(char));
};
String(char* string)
{
length = strlen(string);
capacity = length;
str = (char*)malloc((length + 1) * sizeof(char));
memcpy(str, string, (length + 1) * sizeof(char));
}
String(const String &string)
{
str = (char*)malloc((string.length + 1) * sizeof(char));
length = string.length;
capacity = string.capacity;
memcpy(str, string.str, length * sizeof(char));
}
~String()
{
free(str);
}
const char* begin()
{
return str;
}
const char* end()
{
return str+length;
}
char front()
{
return *str;
}
char back()
{
return str[length];
}
char& operator[](int n)
{
return str[n];
}
friend String& operator+(String lhs, String &rhs)
{
lhs += rhs;
return lhs;
}
String& operator+=(String &right)
{
*this += right.str;
return *this;
}
String& operator+=(char* right)
{
size_t toAdd = strlen(right);
if (length + toAdd > capacity)
{
while (length + toAdd > capacity)
{
if (capacity == 0)
capacity = 1;
capacity *= 2;
}
str = (char*)realloc(str, (capacity + 1) * sizeof(char));
}
memcpy(str + length, right, toAdd * sizeof(char));
length += toAdd;
str[length] = 0;
return *this;
}
String& operator+=(char right)
{
if (length + 1 > capacity)
{
while (length + 1 > capacity)
{
if (capacity == 0)
capacity = 1;
capacity *= 2;
}
str = (char*)realloc(str, (capacity + 1) * sizeof(char));
}
str[length++] = right;
str[length] = 0;
return *this;
}
String& operator=(char* right)
{
length = strlen(right);
size_t prevCap = capacity;
while (length + 1 > capacity)
{
if (capacity == 0)
capacity = 1;
capacity *= 2;
}
if (capacity != prevCap)
str = (char*)realloc(str, (capacity + 1) * sizeof(char));
memcpy(str, right, (length + 1) * sizeof(char));
return *this;
}
String& operator=(String &right)
{
*this = right.str;
return *this;
}
bool operator==(String &right)
{
return strcmp(str, right.str) == 0;
}
int find(char* string)
{
size_t wordLength = strlen(string);
if (wordLength > length)
return npos;
bool match;
for (int i = 0; i < length-wordLength; i++)
{
match = true;
for (int j = 0; j < wordLength; j++)
{
if (str[i + j] != string[j])
{
match = false;
break;
}
}
if (match)
return i;
}
return npos;
}
int rfind(char* string)
{
size_t wordLength = strlen(string);
if (wordLength > length)
return npos;
bool match;
for (int i = length-1; i >= wordLength; i--)
{
match = true;
for (int j = wordLength-1; j >= 0; j--)
{
if (str[i + j] != string[j])
{
match = false;
break;
}
}
if (match)
return i;
}
return npos;
}
int find_first_of(char* string)
{
size_t wordLength = strlen(string);
for (int i = 0; i < length; i++)
{
char c = str[i];
for (int j = 0; j < wordLength; j++)
{
if (c == string[j])
return i;
}
}
return npos;
}
int find_last_of(char* string)
{
size_t wordLength = strlen(string);
for (int i = length-1; i >= 0; i--)
{
char c = str[i];
for (int j = 0; j < wordLength; j++)
{
if (c == string[j])
return i;
}
}
return npos;
}
int find_first_not_of(char* string)
{
size_t wordLength = strlen(string);
bool different;
for (int i = 0; i < length; i++)
{
char c = str[i];
different = true;
for (int j = 0; j < wordLength; j++)
{
if (c == string[j])
{
different = false;
break;
}
}
if (different)
return i;
}
return npos;
}
int find_last_not_of(char* string)
{
size_t wordLength = strlen(string);
bool different;
for (int i = length - 1; i >= 0; i--)
{
char c = str[i];
different = true;
for (int j = 0; j < wordLength; j++)
{
if (c == string[j])
{
different = false;
break;
}
}
if (different)
return i;
}
return npos;
}
String substr(int offset, size_t count = -1)
{
if (count == -1)
count = length - offset;
char* sub = (char*)_alloca(count * sizeof(char));
memcpy(sub, str + offset, count * sizeof(char));
sub[count] = 0;
return String(sub);
}
void assign(iterator begin, iterator end)
{
length = end - begin;
if (length > capacity)
{
while (length > capacity)
{
if (capacity == 0)
capacity = 1;
capacity *= 2;
}
str = (char*)realloc(str, (capacity + 1) * sizeof(char));
}
memcpy(str, begin, length * sizeof(char));
str[length] = 0;
}
void insert(int pos, char* toInsert)
{
int toAdd = strlen(toInsert);
if (length + toAdd > capacity)
{
while (length + toAdd > capacity)
{
if (capacity == 0)
capacity = 1;
capacity *= 2;
}
str = (char*)realloc(str, (capacity + 1) * sizeof(char));
}
memmove(str + pos + toAdd, str + pos, (length - pos) * sizeof(char));
memcpy(str + pos, toInsert, toAdd * sizeof(char));
length += toAdd;
str[length] = 0;
}
void erase(int pos, size_t count)
{
length -= count;
memmove(str + pos, str + pos + count, (length - pos) * sizeof(char));
str[length] = 0;
}
void pop_back()
{
str[--length] = 0;
}
const char* c_str()
{
return str;
}
friend std::ostream &operator<<(std::ostream &os, String &str)
{
os << str.str;
return os;
}
};
2 Answers 2
You claim that your String
is faster than std::string
I do not believe this claim. Please post your benchmark code.
std::string
often has Small String Optimisation to avoid dynamic memory allocation when used on small strings which is fairly common.
Your code is not const correct. For example:
const char* c_str()
should be:
const char* c_str() const
and:
bool operator==(String &right)
should be:
bool operator==(const String &right) const
This is not the correct signature for operator +
:
friend String& operator+(String lhs, String &rhs)
because it allows the following to compile:
String a,b;
a + b = "foo";
It should be:
friend String operator+(const String& lhs, const String& rhs)
Do not repeat yourself, your copy constructor and assignment operator share a lot of code. You can implement copy constructor like this:
String(const String& s)
: String() // Initialise to empty string
{
*this = s;
}
You should also implement move constructor and move assignment operator.
You should also used std::vector<char>
as the backing store, this way you don't need the size
or capacity
members. Your code will simplify a lot and you will not need to worry about manually keeping track of the memory. Using malloc/free
in a C++ program is a code smell.
But all in all, the above is pretty pointless because you really should use std::string
and save yourself the trouble of maintaining and coding your own string class.
Why do you use malloc
and free
in a C++ program? Plus you do not handle any bad allocation errors on the heap which may lead to a dangling pointer (comapre std::bad_alloc).
capacity = size;
// if the allocation fails str points to corrupted memory
str = (char*)malloc((capacity + 1) * sizeof(char));
And you adjust the capacity of the string even in case the allocation failed.
-
\$\begingroup\$ I use it because i need realloc too \$\endgroup\$Dan Dan– Dan Dan2016年12月24日 15:37:44 +00:00Commented Dec 24, 2016 at 15:37
-
4\$\begingroup\$ @DanDan Why do you need to use
realloc
? \$\endgroup\$Loki Astari– Loki Astari2016年12月25日 01:38:57 +00:00Commented Dec 25, 2016 at 1:38
Explore related questions
See similar questions with these tags.
std::string
if you actually plan on replacing it with this implementation. \$\endgroup\$std::basic_string
template? If they need, say, wide characters, UTF-16, or UTF-32, they have to stick with one of the standard strings anyways. And even if they just need ASCII or UTF-8,std::string
is so common that using a string type that isn't compatible with it all but guarantees that their code won't be easy to integrate with any existing code, unless the entire code base is designed aroundString
and doesn't depend on any external code that usesstd::string
. \$\endgroup\$npos
is ashort
instead of asize_t
, there's no member functionsdata()
,size()
,length()
,max_size()
,resize()
,capacity()
,reserve()
,clear()
,empty()
,shrink_to_fit()
,at()
,append()
,push_back()
,replace()
,swap()
,get_allocator()
,copy()
, orcompare()
. You're also missing some of the non-member functions, and all but one of the member typedefs. Any templated code that takes yourString
and expects the generic, standard interface is in for a treat. \$\endgroup\$alloca()
and its other variants are non-standard, and kinda platform-specific, so it's probably best to use some preprocessor#if
magic to make sure you get the right one (and provide an alternative for platforms that don't have a version of it). \$\endgroup\$std::string
. There are several obvious optimizations that are not done here. Also at least one place in this code where non optimal implementation is used. Please back up that claim with proof. \$\endgroup\$