Using an OR/M I map a lot of rows from database to array of objects (~300k). In these objects some properties are marked with special attribute [Signed]
. Row-by-row I combine them in string, calculate salted MD5 of string and compare to hash I received from database.
Everything was working pretty okay for a long time, but now my website started to work quite slow and I run a profiler. It says the slowest method of whole website is ValueToString
. I optimized it as much as I can. Is it possible to optimize it more? I want to mention I do not need exact string representation so feel free to change formatters.
Another question if I will redesign everything from string
to byte[]
- will it speed up the process, what do you think?
static string ValueToString(object value)
{
if (value == null)
return "";
string value_string = value as string;
if (value_string != null)
return value_string;
int value_int;
if (value is Enum) {
value_int = (int)value;
if (value_int >= 0 && value_int <= 255)
return hexStringTable[((int)value)];
if (value_int == -1)
return "FFFFFFFF";
// Actually, we are never here.
return value_int.ToString("X");
}
decimal value_decimal;
if (TryCast(value, out value_decimal))
return value_decimal.ToString().TrimEnd('0');
if (TryCast(value, out value_int))
return value_int.ToString("X");
DateTime value_DateTime;
if (TryCast(value, out value_DateTime))
return value_DateTime.Ticks.ToString("X");
// We are never here for slow table.
return Convert.ToString(value,
CultureInfo.InvariantCulture);
}
static bool TryCast<T>(object o, out T r)
{
if (o is T) {
r = (T)o;
return true;
}
r = default(T);
return false;
}
static readonly string[] hexStringTable = new string[]
{
"00", "01", "02", "03", "04", "05", "06", "07", "08", "09", "0A", "0B", "0C", "0D", "0E", "0F",
"10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "1A", "1B", "1C", "1D", "1E", "1F",
"20", "21", "22", "23", "24", "25", "26", "27", "28", "29", "2A", "2B", "2C", "2D", "2E", "2F",
"30", "31", "32", "33", "34", "35", "36", "37", "38", "39", "3A", "3B", "3C", "3D", "3E", "3F",
"40", "41", "42", "43", "44", "45", "46", "47", "48", "49", "4A", "4B", "4C", "4D", "4E", "4F",
"50", "51", "52", "53", "54", "55", "56", "57", "58", "59", "5A", "5B", "5C", "5D", "5E", "5F",
"60", "61", "62", "63", "64", "65", "66", "67", "68", "69", "6A", "6B", "6C", "6D", "6E", "6F",
"70", "71", "72", "73", "74", "75", "76", "77", "78", "79", "7A", "7B", "7C", "7D", "7E", "7F",
"80", "81", "82", "83", "84", "85", "86", "87", "88", "89", "8A", "8B", "8C", "8D", "8E", "8F",
"90", "91", "92", "93", "94", "95", "96", "97", "98", "99", "9A", "9B", "9C", "9D", "9E", "9F",
"A0", "A1", "A2", "A3", "A4", "A5", "A6", "A7", "A8", "A9", "AA", "AB", "AC", "AD", "AE", "AF",
"B0", "B1", "B2", "B3", "B4", "B5", "B6", "B7", "B8", "B9", "BA", "BB", "BC", "BD", "BE", "BF",
"C0", "C1", "C2", "C3", "C4", "C5", "C6", "C7", "C8", "C9", "CA", "CB", "CC", "CD", "CE", "CF",
"D0", "D1", "D2", "D3", "D4", "D5", "D6", "D7", "D8", "D9", "DA", "DB", "DC", "DD", "DE", "DF",
"E0", "E1", "E2", "E3", "E4", "E5", "E6", "E7", "E8", "E9", "EA", "EB", "EC", "ED", "EE", "EF",
"F0", "F1", "F2", "F3", "F4", "F5", "F6", "F7", "F8", "F9", "FA", "FB", "FC", "FD", "FE", "FF"
};
5 Answers 5
(I've made a few assumptions, so please correct me if I'm wrong.)
Let's suppose we have the following types:
public class Foo
{
public int Id { get; set; }
[Signed]
public DateTime Timestamp { get; set; }
[Signed]
public string Name { get; set; }
[Signed]
public decimal Price { get; set; }
[Signed]
public SomeEnum Thing { get; set; }
}
public enum SomeEnum
{
One = 1,
Two,
Three
}
public class SignedAttribute : Attribute
{
}
We want to hash the Signed
properties of an instance of Foo
(actually, many instances of Foo
).
I assume you're currently getting these properties via reflection. Something like this:
var properties = typeof(Foo)
.GetProperties()
.Where(p => Attribute.IsDefined(p, typeof(SignedAttribute)))
.OrderBy(p => p.Name)
.ToArray();
// ...
var sb = new StringBuilder();
foreach (var property in properties)
{
sb.Append(ValueToString(property.GetValue(foo)));
}
var bytes = Encoding.UTF8.GetBytes(sb.ToString());
md5.TransformBlock(bytes, 0, bytes.Length, bytes, 0);
Let's set up a test to see how long this takes
var foo = new Foo
{
Id = 1,
Name = "Thing",
Price = 123.45m,
Thing = SomeEnum.Two,
Timestamp = new DateTime(2015, 04, 07)
};
byte[] hash;
var sw = Stopwatch.StartNew();
using (var md5 = MD5.Create())
{
for (var i = 0; i < 300000; i++)
{
var sb = new StringBuilder();
foreach (var property in properties)
{
sb.Append(ValueToString(property.GetValue(foo)));
}
var bytes = Encoding.UTF8.GetBytes(sb.ToString());
md5.TransformBlock(bytes, 0, bytes.Length, bytes, 0);
}
md5.TransformFinalBlock(new byte[0], 0, 0);
hash = md5.Hash;
}
sw.Stop();
Here ValueToString
is the code as posted in your question. On my machine, I get a time of 0.8s.
How much better can we do? Suppose we could cheat and use the following method:
private static string FooToString(Foo foo)
{
var sb = new StringBuilder();
sb.Append(foo.Name);
sb.Append(foo.Price.ToString(
"0.############################",
CultureInfo.InvariantCulture));
sb.Append(((int) foo.Thing).ToString("X"));
sb.Append(foo.Timestamp.Ticks.ToString("X"));
return sb.ToString();
}
So now our loop looks like this:
for (var i = 0; i < 300000; i++)
{
var value = FooToString(foo);
var bytes = Encoding.UTF8.GetBytes(value);
md5.TransformBlock(bytes, 0, bytes.Length, bytes, 0);
}
I get a time of 0.36s, which I'll take as the baseline.
Great, but let's assume we can't write FooToString
. We can create an equivalent method dynamically.
We introduce some helper methods:
public static string ValueToString(string value)
{
return value ?? string.Empty;
}
public static string ValueToString(Enum value)
{
return Convert.ToInt32(value).ToString("X");
}
public static string ValueToString(decimal value)
{
return value.ToString("0.############################", CultureInfo.InvariantCulture);
}
public static string ValueToString(DateTime value)
{
return value.Ticks.ToString("X");
}
And then create the method dynamically, using reflection to find the properties of Foo
that have the Signed
attribute.
var properties = (typeof(Foo))
.GetProperties()
.Where(p => Attribute.IsDefined(p, typeof(SignedAttribute)))
.OrderBy(p => p.Name)
.ToArray();
var getSignatureMethod = new DynamicMethod(
"GetSignature",
typeof(string),
new[] { typeof(Foo) },
typeof(Foo).Module);
var generator = getSignatureMethod.GetILGenerator();
generator.DeclareLocal(typeof(StringBuilder));
generator.Emit(OpCodes.Newobj, typeof(StringBuilder).GetConstructor(Type.EmptyTypes));
generator.Emit(OpCodes.Stloc_0);
generator.Emit(OpCodes.Ldloc_0);
var append = typeof(StringBuilder).GetMethod("Append", new[] { typeof (string) });
foreach (var property in properties)
{
generator.Emit(OpCodes.Ldarg_0);
generator.Emit(OpCodes.Callvirt, property.GetGetMethod());
if (property.PropertyType.BaseType == typeof(Enum))
{
generator.Emit(OpCodes.Box, property.PropertyType);
}
generator.Emit(OpCodes.Call, typeof(Program).GetMethod("ValueToString", new[] { property.PropertyType }));
generator.Emit(OpCodes.Callvirt, append);
}
generator.Emit(OpCodes.Pop);
generator.Emit(OpCodes.Ldloc_0);
generator.Emit(OpCodes.Callvirt, typeof(object).GetMethod("ToString", new Type[] { }));
generator.Emit(OpCodes.Ret);
var getSignature = (Func<Foo, string>)
getSignatureMethod.CreateDelegate(typeof(Func<Foo, string>));
(I haven't worked with dynamically generated code before, there might be some things that could be improved.)
Now our loop looks like this:
for (var i = 0; i < 300000; i++)
{
var value = getSignature(foo);
var bytes = Encoding.UTF8.GetBytes(value);
md5.TransformBlock(bytes, 0, bytes.Length, bytes, 0);
}
And on my machine, it takes 0.39s, which is pretty close to the baseline.
I've put the code up on Github if you want to play around with it.
-
\$\begingroup\$ Wooow. This is the breakthrough! It works and works fast!! I adopted it to my situation, and like you said it really works twice as fast! Great help! \$\endgroup\$Denis– Denis2015年04月09日 10:52:04 +00:00Commented Apr 9, 2015 at 10:52
-
\$\begingroup\$ @Denis glad I could help :) \$\endgroup\$mjolka– mjolka2015年04月09日 10:54:33 +00:00Commented Apr 9, 2015 at 10:54
-
\$\begingroup\$ The only principal difference I made: removed OpCodes.Box and pass enums to ValueToInt(int). Looks like it is even faster this way. \$\endgroup\$Denis– Denis2015年04月09日 10:55:35 +00:00Commented Apr 9, 2015 at 10:55
-
\$\begingroup\$ @mjolka I'm pretty impressed by the effort you gave on this answer! You even got to the point of emitting il, that's just nuts. Good job. \$\endgroup\$Bruno Costa– Bruno Costa2015年04月09日 11:07:29 +00:00Commented Apr 9, 2015 at 11:07
-
\$\begingroup\$ I am impressed too. :) I wonder how much time you spent to prepare such the comprehensive answer for free. \$\endgroup\$Denis– Denis2015年04月09日 11:11:43 +00:00Commented Apr 9, 2015 at 11:11
There is already a try cast implemented in c#. You can use it by using as
keyword.
Eg:
string s = o as string;
if(s != null){
//...
}
I don't know if this will speed up your code but you can swap type validation for a dictionary of types. So instead of trying to cast the object to each type of object youy want to support the ValueToString
you will be using the GetType
method avaiable in object to index that dictionary.
public class Utils{
private static readonly Dictionary<Type, Func<object, string>> map;
static Utils(){
map = new Dictionary<Type, Func<object, string>>();
map.Add(typeof(Enum), v => ((int)v).ToString("X"));
map.Add(typeof(decimal), v => ((decimal)v).ToString("0.############################", CultureInfo.InvariantCulture));
map.Add(typeof(DateTime), v => ((DateTime)v).Ticks.ToString("X"));
}
static Func<object, string> GetEntry(Type type){
for(Type aux = type; aux != null; aux = aux.BaseType){
if(map.ContainsKey(aux)){
return map[aux];
}
}
return null;
}
public static string ValueToString(object value)
{
if (value == null)
return "";
var func = GetEntry(value.GetType());
if(func != null){
return func(value);
}
return Convert.ToString(value, CultureInfo.InvariantCulture);
}
}
As you might have seen this is a bit tricky, specially because of Enum
type. A Enum
type (Eg: BindingFlags
) is a sublclass of Enum
so I had to implement the GetEntry
to address this cases.
-
\$\begingroup\$ The best option though would be to retrieve the value as a string from the database already. You might want ot consider that. \$\endgroup\$Bruno Costa– Bruno Costa2015年04月06日 09:57:46 +00:00Commented Apr 6, 2015 at 9:57
-
\$\begingroup\$ I tried your version: it works a little slower (~580ms) than my (~490ms). \$\endgroup\$Denis– Denis2015年04月06日 10:20:19 +00:00Commented Apr 6, 2015 at 10:20
-
\$\begingroup\$ I impemented TryCast because it is impossible to do
decimal value_decimal = value as decimal
.as
requires reference-type. So I forced to writedecimal? value_decimal = value as decimal?
andvalue_decimal.Value
. As showed the profiler - millions of.Value
make my code much slower. \$\endgroup\$Denis– Denis2015年04月06日 10:22:37 +00:00Commented Apr 6, 2015 at 10:22 -
\$\begingroup\$ @Denis Oh I see your problem with the reference type now... If that's the case you may try one of two things: Get the value in string from the database or use AsParalell to use parallelism and see if that helps you. \$\endgroup\$Bruno Costa– Bruno Costa2015年04月06日 10:28:04 +00:00Commented Apr 6, 2015 at 10:28
This would perhaps have been better as a comment to the previous answer but I don't have enough rep yet.
The dictionary and lambdas may be adding overhead to the method so here is an alternative that might start you down the right track.
static string ValueToString(object value)
{
if (value == null)
return "";
Type valueType = value.GetType();
if (valueType == typeof(string))
{
return value as string;
}
if (valueType == typeof(decimal))
{
return ((decimal)value).ToString(
"0.############################",
CultureInfo.InvariantCulture);
}
if (valueType == typeof(DateTime))
{
return ((DateTime)value).Ticks.ToString("X");
}
if (typeof(Enum).IsAssignableFrom(valueType))
{
return ((int)value).ToString("X");
}
return Convert.ToString(value,
CultureInfo.InvariantCulture);
}
One thing that would be interesting to know is whether is the type testing that is slow or the conversion once you know what type you have. My approach has focussed on the testing rather than the conversion. If the latter turned out to be the slow part then you will not get much benefit from this code.
(削除) This answer might give you some insight into the issue with the enum conversion. It rightly points out that you can't convert a variable typed as Enum to int using a direct cast. (削除ここまで)
Edit: The above comment is a bit strong. The best way to convert an Enum is directly casting it to the underlying type (see this answer). I tested the conversion and Convert.ToInt32 is slower than the direct cast by a little bit.
Interestingly I tested just calling ToString on the enum directly and it appears to be an order of magnitude slower.
-
1\$\begingroup\$ Welcome to CodeReview, Rossco. Here's a +1 to help you on that rep dilemma. \$\endgroup\$Legato– Legato2015年04月06日 23:41:15 +00:00Commented Apr 6, 2015 at 23:41
-
\$\begingroup\$ Yay now I can comment! Thanks for the welcome (and rep ;) ) \$\endgroup\$Rossco– Rossco2015年04月06日 23:56:25 +00:00Commented Apr 6, 2015 at 23:56
-
\$\begingroup\$ I tested your variant. Unfortunately, it works as slow as Dictionary variant. Again, profiler says slowest places are
return ((int)value).ToString("X");
(few million calls, ~400ms) andreturn value_decimal.ToString("0.############################", CultureInfo.InvariantCulture);
(600k calls, ~100ms). \$\endgroup\$Denis– Denis2015年04月07日 00:36:27 +00:00Commented Apr 7, 2015 at 0:36 -
\$\begingroup\$ Cool, it seems its the conversion is the problem (sorry if you had tried to communicate that already). Have you tried swapping out
return ((int)value).ToString("X");
forreturn Convert.ToInt32(value).ToString("X");
? \$\endgroup\$Rossco– Rossco2015年04月07日 00:42:22 +00:00Commented Apr 7, 2015 at 0:42
In this code:
if (value is Enum)
return ((int)value).ToString("X");
which is slower-- the cast to int
, or the int-to-hex textualisation?
Also, your code seems to imply that this:
if (TryCast(value, out value_decimal))
return value_decimal.ToString(
"0.############################",
CultureInfo.InvariantCulture);
is only necessary because the default decimal.ToString()
leaves a lot of trailing zeroes. How about using a Regex
to snip them off?
-
\$\begingroup\$ Good idea. I replaced
"0.############################"
with.Trim('0')
. I also improved enum section by 20%, please look my initial post. \$\endgroup\$Denis– Denis2015年04月07日 23:03:34 +00:00Commented Apr 7, 2015 at 23:03
So to answer the second question you had about what effect converting to byte[] from strings. It would halve again the time taken. However it wouldn't return the exact same hash as would've been previously generated so there would be a major db update to deploy it.
building on @mjolka 's excellent answer (perf is from my machine)
original: 485ms
mjolka: 245ms
With Byte[]: 113ms
The generated code is using Sigil since i'm too lazy to write all that opcode by hand.
var emiter = Emit<Func<Foo, byte[]>>.NewDynamicMethod("GetSignature");
var valueToStringLookup = new Dictionary<Type, MethodInfo>()
{
{typeof(decimal), typeof(Program).GetMethod("ValueToString", new[] { typeof(decimal) })},
{typeof(string), typeof(Program).GetMethod("ValueToString", new[] { typeof(string) })},
{typeof(SomeEnum), typeof(Program).GetMethod("ValueToString", new[] { typeof(SomeEnum) })},
{typeof(DateTime), typeof(Program).GetMethod("ValueToString", new[] { typeof(DateTime) })}
};
var writeToStream = typeof(MemoryStream).GetMethod("Write", new[] { typeof(byte[]), typeof(int), typeof(int) });
using (var stream = emiter.DeclareLocal<MemoryStream>("stream"))
using (var interimArray = emiter.DeclareLocal<byte[]>("pa"))
{
emiter.NewObject<MemoryStream>();
emiter.StoreLocal(stream);
for (int index = 0; index < 3; index++)
{
var prop = properties[index];
emiter.LoadArgument(0);
emiter.CallVirtual(prop.GetGetMethod());
emiter.Call(valueToStringLookup[prop.PropertyType]);
emiter.StoreLocal(interimArray);
emiter.LoadLocal(stream);
emiter.LoadLocal(interimArray);
emiter.LoadConstant(0);
emiter.LoadLocal(interimArray);
emiter.LoadLength<byte>();
emiter.Call(writeToStream);
}
emiter.LoadLocal(stream);
emiter.Call(typeof(MemoryStream).GetMethod("ToArray", new Type[] { }));
emiter.Return();
}
var getSignature = emiter.CreateDelegate();
The array generation code is the cause of the difference since by getting the bytes of the actual values without doing the ToString("X")
you're going to be putting different values into the hash, maybe the perf is worth it:
public static byte[] ValueToString(string value)
{
return Encoding.UTF8.GetBytes(value ?? string.Empty);
}
public static byte[] ValueToString(SomeEnum value)
{
return BitConverter.GetBytes((int)value);
}
public static byte[] ValueToString(decimal value)
{
return BitConverter.GetBytes(decimal.ToDouble(value));
}
public static byte[] ValueToString(DateTime value)
{
return BitConverter.GetBytes(value.Ticks);
}
You can see a fork of mjolka's work with my changes at GitHub Repo
-
\$\begingroup\$ Thank you. I believe it works really fast, but I have already deployed @mjolka version. The only thing personally I do not like in your version is
decimal.ToDouble(value)
. I receive data from third-party, and sometimes 1.05 is 1.05, sometimes 1.05000000, I am not suredecimal.ToDouble(value)
will correctly handle this. I would change it to(value / 1.0000000000000000000000000000).GetBits() .... Buffer.BlockCopy(.....)
\$\endgroup\$Denis– Denis2015年04月11日 11:35:33 +00:00Commented Apr 11, 2015 at 11:35 -
\$\begingroup\$ Also, I believe it is possible to calculate final
byte[]
length in advance, allocate it, and then map everything on it usingBuffer.BlockCopy
. \$\endgroup\$Denis– Denis2015年04月11日 13:20:07 +00:00Commented Apr 11, 2015 at 13:20
TryCast<T>()
method. why would you do that for Decimal and a Long? I am pretty sure that both of those have their own method for that. \$\endgroup\$return ((int)value).ToString("X");
(before my optimisation it wasvalue_Enum.ToString("d")
-- worked 2 times slower, now is slow too) andreturn value_decimal.ToString("0.############################", CultureInfo.InvariantCulture);
. \$\endgroup\$