Get most duplicated strings from a heap dump using ClrMD
ClrMD is an awesome managed API for inspecting managed processes and dump files. To use it, just add a NuGet reference to Microsoft.Diagnostics.Runtime. When loading a dump, be sure to have the mscordacwks.dll from the machine where the dump was taken. Also make sure that the program using ClrMD is built for the same platform (32-bit or 64-bit) as the process/dump that you’re inspecting.
Here’s a sample that gets the most duplicated strings out of a dump; a high duplicate count is an indication that you might need a string cache somewhere. Remember that when creating a dump of a 32-bit process on a 64-bit OS, you need to use the 32-bit Task Manager; otherwise the dump will be useless.
using System;
using System.Collections.Generic;
using System.Linq;
using Microsoft.Diagnostics.Runtime;
namespace DumpTools
{
    /// <summary>
    /// Console tool that loads a crash dump with ClrMD, counts how many times each
    /// distinct string value occurs among the enumerated heap objects, and prints
    /// the most frequently duplicated values.
    /// </summary>
    class DumpHeapStrings
    {
        // Dump file opened when no path is supplied on the command line.
        private const string DefaultDumpPath = @"app.dmp";

        // Upper bound on the number of heap objects inspected. Objects beyond this
        // limit are not counted, so results for very large heaps are a sample.
        private const int MaxObjectsToScan = 1000000;

        // Number of entries printed, ordered by descending occurrence count.
        private const int TopStringCount = 100;

        /// <summary>
        /// Entry point. Prints one line per string in the form
        /// "&lt;count&gt;\t\t&lt;value&gt;" for the most duplicated strings in the dump.
        /// </summary>
        /// <param name="args">
        /// Optional. <c>args[0]</c> is the path of the dump file to inspect; when
        /// omitted, <c>app.dmp</c> is used.
        /// </param>
        static void Main(string[] args)
        {
            var dumpPath = (args != null && args.Length > 0) ? args[0] : DefaultDumpPath;

            using (var dataTarget = DataTarget.LoadCrashDump(dumpPath))
            {
                // Indexing ClrVersions[0] would fail with an opaque out-of-range
                // error when the dump contains no CLR; report that case explicitly.
                var clrVersion = dataTarget.ClrVersions.FirstOrDefault();
                if (clrVersion == null)
                {
                    Console.Error.WriteLine("No CLR runtime was found in dump '" + dumpPath + "'.");
                    Environment.ExitCode = 1;
                    return;
                }

                // TryGetDacLocation yields no path when a matching DAC
                // (mscordacwks.dll) cannot be located; CreateRuntime needs one.
                var dacLocation = clrVersion.TryGetDacLocation();
                if (string.IsNullOrEmpty(dacLocation))
                {
                    Console.Error.WriteLine("Could not locate a matching mscordacwks.dll for dump '" + dumpPath + "'.");
                    Environment.ExitCode = 1;
                    return;
                }

                var runtime = dataTarget.CreateRuntime(dacLocation);
                var heap = runtime.GetHeap();
                var stringUsages = new Dictionary<string, long>();

                foreach (var instance in heap.EnumerateObjects().Take(MaxObjectsToScan))
                {
                    var type = heap.GetObjectType(instance);
                    if (type == null || !type.IsString)
                    {
                        continue;
                    }

                    // A value that cannot be read comes back as null (presumably when
                    // the dump memory is unreadable - confirm against ClrMD docs).
                    // A null key would make the dictionary throw, so skip it.
                    var value = type.GetValue(instance) as string;
                    if (value == null)
                    {
                        continue;
                    }

                    // TryGetValue leaves 'usages' at 0 when the key is absent, so a
                    // single assignment handles both first and repeated occurrences.
                    long usages;
                    stringUsages.TryGetValue(value, out usages);
                    stringUsages[value] = usages + 1;
                }

                var sorted = stringUsages.OrderByDescending(kvp => kvp.Value).Take(TopStringCount);
                foreach (var kvp in sorted)
                {
                    Console.WriteLine(kvp.Value + "\t\t" + kvp.Key);
                }
            }
        }
    }
}