Tuesday, June 7, 2011

C# capitalize each word in string

One day I stumbled on this problem at work, so I did some thinking and searched for a simple solution. I collected all the implementations I could find, created a benchmark, and measured their performance.

Let's take a look at the source code:
using System;
using System.Diagnostics;
using System.Globalization;
using System.Text;
using System.Text.RegularExpressions;

namespace ToTitleCase
{
 class MainClass
 {
  const string test = "How to capitalize the first character of each word? Other characters should be lower case.";
  const int N = 100000;
  
  /// <summary>
  /// Lower-cases the whole input, then upper-cases the first non-whitespace
  /// character of every token obtained by splitting on the space character.
  /// </summary>
  /// <param name="str">Text to convert. Must not be null.</param>
  /// <returns>
  /// The converted text. Tokens are re-joined with single spaces, so the
  /// original spacing (including runs of spaces) is preserved.
  /// </returns>
  public static string BadToTitleCase (string str)
  {
   StringBuilder result = new StringBuilder ();
   str = str.ToLower ();
   string [] names = str.Split (' ');

   for (int name_index = 0; name_index < names.Length; name_index++) {
    string name = names [name_index];

    // The bounds test has to come before the indexer: Split yields empty
    // tokens for consecutive, leading or trailing spaces (and for an
    // empty input), and a token may consist only of other whitespace.
    int i = 0;
    while (i < name.Length && char.IsWhiteSpace (name [i])) {
     i++;
    }

    StringBuilder sb = new StringBuilder (name);
    // Only capitalize when the token actually has a non-whitespace character.
    if (i < name.Length)
     sb [i] = name.ToUpper () [i];
    result.Append (sb.ToString ());
    if (name_index < names.Length - 1)
     result.Append (" ");
   }
   return result.ToString ();
  }
  
  /// <summary>
  /// Upper-cases the first character of every space-separated token and
  /// leaves all other characters as they are.
  /// </summary>
  /// <param name="str">Text to convert. Must not be null.</param>
  /// <returns>The tokens, re-joined with single spaces.</returns>
  public static String BadToTitleCase2 (string str)
  {
   String[] tokens = str.Split (' ');
   String joined = String.Empty;

   for (int t = 0; t < tokens.Length; t++) {
    Char[] letters = tokens [t].ToCharArray ();
    if (letters.Length != 0) {
     // Round-trip through a one-character string to upper-case it.
     String head = new String (letters [0], 1);
     letters [0] = head.ToUpper () [0];
    }
    // Every token is followed by a separator; the last one is cut off below.
    joined += new string (letters) + ' ';
   }

   // Split always yields at least one token, so there is always a
   // trailing separator to remove.
   return joined.Substring (0, joined.Length - 1);
  }

  /// <summary>
  /// Upper-cases the first character of every space-separated word and
  /// leaves all other characters as they are.
  /// </summary>
  /// <param name="str">Text to convert. May be null or empty.</param>
  /// <returns>
  /// The converted text; null when <paramref name="str"/> is null and the
  /// empty string when it is empty.
  /// </returns>
  public static string SimpleToTitleCase (string str)
  {
   // Nothing to capitalize: hand the input back unchanged, so that an
   // empty string stays an empty string instead of turning into null.
   if (string.IsNullOrEmpty (str))
    return str;

   string[] words = str.Split (' ');
   for (int i = 0; i < words.Length; i++) {
    string word = words [i];
    // Consecutive spaces produce empty entries; they are kept as-is so
    // that Join restores the original spacing.
    if (word.Length > 0) {
     words [i] = word [0].ToString ().ToUpper () + word.Substring (1);
    }
   }

   return string.Join (" ", words);
  }

  /// <summary>
  /// Converts the text to title case with the framework's
  /// <see cref="TextInfo.ToTitleCase"/> for the current culture.
  /// </summary>
  /// <param name="str">Text to convert. Must not be null.</param>
  /// <returns>The title-cased text.</returns>
  public static string NetToTitleCase (string str)
  {
   // The input is lower-cased before it is handed to ToTitleCase.
   string lowered = str.ToLower ();
   TextInfo casing = CultureInfo.CurrentCulture.TextInfo;
   return casing.ToTitleCase (lowered);
  }
  
  /// <summary>
  /// Title-cases the text in a single pass over a character buffer: the
  /// first character and every character that follows whitespace are
  /// upper-cased, all remaining characters are lower-cased.
  /// </summary>
  /// <param name="str">Text to convert. Must not be null.</param>
  /// <returns>A new string holding the converted characters.</returns>
  public static string BufferToTitleCase (string str)
  {
   char[] buffer = str.ToCharArray ();

   // True at the very start and right after a whitespace character.
   bool atWordStart = true;
   for (int pos = 0; pos < buffer.Length; pos++) {
    char current = buffer [pos];
    buffer [pos] = atWordStart ? char.ToUpper (current) : char.ToLower (current);
    atWordStart = char.IsWhiteSpace (buffer [pos]);
   }

   return new string (buffer);
  }
  
  /// <summary>
  /// Title-cases the text by appending one converted character at a time
  /// to a <see cref="StringBuilder"/>: the first character and every
  /// character that follows whitespace are upper-cased, all remaining
  /// characters are lower-cased.
  /// </summary>
  /// <param name="str">Text to convert.</param>
  /// <returns>The converted text; an empty input is returned unchanged.</returns>
  /// <exception cref="ArgumentNullException">
  /// <paramref name="str"/> is null.
  /// </exception>
  public static string StringBuilderToTitleCase (string str)
  {
   // Report the real parameter name so the exception points at the
   // argument the caller actually passed.
   if (str == null)
    throw new ArgumentNullException ("str");

   if (str.Length == 0)
    return str;

   // The result has exactly as many characters as the input.
   StringBuilder sb = new StringBuilder (str.Length);
   // Upper the first char.
   sb.Append (char.ToUpper (str [0]));
   for (int i = 1; i < str.Length; i++) {
    // Get the current char.
    char c = str [i];

    // Upper if after whitespace, lower otherwise.
    if (char.IsWhiteSpace (str [i - 1]))
     c = char.ToUpper (c);
    else
     c = char.ToLower (c);

    sb.Append (c);
   }

   return sb.ToString ();
  }
  
  /// <summary>
  /// Title-cases the text in place inside a <see cref="StringBuilder"/>
  /// initialised with the input: the first character and every character
  /// that follows whitespace are upper-cased, all remaining characters are
  /// lower-cased.
  /// </summary>
  /// <param name="str">Text to convert.</param>
  /// <returns>The converted text; an empty input is returned unchanged.</returns>
  /// <exception cref="ArgumentNullException">
  /// <paramref name="str"/> is null.
  /// </exception>
  public static string StringBuilderToTitleCase2 (string str)
  {
   // Report the real parameter name so the exception points at the
   // argument the caller actually passed.
   if (str == null)
    throw new ArgumentNullException ("str");

   if (str.Length == 0)
    return str;

   StringBuilder result = new StringBuilder (str);
   result [0] = char.ToUpper (result [0]);
   for (int i = 1; i < result.Length; ++i) {
    // The previous slot has already been converted when it is inspected here.
    if (char.IsWhiteSpace (result [i - 1]))
     result [i] = char.ToUpper (result [i]);
    else
     result [i] = char.ToLower (result [i]);
   }
   return result.ToString ();
  }
  
  /// <summary>
  /// Match evaluator: returns the matched text with its first character
  /// upper-cased and the rest lower-cased.
  /// </summary>
  /// <param name="str">A match whose text has at least one character.</param>
  /// <returns>The capitalized match text.</returns>
  static string CapitalizeString (Match str)
  {
   string word = str.Value;
   char initial = char.ToUpper (word [0]);
   string tail = word.Substring (1).ToLower ();
   return initial + tail;
  }
  
  /// <summary>
  /// Title-cases the text by replacing every run of word characters
  /// (<c>\w+</c>) with the result of <c>CapitalizeString</c>.
  /// </summary>
  /// <param name="str">Text to convert.</param>
  /// <returns>The text with every matched run replaced.</returns>
  public static string RegexToTitleCase (string str)
  {
   MatchEvaluator capitalizeWord = CapitalizeString;
   return Regex.Replace (str, @"\w+", capitalizeWord);
  }
  
  /// <summary>
  /// Times N calls of one title-casing implementation on the shared test
  /// sentence and prints the elapsed milliseconds after the given label.
  /// </summary>
  /// <param name="label">Text printed in front of the elapsed time.</param>
  /// <param name="method">Implementation under test.</param>
  private static void RunBenchmark (string label, Func<string, string> method)
  {
   // One untimed call first, so that one-time start-up cost (such as JIT
   // compilation of the method) is not charged to the measurement.
   method (test);

   Stopwatch watch = Stopwatch.StartNew ();
   for (int i = 0; i < N; i++) {
    method (test);
   }
   watch.Stop ();

   Console.WriteLine (label + watch.ElapsedMilliseconds);
  }

  /// <summary>
  /// Benchmarks every title-casing implementation in turn and then waits
  /// for a line of console input before exiting.
  /// </summary>
  /// <param name="args">Command-line arguments (unused).</param>
  public static void Main (string[] args)
  {
   // Every candidate goes through the same helper, so the delegate call
   // it adds is the same for all of them.
   RunBenchmark ("BufferToTitleCase time: ", BufferToTitleCase);
   RunBenchmark ("BadToTitleCase Time: ", BadToTitleCase);
   RunBenchmark ("BadToTitleCase2 Time: ", BadToTitleCase2);
   RunBenchmark ("NetToTitleCase Time: ", NetToTitleCase);
   RunBenchmark ("StringBuilderToTitleCase Time: ", StringBuilderToTitleCase);
   RunBenchmark ("StringBuilderToTitleCase2 Time: ", StringBuilderToTitleCase2);
   RunBenchmark ("SimpleToTitleCase Time: ", SimpleToTitleCase);
   RunBenchmark ("RegexToTitleCase Time: ", RegexToTitleCase);

   Console.WriteLine ("Done. Press any key...");
   Console.ReadLine ();
  }
 }
}
Benchmark results
My Conclusions
  • RegexToTitleCase: regular expressions are overkill for such a simple text-processing task; it is also clear that calling a delegate function negatively affects performance;
  • BadToTitleCase and BadToTitleCase2: these implementations allocate new temporary objects for string processing, and the source code is not elegant;
  • SimpleToTitleCase: it uses string.Join wisely, so there is no need to allocate new temporary objects for string processing, which improves performance;
  • NetToTitleCase: it uses CultureInfo.CurrentCulture.TextInfo.ToTitleCase, which is the framework's built-in implementation, so no comments here;
  • BufferToTitleCase: this is my favorite because of the simplicity of its code and its performance (it avoids allocating new temporary objects for string processing by using the string.ToCharArray method).


P.S. If you have any better or worse implementations, please contact me; I would be very interested in including them in my benchmark.