SYSK 166: Stream Output Performance Comparison


If you need to write a large number of data items (e.g. records with many fields) to a stream, do you think it’s faster to use multiple stream.Write statements or “batch it” via string.Format and use one stream.Write statement per record?


 


My tests show that you’ll get almost twice the performance by using fewer stream.Write statements (see code below):


Many statements:  6875000 ticks


Batch:  3437500 ticks


 


Many statements:  6406250 ticks


Batch:  3281250 ticks


 


Many statements:  6093750 ticks


Batch:  3437500 ticks


 


Note:  I tested this using both a MemoryStream and writing to a file – both resulted in roughly the same ratio.


 


// Benchmarks two ways of writing a ten-field record to a StreamWriter backed by a
// MemoryStream, and reports the elapsed time of each to the debug output:
//   1. "Many statements": ten formatted Write calls per record.
//   2. "Batch": one string.Format call followed by a single Write per record.
// The sender and e parameters are not used.
private void button1_Click(object sender, EventArgs e)
{
    const int Iterations = 100000;

    int[] numbers = new int[10];
    for (int i = 0; i < numbers.Length; i++)
    {
        numbers[i] = i;
    }

    // Stopwatch is used rather than DateTime.Now because DateTime.Now only advances in
    // coarse system-timer steps, which is too imprecise for timing a short loop.
    // Elapsed.Ticks is reported so the unit stays 100 ns, the same as DateTime ticks.
    long manyStatementsTicks;
    using (System.IO.MemoryStream ms = new System.IO.MemoryStream())
    using (System.IO.StreamWriter stream = new System.IO.StreamWriter(ms))
    {
        System.Diagnostics.Stopwatch timer = System.Diagnostics.Stopwatch.StartNew();
        for (int i = 0; i < Iterations; i++)
        {
            stream.Write("{0}", numbers[0]);
            stream.Write("{0}", numbers[1]);
            stream.Write("{0}", numbers[2]);
            stream.Write("{0}", numbers[3]);
            stream.Write("{0}", numbers[4]);
            stream.Write("{0}", numbers[5]);
            stream.Write("{0}", numbers[6]);
            stream.Write("{0}", numbers[7]);
            stream.Write("{0}", numbers[8]);
            stream.Write("{0}", numbers[9]);
        }
        // Stop before the writer is disposed so the final flush stays outside the
        // measured region, as in the original measurement.
        timer.Stop();
        manyStatementsTicks = timer.Elapsed.Ticks;
    }

    System.Diagnostics.Debug.WriteLine(string.Format("Many statements:  {0} ticks", manyStatementsTicks));

    long batchTicks;
    using (System.IO.MemoryStream ms = new System.IO.MemoryStream())
    using (System.IO.StreamWriter stream = new System.IO.StreamWriter(ms))
    {
        System.Diagnostics.Stopwatch timer = System.Diagnostics.Stopwatch.StartNew();
        for (int i = 0; i < Iterations; i++)
        {
            string buffer = string.Format("{0}{1}{2}{3}{4}{5}{6}{7}{8}{9}",
                numbers[0], numbers[1], numbers[2], numbers[3], numbers[4], numbers[5],
                numbers[6], numbers[7], numbers[8], numbers[9]);
            stream.Write(buffer);
        }
        timer.Stop();
        batchTicks = timer.Elapsed.Ticks;
    }

    System.Diagnostics.Debug.WriteLine(string.Format("Batch:  {0} ticks", batchTicks));
}


 

Comments (3)

  1. Peter Ritchie says:

    Not entirely unexpected, but taking half the time seems huge to me.  The formatting overhead should be roughly the same, which leads me to wonder why batching is *that* much faster.

    …good to know though, thanks.

  2. hasanib says:

    I guess the lesson learned here is try to make as few calls to Stream.Write as possible

  3. Amid says:

    Your sample test is inadequate. The reason for such a difference in the performance figures is the repeated use of the String.Format method.

    The following snippet shows results that differ from yours:

           private static void CallMe()

           {

               int[] numbers = new int[10];

               for(int i = 0; i < numbers.Length; i++)

               {

                   numbers[i] = i;

               }

               System.IO.MemoryStream ms = new System.IO.MemoryStream();

               System.IO.StreamWriter stream = new System.IO.StreamWriter(ms);

               long t1, t2;

               t1 = DateTime.Now.Ticks;

               for(int i = 0; i < 100000; i++)

               {

                   stream.Write(numbers[0].ToString());

                   stream.Write(numbers[1].ToString());

                   stream.Write(numbers[2].ToString());

                   stream.Write(numbers[3].ToString());

                   stream.Write(numbers[4].ToString());

                   stream.Write(numbers[5].ToString());

                   stream.Write(numbers[6].ToString());

                   stream.Write(numbers[7].ToString());

                   stream.Write(numbers[8].ToString());

                   stream.Write(numbers[9].ToString());

               }

               t2 = DateTime.Now.Ticks;

               stream.Close();

               ms.Close();

               

               Console.WriteLine(string.Format("Many statements:  {0} ticks", t2 – t1));

               ms = new System.IO.MemoryStream();

               stream = new System.IO.StreamWriter(ms);

               t1 = DateTime.Now.Ticks;

               for(int i = 0; i < 100000; i++)

               {

                   string buffer = string.Format("{0}{1}{2}{3}{4}{5}{6}{7}{8}{9}",

                       numbers[0], numbers[1], numbers[2], numbers[3], numbers[4], numbers[5],

                       numbers[6], numbers[7], numbers[8], numbers[9]);

                   stream.Write(buffer);

               }

               t2 = DateTime.Now.Ticks;

               stream.Close();

               ms.Close();

               Console.WriteLine(string.Format("Batch:  {0} ticks", t2 – t1));

           }

    The results are:

    Many statements:  3750336 ticks

    Batch:  5312976 ticks

    Many statements:  3906600 ticks

    Batch:  5312976 ticks

    Many statements:  3750336 ticks

    Batch:  5469240 ticks

    If we set the capacity of the memory streams and get rid of String.Format altogether, the performance figures are exactly the same.

    Sample code:

           private static void CallMe()

           {

               int[] numbers = new int[10];

               for(int i = 0; i < numbers.Length; i++)

               {

                   numbers[i] = i;

               }

               System.IO.MemoryStream ms = new System.IO.MemoryStream(10000000);

               System.IO.StreamWriter stream = new System.IO.StreamWriter(ms);

               long t1, t2;

               t1 = DateTime.Now.Ticks;

               for(int i = 0; i < 100000; i++)

               {

                   stream.Write(numbers[0].ToString());

                   stream.Write(numbers[1].ToString());

                   stream.Write(numbers[2].ToString());

                   stream.Write(numbers[3].ToString());

                   stream.Write(numbers[4].ToString());

                   stream.Write(numbers[5].ToString());

                   stream.Write(numbers[6].ToString());

                   stream.Write(numbers[7].ToString());

                   stream.Write(numbers[8].ToString());

                   stream.Write(numbers[9].ToString());

               }

               t2 = DateTime.Now.Ticks;

               stream.Close();

               ms.Close();

               

               Console.WriteLine(string.Format("Many statements:  {0} ticks", t2 – t1));

               ms = new System.IO.MemoryStream(10000000);

               stream = new System.IO.StreamWriter(ms);

               t1 = DateTime.Now.Ticks;

               for(int i = 0; i < 100000; i++)

               {

                   string buffer = string.Concat(

                       numbers[0].ToString(), numbers[1].ToString(), numbers[2].ToString(), numbers[3].ToString(), numbers[4].ToString(), numbers[5].ToString(),

                       numbers[6].ToString(), numbers[7].ToString(), numbers[8].ToString(), numbers[9].ToString());

                   stream.Write(buffer);

               }

               t2 = DateTime.Now.Ticks;

               stream.Close();

               ms.Close();

               Console.WriteLine(string.Format("Batch:  {0} ticks", t2 – t1));

           }

    The results are:

    Many statements:  3906600 ticks

    Batch:  3906600 ticks

    Many statements:  3906600 ticks

    Batch:  3906600 ticks

    Many statements:  3906600 ticks

    Batch:  3906600 ticks