Using GZipStream for Compression in .NET [Brian Grunkemeyer]

One of the cool new features in Whidbey is a GZipStream class, which supports GNU's zip utility (gzip).  Anyone with a Unix background has surely come across .tar.gz or .tgz files while dealing with the tedium that accompanies downloading new source code & compiling new binaries for a Linux box, etc.  Well, we now support the gzip file format with the .NET Framework. 

I didn't work on this class directly, and when I first heard this, I was a little skeptical of whether our GZipStream was perfectly compatible with gzip.  So I wrote a managed gzip front end and tested it out.  Sure enough, it works!  We have a slightly less than perfect compression algorithm, but the point is we have compatibility.  From a few files I played around with, with different levels of compression (gzip's -0 and -9 options), we can uncompress all of them.  And what we compress, gunzip can uncompress.  So we have compatibility, even if our compression is not quite optimal at this point.

There was one interesting little trick with gzip.  GNU doesn't distribute a gunzip binary.  Instead, if you copy gzip.exe to gunzip.exe, it starts uncompressing files!  For those with experience in Unix sysadmin work (or if you've ever written C applications that parse their command line args), you may be used to the idea of a binary that changes behavior based on the name of the binary.  This is called writing a program that is argv[0]-sensitive.  (Argv is the name of the argument to main in C programs that contains all of your command line parameters).  This is marginally harder in C#, because C# strips off argv[0] when passing parameters to main.  (I honestly don't quite know how they do this - maybe it's a relatively obscure CLR feature that I haven't seen yet, based on some metadata setting.)  However, we can easily get the program name using Environment.GetCommandLineArgs(), which returns a String[] that is equivalent to C's argv.  You can check this out below - look for mgunzip.

Sorry, we don't have tar support yet.

using System;

using System.IO;

using System.IO.Compression;

using System.Collections.Generic;

 

// To decompress, either use mgzip -d or "copy mgzip.exe mgunzip.exe".

// Like many Unix utilities, this one is argv[0] sensitive.

 

/// <summary>
/// A minimal managed clone of GNU gzip built on <see cref="GZipStream"/>.
/// Compresses by default; decompresses when -d/--decompress is given or when
/// the executable itself is named mgunzip (argv[0]-sensitive).
/// </summary>
public static class MGZip
{
    // Suffix appended to compressed files and required on files to decompress.
    private const String GZipSuffix = ".gz";

    /// <summary>
    /// Writes the list of supported command-line options to standard output.
    /// </summary>
    private static void Usage()
    {
        Console.WriteLine("Managed GZip clone, using GZipStream");

        /*
          // Here's the real GZip's help
gzip 1.2.4 Win32 (02 Dec 97)
usage: gzip [-acdfhlLnNrtvV19] [-S suffix] [file ...]
 -a --ascii ascii text; convert end-of-lines using local conventions
 -c --stdout write on standard output, keep original files unchanged
 -d --decompress decompress
 -f --force force overwrite of output file and compress links
 -h --help give this help
 -l --list list compressed file contents
 -L --license display software license
 -n --no-name do not save or restore the original name and time stamp
 -N --name save or restore the original name and time stamp
 -q --quiet suppress all warnings
 -r --recursive operate recursively on directories
 -S .suf --suffix .suf use suffix .suf on compressed files
 -t --test test compressed file integrity
 -v --verbose verbose mode
 -V --version display version number
 -1 --fast compress faster
 -9 --best compress better
 file... files to (de)compress. If none given, use standard input.
        */

        Console.WriteLine("mgzip [-cdhL] [file ...]");
        Console.WriteLine(" -c --stdout write on standard output, keep original files unchanged");
        Console.WriteLine(" -d --decompress decompress");
        Console.WriteLine(" -h --help give this help");
        Console.WriteLine(" -L --license display software license");
        Console.WriteLine(" file... files to (de)compress. If none given, use standard input.");
    }

    /// <summary>
    /// Copies <paramref name="source"/> into <paramref name="destination"/>
    /// in gzip format.
    /// </summary>
    /// <param name="source">Stream providing the uncompressed bytes.</param>
    /// <param name="destination">
    /// Stream receiving the gzip data. It is closed when this method returns,
    /// because disposing the GZipStream also disposes the stream it wraps.
    /// </param>
    private static void Compress(Stream source, Stream destination)
    {
        // We must explicitly close the output stream, or GZipStream will not
        // write the compression's footer to the file.  So we'll get a file, but
        // we won't be able to decompress it.  We'll get back 0 bytes.
        using (GZipStream output = new GZipStream(destination, CompressionMode.Compress)) {
            Pump(source, output);
        }
    }

    /// <summary>
    /// Reads gzip data from <paramref name="source"/> and writes the
    /// decompressed bytes to <paramref name="destination"/>.
    /// </summary>
    /// <param name="source">
    /// Stream providing gzip data. It is closed when this method returns,
    /// because disposing the GZipStream also disposes the stream it wraps.
    /// </param>
    /// <param name="destination">Stream receiving the decompressed bytes; left open.</param>
    private static void Decompress(Stream source, Stream destination)
    {
        using (GZipStream input = new GZipStream(source, CompressionMode.Decompress)) {
            Pump(input, destination);
        }
    }

    /// <summary>
    /// Copies every byte from <paramref name="input"/> to
    /// <paramref name="output"/> until <paramref name="input"/> reports end of stream.
    /// </summary>
    private static void Pump(Stream input, Stream output)
    {
        byte[] bytes = new byte[4096];
        int n;
        // Read returns 0 only at end of stream; a short read is not an error.
        while ((n = input.Read(bytes, 0, bytes.Length)) != 0) {
            output.Write(bytes, 0, n);
        }
    }

    /// <summary>
    /// Program entry point: parses the options, then compresses or
    /// decompresses either standard input or each named file.
    /// </summary>
    /// <param name="args">Command-line arguments, without the program name.</param>
    private static void Main(String[] args)
    {
        // This program is argv[0]-sensitive.  If you name it mgunzip, it
        // will decompress by default.
        bool compress = true;

        // C# does not pass the program name to Main, but the base class
        // library exposes the full command line, including argv[0].
        String[] argv = Environment.GetCommandLineArgs();
        String programName = argv.Length > 0 ? Path.GetFileName(argv[0]) : String.Empty;
        if (String.Equals("mgunzip", programName, StringComparison.OrdinalIgnoreCase) ||
            String.Equals("mgunzip.exe", programName, StringComparison.OrdinalIgnoreCase)) {
            compress = false;
        }

        bool useStdin = true;
        bool useStdout = false;
        List<String> inputFiles = new List<String>();
        if (args.Length < 1) {
            Usage();
            return;
        }

        foreach (String arg in args) {
            // Args are case sensitive.  This must be a single if/else-if chain:
            // otherwise a recognized option such as -d falls through to the
            // "unrecognized option" check below.
            if (String.Equals(arg, "-d") || String.Equals(arg, "--decompress")) {
                compress = false;
            }
            else if (String.Equals(arg, "-h") || String.Equals(arg, "--help")) {
                Usage();
                return;
            }
            else if (String.Equals(arg, "-L") || String.Equals(arg, "--license")) {
                Console.WriteLine("Public domain - There is no license.");
                return;
            }
            else if (String.Equals(arg, "-c") || String.Equals(arg, "--stdout")) {
                useStdout = true;
            }
            else if (arg.Length > 0 && arg[0] == '-') {
                Console.WriteLine("Unrecognized option \"{0}\".  Try -h or --help for usage.", arg);
                return;
            }
            else {
                useStdin = false;
                inputFiles.Add(arg);
            }
        }

        if (useStdin) {
            using (Stream src = Console.OpenStandardInput())
            using (Stream dest = Console.OpenStandardOutput()) {
                if (compress)
                    Compress(src, dest);
                else
                    Decompress(src, dest);
            }
            return;
        }

        foreach (String fileName in inputFiles) {
            // An empty argument (e.g. "") cannot name a file; report it rather
            // than letting File.OpenRead throw.
            if (fileName.Length == 0) {
                Console.Error.WriteLine("{0}: empty file name -- ignored.", compress ? "mgzip" : "mgunzip");
                continue;
            }

            // Work out the output name first, so that a file we are going to
            // skip is never opened and no output file is created for it.
            String outputFileName = null;
            if (!useStdout) {
                if (compress) {
                    outputFileName = fileName + GZipSuffix;
                }
                else {
                    if (!fileName.EndsWith(GZipSuffix, StringComparison.Ordinal)) {
                        Console.Error.WriteLine("mgunzip: {0}: unknown suffix -- ignored.", fileName);
                        continue;
                    }
                    outputFileName = fileName.Substring(0, fileName.Length - GZipSuffix.Length);
                }
            }

            // Open the input before creating the output: if the input is
            // missing we must not leave an empty output file (or an open
            // handle to it) behind.
            using (Stream src = File.OpenRead(fileName))
            using (Stream dest = useStdout ? Console.OpenStandardOutput() : File.Create(outputFileName)) {
                if (compress)
                    Compress(src, dest);
                else
                    Decompress(src, dest);
            }

            // GZip deletes the input file.
            if (!useStdout)
                File.Delete(fileName);
        }
    }
}