symbol filter redux

A while ago I provided this local symbol server proxy you could use to get just the symbols you want.  I was watching it work a couple of weeks ago and I noticed that most of the time what it ends up doing is proxying a 302 redirect.  Which was kind of cool because that meant that it didn't actually have to do much heavy lifting at all and the symbols were coming directly from the original source.  That's when it hit me that I had been doing it wrong the whole time.  So I deleted all of the proxy code.  What it does now is always serve one of two responses: a 404 if the request isn't on the white list, or a 302 if it is.  It just redirects according to the path provided.

The original article is here

To use it, instead of doing this:

set _NT_SYMBOL_PATH=srv*https://yourserver/yourpath

Do this:

set _NT_SYMBOL_PATH=srv*https://localhost:8080/yourserver/yourpath

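If the request contains one of the patterns listed in symfilter.txt (one pattern per line, matched case-insensitively), it will serve a 302 redirect to https://yourserver/yourpath/[the usual pdb request path]; otherwise it serves a 404.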

Note the syntax is slightly different than the original version.

Now that it's only serving redirects or failures it probably could use the http listener directly because the reasons for doing sockets myself are all gone.  But I didn't make that change.

The code is here:

 using System;
using System.Collections.Generic;
using System.Text;
using System.IO;
using System.Net;
using System.Net.Sockets;

namespace SymbolFilter
{
    // A tiny loopback HTTP endpoint that filters symbol requests.
    //
    // Each incoming request line is compared against the patterns loaded from symfilter.txt.
    // If no pattern matches (or the request is not "GET /... HTTP/1.1") the answer is a 404.
    // If a pattern matches, the first path segment is taken as the real symbol server and the
    // client is sent a 302 redirect to https://<that server>/<rest of the path>.
    // No symbol data ever flows through this process.
    class FilterProxy
    {
        // we'll accept connections using this listener
        static TcpListener listener;

        // this holds the white list of patterns (already trimmed and lower-cased) we do not ignore
        static List<string> dllFilterList = new List<string>();

        // default port is 8080, config would be nice...
        const int port = 8080;

        // the only request shape we are willing to redirect is "GET /<path> HTTP/1.1"
        const string requestPrefix = "GET /";
        const string requestSuffix = " HTTP/1.1";

        // Entry point: load the white list, start listening, then wait for enter.
        static void Main(string[] args)
        {
            // load up the dlls
            InitializeDllFilters();

            // open the socket
            StartHttpListener();

            // all real work happens in the background, if you ever press enter we just exit
            Console.WriteLine("Listening on port {0}.  Press enter to exit", port);
            Console.ReadLine();
        }

        // Reads symfilter.txt from the current directory into dllFilterList, one pattern per line.
        // Patterns are trimmed and lower-cased so they can be compared against the lower-cased request.
        // Any failure (missing file, read error) prints the message and exits the process with code 1.
        static void InitializeDllFilters()
        {
            try
            {
                // "using" makes sure the file is closed even if a read fails part way through
                using (StreamReader sr = new StreamReader("symfilter.txt"))
                {
                    // read lines from the file
                    string line;
                    while ((line = sr.ReadLine()) != null)
                    {
                        string pattern = line.Trim().ToLowerInvariant();

                        // a blank line must not become a pattern: every string contains "",
                        // so an empty pattern would put every request on the white list
                        if (pattern.Length == 0)
                            continue;

                        dllFilterList.Add(pattern);
                    }
                }
            }
            catch (Exception e)
            {
                // anything goes wrong and we're done here, there's no recovery from this
                Console.WriteLine(e.Message);
                Environment.Exit(1);
            }
        }

        // Here we will just listen for connections on the loopback adapter, port 8080
        // this could really use some configuration options as well.  In fact running more than one of these
        // listening to different ports with different white lists would be very useful.
        // Any failure to open the socket prints the message and exits the process with code 1.
        public static void StartHttpListener()
        {
            try
            {
                listener = new TcpListener(IPAddress.Loopback, port);
                listener.Start();
                listener.BeginAcceptTcpClient(new AsyncCallback(DoAcceptTcpClientCallback), listener);
            }
            catch (Exception e)
            {
                // anything goes wrong and we're done here, there's no recovery from this
                Console.WriteLine(e.Message);
                Environment.Exit(1);
            }

        }

        // Accept callback: completes the pending accept, handles that one connection
        // synchronously, then queues the next accept on the same listener.
        public static void DoAcceptTcpClientCallback(IAsyncResult ar)
        {
            // Get the listener that handles the client request.
            TcpListener listener = (TcpListener)ar.AsyncState;

            // I should probably assert that the listener is the listener that I think it is here

            try
            {
                // end the async activity and open the client
                // I don't support keepalive so "using" is fine
                using (TcpClient client = listener.EndAcceptTcpClient(ar))
                {
                    // Process the http request
                    // Note that by doing it here we are effectively serializing the listens
                    // that seems to be ok because mostly we only redirect
                    ProcessHttpRequest(client);
                }
            }
            catch (Exception e)
            {
                // if anything goes wrong we'll just move on to the next connection
                Console.WriteLine(e.Message);
            }

            // queue up another listen
            listener.BeginAcceptTcpClient(new AsyncCallback(DoAcceptTcpClientCallback), listener);
        }


        // We have an incoming request, let's handle it: read the request header, check the
        // request line against the white list and answer with either a 404 or a 302.
        static void ProcessHttpRequest(TcpClient client)
        {
            // we're going to process the request as text
            // (the reader is deliberately not disposed here: that would close the client's
            // stream before we get to write the response; the caller disposes the client)
            NetworkStream stream = client.GetStream();
            StreamReader sr = new StreamReader(stream);

            // read until the first blank line or the end, whichever comes first
            var lines = new List<string>();
            for (;;)
            {
                var line = sr.ReadLine();

                if (line == null || line == "")
                    break;

                lines.Add(line);
            }

            // the client sent nothing usable (closed early or led with a blank line);
            // there is no request line to look at so this cannot be on the white list
            if (lines.Count == 0)
            {
                Return404(client);
                return;
            }

            // e.g. "GET /foo.pdb/DE1EBC3EE7E542EA96B066229D3A40081/foo.pdb HTTP/1.1"
            var req = lines[0];

            // avoid case sensitivity issues for matching the pattern
            var reqLower = Uri.UnescapeDataString(req).ToLowerInvariant();

            // loop over available patterns, first match wins
            string matchedPattern = null;
            foreach (string pattern in dllFilterList)
            {
                if (reqLower.Contains(pattern))
                {
                    matchedPattern = pattern;
                    break;
                }
            }

            // protocol text is compared ordinally, culture rules have no business here
            bool wellFormed = req.StartsWith(requestPrefix, StringComparison.Ordinal)
                && req.EndsWith(requestSuffix, StringComparison.Ordinal);

            // if we didn't match, or it isn't a GET or it isn't HTTP/1.1 then serve up a 404
            if (matchedPattern == null || !wellFormed)
            {
                // you don't match, fast exit, this is basically the whole point of this thing
                Return404(client);
            }
            else
            {
                // this is the real work
                Console.WriteLine("Matched pattern: {0}", matchedPattern);
                RedirectRequest(client, req);
            }
        }

        // cons up a minimal 404 error and return it
        static void Return404(TcpClient client)
        {
            // it doesn't get any simpler than this
            var sw = new StreamWriter(client.GetStream());

            // HTTP lines end in CRLF regardless of what the host platform's newline is
            sw.NewLine = "\r\n";
            sw.WriteLine("HTTP/1.1 404 Not Found");
            sw.WriteLine();
            sw.Flush();
        }

        // cons up a minimal 302 redirect to https://<server><url> and return it
        // server: host (and optional port) to redirect to, with no scheme and no trailing slash
        // url:    path on that server, expected to start with "/"
        static void Return302(TcpClient client, string server, string url)
        {
            string line = String.Format("Location: https://{0}{1}", server, url);
            Console.WriteLine("302 Redirect {0}", line);

            // emit the redirect
            var sw = new StreamWriter(client.GetStream());

            // HTTP lines end in CRLF regardless of what the host platform's newline is
            sw.NewLine = "\r\n";
            sw.WriteLine("HTTP/1.1 302 Redirect");
            sw.WriteLine(line);
            sw.WriteLine();
            sw.Flush();
        }


        // Splits the request path into "target server" (first path segment) and "everything else"
        // and redirects the client there.  If the path has no second segment a 404 is served instead.
        // req must be a full request line of the form "GET /<path> HTTP/1.1".
        static void RedirectRequest(TcpClient client, string req)
        {
            // we know this is safe to do because we already verified that the request starts with "GET /"
            string request = req.Substring(requestPrefix.Length); // strip off the "GET /"
            request = request.Substring(0, request.Length - requestSuffix.Length);  // strip off " HTTP/1.1"

            // we're looking for the server that we are supposed to use for this request in our path
            // the point of this is to make it so that you can set your symbol path to include
            // https://localhost:8080/your-sym-server/whatever
            // and your-sym-server will be used by the proxy
            // the search starts at index 1 so the server name is never empty; IndexOf throws
            // when the start index is past the end, so an empty path is treated as "not found"
            int delim = request.Length > 1 ? request.IndexOf('/', 1) : -1;

            // if there is no server specified then we're done
            if (delim < 0)
            {
                // serve up an error
                Return404(client);
                return;
            }

            // the target server is everything up to the / but not including it
            string targetServer = request.Substring(0, delim);

            // the new request URL starts with the first character after the /
            request = request.Substring(delim + 1);

            // if there isn't already a leading slash, then add one
            if (!request.StartsWith("/", StringComparison.Ordinal))
                request = "/" + request;

            Return302(client, targetServer, request);
        }
    }
}