General Guidelines of troubleshooting performance issues in Team Foundation Server

I think this is worth sharing for those of you who are handling performance issues. Some things to check for that I collated with SQL/ TFS Product team while troubleshooting performance issues.

 -  Where any updates installed on the machine (check the Control Panel -> Updates)

- Any changes in the storage where the SQL files are (DATA, LOG, TEMPDBs)?

-  How many tempdb files do customers have? Make sure they have exactly same number of TEMPDB data files and that the files are of the same size and there’s enough free space in the drive where the TEMPDB files are

Some data to collect:

(a)Long running SQL operations. On the TFS Config DB run

 

DECLARE @Events TABLE(

Session_Id SMALLINT,

EventType NVARCHAR(30) NULL,

Parameters INT NULL,

EventInfo NVARCHAR(255) NULL

)

 

DECLARE @Inputbuffer
TABLE (

EventType NVARCHAR(30) NULL,

Parameters INT
NULL,

EventInfo NVARCHAR(255) NULL

)

 

DECLARE ibcursor
CURSOR

   FOR

   select session_id from sys.dm_exec_requests

OPEN ibcursor;

DECLARE @sid
sysname;

FETCH NEXT
FROM ibcursor INTO @sid;

WHILE (@@FETCH_STATUS <>
-1)

BEGIN;

      
DELETE FROM @Inputbuffer

   INSERT INTO @Inputbuffer

   EXEC ('dbcc inputbuffer(' + @sid + ') WITH NO_INFOMSGS')

  

   INSERT INTO @Events

   SELECT @sid, * FROM @InputBuffer

 

   FETCH NEXT FROM ibcursor INTO @sid;

END;

CLOSE ibcursor;

DEALLOCATE ibcursor;

 

SELECT  e.EventInfo,

             
a.session_id,

        datediff(ss, a.Start_Time, getdate()) as Seconds,

       
a.total_elapsed_time /
1000.0 as ElapsedTime,

       
m.requested_memory_kb,

       
m.max_used_memory_kb,

       
m.dop,

       
a.command,

       
d.Name AS DBName,        

       
a.blocking_session_id,

        a.wait_type,

        a.wait_time,

       
a.last_wait_type,

       
a.wait_resource,

       
a.reads,

       
a.writes,

       
a.logical_reads,

       
a.cpu_time,

       
a.granted_query_memory

             
, convert(xml, c.query_plan)

FROM    sys.dm_exec_requests a with (nolock)

JOIN    @Events e

ON           
e.Session_Id = a.session_id

OUTER APPLY
sys.dm_exec_sql_text(a.sql_handle) b 

OUTER APPLY
sys.dm_exec_text_query_plan (a.plan_handle, a.statement_start_offset, a.statement_end_offset) c

LEFT JOIN
sys.dm_exec_query_memory_grants m (nolock)

ON      m.session_id = a.session_id

       
AND m.request_id = a.request_id

JOIN    sys.databases d

ON      d.database_id = a.database_id

WHERE   a.session_id > 50

       
and a.session_id <> @@spid

             
and e.EventType = 'RPC Event'

ORDER BY
a.Start_Time 

-         

 

(b)TFS command duration. Run this against the config DB and each of the collection DBs (replace the highlighted below with the DB names)

declare @startDate DATETIME = DATEADD(hour,-24,GETUTCDATE())

declare @endDate DATETIME = DATEADD(minute,-1,GETUTCDATE())

 

SELECT Command, Sum(ExecutionCount) as CommandCount,

sum(ExecutionTime)/1000 as TotalExecTimeInMS,

sum(ExecutionTime)/Sum(ExecutionCount)/1000 as ExecTimeInMSPerCmd

  FROM [<db>].[dbo].[tbl_Command] (nolock)    where starttime > @startDate and startTime <= @endDate

   GROUP BY Application, Command

   ORDER BY Command desc

 What TFS jobs are executing? (replace the highlighted with the name of one of the collection DBs)

             
SELECT T1.JobCount,

             
case T1.JobState

             
when -1 THEN 'Dormant'

             
when 0 THEN 'QueuedScheduled'

             
when 1 THEN 'Running'

             
when 2 THEN 'Paused'

             
when 3 THEN 'Pausing'

             
when 4 THEN 'Resuming'

             
when 5 THEN 'Stopping'

             
end as JobState,          

             
T2.JobName, T2.JobId, T1.Priority

             
from

      
(SELECT COUNT(JQ.JobId) as JobCount, JQ.JobId, JQ.JobState, JQ.Priority

  FROM Tfs_Configuration.[dbo].[tbl_JobQueue] JQ (nolock)

  group by JQ.JobId, JQ.JobState,JQ.Priority) as T1

   join (select JD.JobId, JD.JobName from Tfs_Configuration.[dbo].[tbl_JobDefinition] JD (nolock) group by JD.JobId, JD.JobName) as T2   on T1.JobId = T2.JobId

 

             
SELECT T1.JobCount,

             
case T1.JobState

             
when -1 THEN 'Dormant'

             
when 0 THEN 'QueuedScheduled'

             
when 1 THEN 'Running'

             
when 2 THEN 'Paused'

             
when 3 THEN 'Pausing'

             
when 4 THEN 'Resuming'

             
when 5 THEN 'Stopping'

             
end as JobState,          

             
T2.JobName, T2.JobId, T1.Priority  from (SELECT COUNT(JQ.JobId) as JobCount, JQ.JobId, JQ.JobState, JQ.Priority

  FROM Tfs_Configuration.[dbo].[tbl_JobQueue] JQ (nolock)   group by JQ.JobId, JQ.JobState,JQ.Priority) as T1 left join (select JD.JobId, JD.JobName from <collectionDB>.[dbo].[tbl_JobDefinition] JD (nolock) group by JD.JobId, JD.JobName) as T2

  on T1.JobId = T2.JobId   order by JobState DESC

 -         
Mitigation

Disable reporting ->  blogs.msdn.com/b/tfssetup/archive/2013/08/06/how-to-disable-a-tfs-warehouse-or-analysis-job.aspx

Note that it might be necessary to rebuild the warehouse and cube so do not do this if you are not ok with rebuilding

 Written by:Nitish Nagpal, Support Escalation Engineer