Filestream and Full-Text – Full Solution for Document Indexing in SQL Server


Recently someone was asking if there is a simple SQL Server solution where you would generate text documents and simply pass them to SQL Server to catalog without writing an application to do this. Also, would like to be able to index and search the context of these documents.

The solution in SQL Server would involve Filestream and Full-Text technologies. There are two alternatives:

  1. Use FileTable
  2. Use a regular table

Below is a sample script of what each scenario would look like:

/******************************************

--Using FileTable

**************************************************/

use master

exec sp_configure 'filestream access level', 2

go

drop database FileTableDB_forTextDocs

go

CREATE DATABASE FileTableDB_forTextDocs

ON PRIMARY (

NAME = N'FileTableDB',

FILENAME = N'C:\Program Files\Microsoft SQL Server\MSSQL13.MSSQLSERVER\MSSQL\DATA\FileTableFileTableDB.mdf'

),

FILEGROUP FilestreamFG CONTAINS FILESTREAM (

NAME = FileStreamGroup1,

FILENAME= 'C:\Program Files\Microsoft SQL Server\MSSQL13.MSSQLSERVER\MSSQL\DATA\FileTableData' )

LOG ON (

NAME = N'FileTableDB_Log',

FILENAME = N'C:\Program Files\Microsoft SQL Server\MSSQL13.MSSQLSERVER\MSSQL\DATA\FileTableFileTableDB_log.ldf'

)

WITH FILESTREAM (

NON_TRANSACTED_ACCESS = FULL,

DIRECTORY_NAME = N'TextesFileTable' )

go

--validate the db with directory name

SELECT DB_NAME ( database_id ), directory_name, non_transacted_access, non_transacted_access_desc

FROM sys.database_filestream_options

where directory_name is not null

GO

use FileTableDB_forTextDocs

go

CREATE TABLE dbo.TextDocuments AS FileTable

WITH (

FileTable_Directory = 'TextesFileTable',

FileTable_Collate_Filename = database_default

);

GO

--validate table is there

SELECT * FROM sys.filetables;

GO

SELECT * FROM sys.tables WHERE is_filetable = 1;

GO

--check if there is something in there - NOTHING YET

select * from dbo.TextDocuments

--find the location of the filetable root path (DIRECTORY_NAME)

SELECT FileTableRootPath('dbo.TextDocuments');

--Copy/create 2-3 simple text files in that share found from above command

--MAKE SURE YOU CLOSE THE SHARE, OR HAVING IT OPEN IN WINDOWS EXPLORER CAN CAUSE BLOCKING

--check if there is something in there - 3 documents

select * from dbo.TextDocuments

--Now create FT Indexing on it

--catalog

CREATE FULLTEXT CATALOG [FTCat1] WITH ACCENT_SENSITIVITY = ON

--get the primary/unique key name

exec sp_help Textdocuments

--index

CREATE FULLTEXT INDEX ON [dbo].[TextDocuments] (file_stream TYPE COLUMN [name]) KEY INDEX [PK__TextDocum__5A5B77D58E492DCE] ON ([FTCat1]) WITH (CHANGE_TRACKING AUTO)

--search full text

select * from dbo.TextDocuments

where contains (file_stream, 'word_in_document')

 

2. Using a regular table

/******************************************

--Using Filestream and regular table

**************************************************/

drop table TextDocumentsRegular

go

create table TextDocumentsRegular(

Guid UNIQUEIDENTIFIER ROWGUIDCOL NOT NULL UNIQUE DEFAULT NEWSEQUENTIALID(),

documentID int not null ,

documentType varchar(10) not null,

TextContent varbinary(max) FILESTREAM not null ,

dateinserted datetime )

go

INSERT INTO TextDocumentsRegular (documentID, documentType, TextContent, dateinserted)

SELECT 101, '.txt', *, GETDATE() FROM OPENROWSET(BULK N'C:\temp\NewTextDocument.txt', SINGLE_BLOB) rs

go

select * from TextDocumentsRegular

--get the primary/unique key name

exec sp_help TextDocumentsRegular

--create full-text index

CREATE FULLTEXT INDEX ON [dbo].TextDocumentsRegular (Textcontent type column documentType) KEY INDEX UQ__TextDocum__A2B5777D37D0C9EB ON ([FTCat1]) WITH (CHANGE_TRACKING AUTO)

--search full text

select * from dbo.TextDocumentsRegular

where contains (TextContent, 'word_in_document')

 

Namaste,

Joseph

 


Comments (0)

Skip to main content