Not logged in - Login

Extract table data into a merge statement with unique column

The following extracts data from a SQL tables and creates a merge statement that can be used to populate a table with the same data (presumably, in another database.)

This script requires that the table has a single field that can uniquely identify a row, an identity column or a rowguidid column works well.

This script will work on SQL 2005, but the generated script requires SQL 2008 or higher.

Note: This extract script will not extract varbinary data.

/*

This script extracts the existing data in a table
and places it into a merge statement to allow the
table to be populated with the same data.
 
select * from dbo.MyTable

*/
 
-- specify the name of the table whose data is to be output
:setvar TableName dbo.MyTable
 
-- specify the column name that uniquely identifies the row (usually an identity column)
:setvar UniqueCol lRowID
 
-- Set this to 1 to have the merge statement compare the remainder of the fields before updating an existing row
:setvar CompareAllColumnsBeforeUpdate 0
 
-- declare and set a variable so we will know if the identity column data, if any, should be output
:setvar IncludeIdentity 1

-- make sure the temp table does not already exist
IF (OBJECT_ID('tempdb..#TempData') IS NOT NULL)
begin
    DROP TABLE #TempData
end
GO
 
-- create a temp table
CREATE TABLE #TempData
(
       lColOrder int not null primary key
       , sName varchar(255) not null
       , bIsIdentity bit not null default(0)
       , DataTypeId int not null
       , bUseTicks bit not null default(0)
       , ColData varchar(max) null       --not null default('null')
 
       , unique (sName, lColOrder)
)
GO
 
-- turn on nocount.
set nocount on;

declare @dtStartTime datetime
set @dtStartTime = getdate()

 
-- insert the table columns into our temp table.
INSERT into #TempData
(lColOrder, sName, bIsIdentity, DataTypeId )
select column_id, [name], a.is_identity, system_type_id
from sys.columns a
where a.[object_id] = object_id('$(TableName)')
       and a.is_computed = 0
       --and (a.is_identity = 0 or $(IncludeIdentity) = 1)
       --and a.system_type_id != 189       -- exclude rowversion columns.
       --
       --and a.column_id
-- get the smallest ColOrder value
declare @lFirstColOrder int
set @lFirstColOrder = (
       select top 1 lColOrder from #TempData order by lColOrder
)
 
-- update the 'Use Ticks' column for the data types that need to be enclosed in single quote characters.
UPDATE #TempData set
       bUseTicks = 1
where DataTypeId in (167, 35, 175, 40, 41, 42, 61, 36, 231)

--select * from #TempData

-- determine if the unique column requires tick marks around it.
DECLARE @bUseTicksAroundUniqueCol bit, @sTick varchar(1)
set @bUseTicksAroundUniqueCol = (
       select top 1 bUseTicks
       from #TempData
       where sName = '$(UniqueCol)'
)

--print 'Use ticks: ' + convert(varchar, @bUseTicksAroundUniqueCol)
if (@bUseTicksAroundUniqueCol = 1)
begin
       set @sTick = ''''
end else begin
       set @sTick = ''
end


-- declare a variable to use for our execute command statements.
declare @cmd varchar(max)
 
-- declare some variables we will use a little later.
declare @sCurrentCol varchar(255)
       , @lDataType int
       , @RowID int
       , @lSrcRowCount int
 
set @lSrcRowCount = 0 
 
-- update the [ColData] column to be null for all rows.
UPDATE #TempData set ColData=null
 
 
-- declare a variable to hold the output line.
declare @sOutputLine varchar(max)
declare @crlf varchar(2)
       , @tab varchar(1)
 
select @crlf =  char(13) + char(10)
       , @tab = char(9)
 
 
print '/*'
print ''
print '       delete from $(TableName)'
print ''
print '*/'
 
-- get the count of records we have in the source table
DECLARE @lSourceRowCount int;
SELECT @lSourceRowCount = count(*)
from $(TableName)
 
-- turn on the identity insert.
print 'set identity_insert $(TableName) on' + @crlf + @crlf
 
-- start the CTE portion of the output
set @sOutputLine = ';with cteSrc as (
       SELECT *
       from ( values
              -- Source DB: [' + @@servername + '].[' + DB_NAME() + '] at ' + convert(varchar(40), getdate(), 109) + @crlf
--print @sOutputLine
print ';with cteSrc as ('
print @tab + 'SELECT *'
print @tab + 'from ( values'
print @tab + @tab + '-- Source DB: [' + @@servername + '].[' + DB_NAME() + '] at ' + convert(varchar(40), getdate(), 109) 
print @tab + @tab + '-- Outputting ' + convert(varchar(10), @lSourceRowCount) + ' records.'
 
 
 
-- declare & open the columns cursor.
DECLARE curColumns cursor scroll read_only for
       select a.sName, a.DataTypeID
       from #TempData a
       where sName != '$(UniqueCol)'
       order by a.lColOrder
OPEN curColumns
 
-- declare & initialize our variable that holds the current source table row that we're processing.
DECLARE @lCurrentSourceRow int;
set @lCurrentSourceRow = 1;

DECLARE @sCurrentUniqueColData varchar(max)

-- keep going while we're still pointing to a valid row number
WHILE (@lCurrentSourceRow <= @lSourceRowCount)
begin
       -- get the 1st row from the columns cursor.
       FETCH first from curColumns into @sCurrentCol, @lDataType
 
       -- increment our row count
       set @lSrcRowCount = @lSrcRowCount + 1
 
       --set @sCurrentCol = '$(UniqueCol)'
       -- update the data for the UniqueCol row.
       set @cmd = 'update #TempData set ColData=(select [$(UniqueCol)] from (select [$(UniqueCol)], rownumber = row_number() over (order by $(UniqueCol)) from $(TableName)) a where a.rownumber = ' + convert(varchar, @lCurrentSourceRow) + ') where sName=''$(UniqueCol)'''
       --print @cmd
       exec (@cmd)
 
       --print 'Current column: ' + @sCurrentCol

       -- get the value of the unique column for the current row that we just stored into our temp table.
       set @sCurrentUniqueColData = (
              select ColData from #TempData where sName = '$(UniqueCol)'
       )

       -- keep going while the last fetch was good.
       WHILE (@@fetch_status = 0)
       begin
              -- build the command statement for the current column
              set @cmd =
                     case @lDataType
                            when 61 then       -- Handle the date/time column a bit differently.
                                   'update #TempData set ColData=(select convert(varchar(255), [' + @sCurrentCol + '], 126) from $(TableName) a where a.[$(UniqueCol)] = ' + @sTick + @sCurrentUniqueColData + @sTick + ') where sName=''' + @sCurrentCol + ''''

                            when 189 then       -- rowversion columns
                                   'update #TempData set ColData=(select convert(int, [' + @sCurrentCol + ']) from $(TableName) a where a.[$(UniqueCol)] = ' + @sTick + @sCurrentUniqueColData + @sTick + ') where sName=''' + @sCurrentCol + ''''

                            when 173 then              -- varbinary
                                   'update #TempData set ColData=''convert(binary, '''''''')'' where sName=''' + @sCurrentCol + ''''

                            else
                                   'update #TempData set ColData=(select [' + @sCurrentCol + '] from $(TableName) a where a.[$(UniqueCol)] = ' + @sTick + @sCurrentUniqueColData + @sTick + ') where sName=''' + @sCurrentCol + ''''
                     end
 
              --print @cmd
              exec (@cmd)
 
              -- get the next row from the cursor.
              FETCH next from curColumns into @sCurrentCol, @lDataType
       end
 
       -- set our null value text
       UPDATE #TempData set
              ColData = --'null'
                     case 
                            when ColData is null then 'null'
                            else '''' + replace(rtrim(ColData), '''', '''''') + ''''
                     end
       where ColData is null or bUseTicks=1
 
       -- initialize the output line, depending on if this is the 1st line or not.
       if (@lSrcRowCount = 1)
       begin
              set @sOutputLine = @tab + @tab + '(' 
       end else begin
              set @sOutputLine = @tab + @tab + ', ('
       end
       set @sOutputLine = @sOutputLine + + convert(varchar(10), @lCurrentSourceRow) + ', '


       -- build our output line of data
       SELECT @sOutputLine = @sOutputLine
              + case when a.rownumber > 1 then ', ' else '' end
              + a.ColData
       from (
              select *
                     , rownumber = row_number() over (order by lColOrder)
              from #TempData b       
              --where (b.bIsIdentity = 0 or $(IncludeIdentity) = 1)
       ) a
       order by a.lColOrder
 
       -- tack on a close parenthesis at the end.
       set @sOutputLIne = @sOutputLine + ')'
 
       -- output the line we created.
       print @sOutputLine
 
 
 
       -- increment our source row index
       set @lCurrentSourceRow = @lCurrentSourceRow + 1

       --set @lCurrentSourceRow = @lSourceRowCount + 100
end
 
 
-- close & deallocate the columns cursor
CLOSE curColumns
DEALLOCATE curColumns
 
set @sOutputLine = @tab + ') as src([RowNum], '
SELECT @sOutputLine = @sOutputLine
       + case when a.lColOrder > @lFirstColOrder then ', ' else '' end
       + '[' + a.sName + ']'
from #TempData a
order by a.lColOrder
 
-- close the CTE portion of the output
set @sOutputLine = @sOutputLine + ')' + @crlf + ')'
print @sOutputLine
print '-- select * from cteSrc'
 
-- build the merge statement
print 'merge into $(TableName) as dest -- target'
print 'using cteSrc as src on dest.$(UniqueCol) = src.$(UniqueCol) -- source'
print 'when matched '

if ($(CompareAllColumnsBeforeUpdate) = 1)
begin
       print @tab + 'and '
       print @tab + '('

       set @sOutputLine = ''
       SELECT @sOutputLine = @sOutputLine
                 + case when a.RowNumber = 1 then @tab + @tab else @crlf + @tab + @tab + 'or ' end
                 + 'dest.[' + a.sName + '] != src.[' + a.sName + ']'
                 from (
                              select *
                                           , RowNumber = row_number() over (order by lColOrder)
                              from #TempData b
                              where b.sName != '$(UniqueCol)'
                 ) a
       order by a.lColOrder
       print @sOutputLine

       print @tab + ')'
end
print 'then -- on match update'
print 'update set '
 
-- build the update part of the merge statement
set @sOutputLine = ''
SELECT @sOutputLine = @sOutputLine
       + case when a.RowNumber = 1 then @tab else @crlf + @tab + ', ' end
       + 'dest.[' + a.sName + '] = src.[' + a.sName + ']'
from (
       select *
              , RowNumber = row_number() over (order by lColOrder)
       from #TempData b
       where b.sName != '$(UniqueCol)'
) a
order by a.lColOrder
print @sOutputLine
 
-- build the insert part of the merge statement
print 'when not matched then -- insert missing'
 
-- create the comma separated list of columns.
set @sOutputLine = ''
SELECT @sOutputLine = @sOutputLine
       + case when a.RowNumber = 1 then '' else ', ' end
       + '[' + a.sName + ']'
       from (
       select *
              , RowNumber = row_number() over (order by lColOrder)
       from #TempData
       ) a
order by a.lColOrder
 
-- output the insert column & value list.
print @tab + 'insert (' + @sOutputLine + ')'
print @tab + 'values (' + @sOutputLine + ')'
 
print '--when not matched by source then delete       -- Uncomment this line to delete rows in the destination table that are missing from source'
print ';'


declare @dtEndTime datetime
set @dtEndTime = getdate()

print '-- Execution time to pull data: ' + convert(varchar(100), @dtEndTime - @dtStartTime, 114)
 
-- turn off the identity insert on the table.
print @crlf + 'set identity_insert $(TableName) off'
print 'DBCC checkident(''$(TableName)'', reseed)'
 
--turn off the nocount
set nocount off;
 
 
--select * from #TempData
 
---- make sure the temp table does not already exist
--IF (OBJECT_ID('tempdb..#TempData') IS NOT NULL)
--begin
-- DROP TABLE #TempData
--end