Reducing Size of Overprovisioned Columns

A while ago, the company I worked for acquired another subsidiary. The database we inherited was full of overprovisioned columns. Today I wrote a script that would identify these columns. Additionally, I wanted to know the maximum data size (for said column) in any given row, and then recommend a new size limit the column could be adjusted for, by adding 20%. I also decided to round that size up to the nearest ten.

This is the script I came up with to handle the job.

/* THIS SCRIPT WILL EVALUATE and REPORT on table VARCHAR columns and give recommendations for REDUCTION or EXPANSION
It will create a sample ALTER TABLE statement for each item.
By David Speight
August 14, 2024
*/

DECLARE @multiplier float = 1.2;
DECLARE @curTsql nvarchar(2000);
DECLARE @curMaxLen int;
DECLARE @iLoop int = 0;

DECLARE @columnSpecs AS TABLE 
(	ID INT NOT NULL IDENTITY(1,1),
	DatabaseName varchar(250),
	SchemaName varchar(250),
	TableName varchar(250), 								
	ColumnName varchar(250), 
	DataType varchar(100),
	Size int, 
	MaxLen int,
	Recommend int,
	Increase bit,
	is_nullable bit,
	TsqlAlterTable varchar(2500));

INSERT INTO @columnSpecs (DatabaseName, SchemaName, TableName, ColumnName, Datatype, Size, is_nullable)
SELECT  DB_NAME(), S.name, T.name, C.name, P.name, C.max_length , C.is_nullable
FROM sys.objects AS T 
JOIN sys.columns AS C ON T.object_id=C.object_id
JOIN sys.types AS P ON C.system_type_id=P.system_type_id
JOIN sys.schemas S ON S.schema_id = T.schema_id    
WHERE T.type_desc='USER_TABLE' AND P.name <> 'sysname'
and P.name like '%varchar%'
AND (C.max_length > 100 or C.max_length = -1);

SELECT @iLoop = MAX(ID) FROM @columnSpecs;
WHILE @iLoop > 0
BEGIN
	-- Find the maximum character size of all rows for evaluated column.
	SELECT @curTsql = CONCAT('SELECT @x = MAX(LEN([',ColumnName,'])) FROM [',SchemaName,'].[',TableName,'] WITH(NOLOCK)')
	FROM @columnSpecs
	WHERE ID = @iLoop;	

	EXEC SP_EXECUTESQL @curTsql, N'@x INT OUT', @curMaxLen OUT;

	UPDATE @columnSpecs
	SET MaxLen = ISNULL(@curMaxLen,0)
	WHERE ID = @iLoop;

	SELECT	@iLoop		=	@iLoop-1,
			@curMaxLen	=	0,
			@curTsql	=	'';
END;

-- Handle recommendations
UPDATE @columnSpecs
SET Recommend = (CASE WHEN (MaxLen = 0) THEN 24							
	WHEN ((Size - MaxLen) > (MaxLen * @multiplier)) or (Size = -1) THEN (MaxLen * @multiplier)
	WHEN (Size - MaxLen) = 0 THEN (Size * @multiplier) 
	ELSE Size END),
Increase =  (CASE WHEN (Size - MaxLen) = 0 THEN 1 ELSE 0 END)
WHERE ID > 0;

-- Handle Varchar(max)
UPDATE @columnSpecs
SET Recommend = (CASE	WHEN (Size = -1) and (MaxLen < (400)) THEN Recommend
	WHEN (Size = -1) and (MaxLen < (800)) THEN 1000
	WHEN (Size = -1) and (MaxLen < (1600)) THEN 2000
	WHEN (Size = -1) and (MaxLen < (2400)) THEN 3000
	WHEN (Size = -1) and (DataType = 'varchar') and (MaxLen < (3200)) THEN 4000
	WHEN (Size = -1) and (DataType = 'varchar') and (MaxLen < (4200)) THEN 5000
	WHEN (Size = -1) and (DataType = 'varchar') and (MaxLen < (5200)) THEN 6000
	WHEN (Size = -1) and (DataType = 'varchar') and (MaxLen < (6200)) THEN 7000
	ELSE Recommend END)
WHERE Size = -1;

-- DELETE ROWS NOT NEEDING CHANGE
DELETE 
FROM @columnSpecs
WHERE ((Size > 0) and (Increase = 0 And Recommend >= Size))
	OR ((Size > 0) and (Increase = 1 And Recommend <= Size))
	OR ((Size = -1 and Recommend > 7000));

-- ROUND UP TO NEAREST 10
UPDATE @columnSpecs
SET Recommend =  floor((CONVERT(int,Recommend) + 9) / 10) * 10
WHERE isNumeric(Recommend) = 1;

-- USE 9999 to indicate MAX should be used for the data type
UPDATE @columnSpecs
SET Recommend = 9999
WHERE (DataType = 'nvarchar' and Recommend > 3500)
OR	(DataType = 'varchar' and Recommend > 7000);

-- CREATE Tsql statement to alter the table.
UPDATE @columnSpecs
SET TsqlAlterTable = CONCAT('ALTER TABLE [',SchemaName,'].[',TableName,'] ALTER COLUMN [',ColumnName,'] ',DataType,'(',
	(CASE WHEN Recommend = 9999 THEN 'MAX' ELSE Recommend END) ,') ',
	(CASE WHEN is_nullable = 1 THEN 'NULL' ELSE 'NOT NULL' END) ,';')
WHERE Recommend != 0;

SELECT * FROM @columnSpecs ORDER BY TableName, ColumnName;

SAMPLE OUTPUT

End.