Load Text/ CSV files To table when Number of Column Can Change in Source File/s - SQL Server Integration Services(SSIS) Tutorial

Scenario:

I have been reading this question so many times on different forums, blog post etc. " How to load the data to SQL Server Table from File when Columns can change in the source file. Sometime we have less number of columns than Table columns or sometime same number of columns. 


Let's move to this scenario today.

Solution

Step 1:

To make the SSIS Package more dynamic, I am going to use three Parameters, you can use variables if you are still using old versions of SSIS such as 2005 or 2008/R2.

Fig 1: Create SSIS Package Parameters or Variable so you can change when you need

As you can see that we have Source Folder parameter, Table name, Delimiter. So you can use this package to load any files from Source Folder.You only have to provide the delimiter of file and table name in which you want to load the file/s.


Step 2:

Click in Connection Manager Pane and then Create ADO.NET Connection by providing Server Name and database Name. After creating I have renamed it to DBConn.

Fig 2: Create ADO.NET Connection so we can use in Script Task


Step 3: 

Bring the Script Task to Control Flow Pane and then open it. I have used C# for scripting. Map the SSIS Variables/Package Parameters as shown below.

Fig 3: Map the SSIS Variables/Package Parameters to Script task in SSIS Package

Step 4: 
Click on Edit Script Button and then add below script to Script Task.

Under namespaces add these two namespaces

using System.Data.SqlClient;
using System.IO;

Replace public void Main with below script. There is a lot more you can do. You can even check the column name exists before you insert and handle that exception. In this code, I am not checking if column name exists or not. So if somebody provide a file with column name which do not exits in table, this script is going to fail.

public void Main()
        {

            string delimiter = Dts.Variables["$Package::Delimiter"].Value.ToString();
            string TableName = Dts.Variables["$Package::TableName"].Value.ToString();
            SqlConnection myADONETConnection = new SqlConnection();
            myADONETConnection = (SqlConnection)
         (Dts.Connections["DBConn"].AcquireConnection(Dts.Transaction) as SqlConnection);
           
            
            //Reading file names one by one
            string SourceDirectory = Dts.Variables["$Package::SourceFolder"].Value.ToString();
            string[] fileEntries = Directory.GetFiles(SourceDirectory);
            foreach (string fileName in fileEntries)
            {
                
               // MessageBox.Show(fileName);
                string columname = "";
                
                //Reading first line of each file and assign to variable
                System.IO.StreamReader file2 = new System.IO.StreamReader(fileName);


                
                //Writing Data of File Into Table
                int counter = 0;
                string line;

                System.IO.StreamReader SourceFile =
                new System.IO.StreamReader(fileName);
                while ((line = SourceFile.ReadLine()) != null)
                {

                    if (counter == 0)
                    {
                        columname = line.ToString();
                        columname = "" + columname.Replace(delimiter, ",") ;
                        //MessageBox.Show(columname);
                      }

                    else
                    {
                       // MessageBox.Show("Inside ELSE");
                        string query = "Insert into " + TableName +
           "(" + columname + ") VALUES('" + line.Replace(delimiter, "','") + "')";
                        //MessageBox.Show(query.ToString());
                        SqlCommand myCommand1 = new SqlCommand(query, myADONETConnection);
                        myCommand1.ExecuteNonQuery();
                    }

                    counter++;

                }

                SourceFile.Close();
            }
            // TODO: Add your code here

            Dts.TaskResult = (int)ScriptResults.Success;
        }



Save the script and close the Script Task Editor. Now you can go ahead and place files in Source directory with different columns. Provide the Table name in which you want to load those files in SSIS Package. 

Once you will deploy this SSIS Package, you will be using Configuration to change the values for TableName,Delimiter and Source Folder.

4 comments:

  1. This comment has been removed by the author.

    ReplyDelete
  2. This comment has been removed by the author.

    ReplyDelete
  3. This is great but it does not look for commas inside a value or breaks
    Ex. "123,
    abc"

    Here are some improvements

    using System;
    using System.Data.SqlClient;
    using System.IO;
    using System.Text.RegularExpressions;
    using System.Windows.Forms;

    -------------------------------------------------

    public void Main()
    {
    string delimiter = Dts.Variables["User::delimiter"].Value.ToString();
    string TableName = Dts.Variables["User::tableName"].Value.ToString();
    string SourceDirectory = Dts.Variables["User::fileDir"].Value.ToString();

    string[] fileEntries = Directory.GetFiles(SourceDirectory);
    Regex CSVParser = new Regex(",(?=(?:[^\"]*\"[^\"]*\")*(?![^\"]*\"))");

    SqlConnection myADONETConnection = new SqlConnection();
    myADONETConnection = (SqlConnection)

    (Dts.Connections["dbConnection"].AcquireConnection(Dts.Transaction) as SqlConnection);

    foreach (string fileName in fileEntries)
    {
    if (fileName.Contains(".csv") && fileName.Contains("RESM"))
    {

    //MessageBox.Show("fileName " + fileName);
    string columname = "";
    string newLine;
    string readLine;
    string appendLine = "";
    int headersCount = 0;
    int counter = 0;

    StreamReader SourceFile = new StreamReader(fileName);

    while ((newLine = SourceFile.ReadLine()) != null)
    {
    if (counter == 0)
    {
    columname = newLine.ToString();
    columname = "[" + columname.Replace(delimiter, "],[") + "]";
    headersCount = columname.Split('[').Length - 1;
    }

    else
    {
    readLine = appendLine + newLine.Replace("'", "''");

    string[] ParseredLine = CSVParser.Split(readLine);
    string values = string.Join("','", ParseredLine);

    if (headersCount == ParseredLine.Length)
    {
    // Clean up
    values = values.Replace(", '',", ",null,").Replace(",'',", ",null,").Replace(",'' ,", ",null,");
    values = values.Replace(",'\"", ",'").Replace("\"',", "',");

    string query = "Insert into " + TableName + "(" + columname + ", [FileName]) VALUES('" + values + "' , '" + fileName.Replace(SourceDirectory, "") + "')";
    SqlCommand myCommand = new SqlCommand(query, myADONETConnection);
    myCommand.ExecuteNonQuery();

    appendLine = "";
    }
    else {
    // MessageBox.Show("file has a break, here is the line so far");
    appendLine += newLine.Replace("'", "''");
    }
    }

    counter++;
    }

    SourceFile.Close();
    }
    }

    Dts.TaskResult = (int)ScriptResults.Success;
    }

    ReplyDelete
  4. btw.. need to remove: && fileName.Contains("RESM")

    ReplyDelete