Showing posts with label FlatFile. Show all posts
Showing posts with label FlatFile. Show all posts

Friday, September 28, 2012

BIML Select a DataFlow based on FileHeader

On a project the source file where delivered in one and same directory, but having a variety of headers:

imageimageimage

First I catch the the column string of the first row and lead it via a conditional split to a row count:

image

Based on the row count I run a specific dataflow task:

image

Here is a way to solve it using BIML:

<Biml xmlns="http://schemas.varigence.com/biml.xsd">
    <Annotations>
        <Annotation>
            File: Dataflow.Select.biml
            Demo on selection a dataflow based on the file header
            Enviroment:
            DB: MS-SQL2012
            BIML: 1.6 VS2010 BIDS Helper
            (c) John Minkjan biml101.blogspot.com
        </Annotation>
    </Annotations>
    <FileFormats>
        <FlatFileFormat Name="FFF CheckFile" IsUnicode ="false" RowDelimiter ="CRLF" ColumnNamesInFirstDataRow ="false">
            <Columns>
                <Column Name="FileInfo" DataType="String" CodePage ="1252" Length="250" Delimiter="CRLF"></Column>
            </Columns>
        </FlatFileFormat>
        <FlatFileFormat Name="FFF Type1" IsUnicode ="false" RowDelimiter ="CRLF" ColumnNamesInFirstDataRow ="true" >
            <Columns>
                <Column Name="PersId" DataType="String" CodePage ="1252" Length="250" Delimiter=";"></Column>
                <Column Name="FirstName" DataType="String" CodePage ="1252" Length="250" Delimiter="CRLF"></Column>
            </Columns>
        </FlatFileFormat>
        <FlatFileFormat Name="FFF Type2" IsUnicode ="false" RowDelimiter ="CRLF" ColumnNamesInFirstDataRow ="true" >
            <Columns>
                <Column Name="PersId" DataType="String" CodePage ="1252" Length="250" Delimiter=";"></Column>
                <Column Name="LastName" DataType="String" CodePage ="1252" Length="250" Delimiter="CRLF"></Column>
            </Columns>
        </FlatFileFormat>
        <FlatFileFormat Name="FFF Type3" IsUnicode ="false" RowDelimiter ="CRLF" ColumnNamesInFirstDataRow ="true" >
            <Columns>
                <Column Name="PersId" DataType="String" CodePage ="1252" Length="250" Delimiter=";"></Column>
                <Column Name="FirstName" DataType="String" CodePage ="1252" Length="250" Delimiter=";"></Column>
                <Column Name="LastName" DataType="String" CodePage ="1252" Length="250" Delimiter="CRLF"></Column>
            </Columns>
        </FlatFileFormat>

    </FileFormats>
    <Connections>
        <FlatFileConnection Name ="FFC SourceFile" FileFormat ="FFF CheckFile" FilePath ="C:\\DEMO\\PERSON1.TXT">
            <Expressions>
                <Expression PropertyName="ConnectionString">@[User::ProcessFileName]</Expression>
            </Expressions>           
        </FlatFileConnection>
        <FlatFileConnection Name ="FFC Type1" FileFormat ="FFF Type1" FilePath ="C:\\DEMO\\PERSON1.TXT">
            <Expressions>
                <Expression PropertyName="ConnectionString">@[User::ProcessFileName]</Expression>
            </Expressions>
        </FlatFileConnection>
        <FlatFileConnection Name ="FFC Type2" FileFormat ="FFF Type2" FilePath ="C:\\DEMO\\PERSON2.TXT">
            <Expressions>
                <Expression PropertyName="ConnectionString">@[User::ProcessFileName]</Expression>
            </Expressions>
        </FlatFileConnection>
        <FlatFileConnection Name ="FFC Type3" FileFormat ="FFF Type3" FilePath ="C:\\DEMO\\PERSON3.TXT">
            <Expressions>
                <Expression PropertyName="ConnectionString">@[User::ProcessFileName]</Expression>
            </Expressions>
        </FlatFileConnection>
        <OleDbConnection   
            Name="CnOleDBAdventureWorks2012"
            ConnectionString="Data Source=.\MSSQL2012;Initial Catalog=AdventureWorks2012;Provider=SQLNCLI10.1;Integrated Security=SSPI;Connect Timeout=30;"/>
    </Connections>
    <Packages>
        <Package Name="Dataflow.Select" ConstraintMode="Linear">
            <Variables>
                <Variable Name="SourceFileLocation" DataType ="String">C:\DEMO\</Variable>
                <Variable Name="FilePrefix"    DataType="String">Person</Variable>
                <Variable Name="FileSuffix" DataType="String">txt</Variable>
                <Variable Name="ProcessFileName" DataType ="String">C:\DEMO\Person3.txt</Variable>
               
            </Variables>
            <Tasks>
                <ForEachFileLoop Name="FLC File Select" ConstraintMode ="Parallel"
                                 Folder ="C:\DEMO\" FileSpecification="Person*.txt"  >
                    <VariableMappings>
                        <VariableMapping Name="Mapping" VariableName="User.ProcessFileName" />
                    </VariableMappings>
                    <Expressions>
                        <Expression PropertyName="Directory">@[User::SourceFileLocation]</Expression>
                        <Expression PropertyName="FileSpec">@[User::FilePrefix]+"*."+ @[User::FileSuffix]</Expression>
                    </Expressions>
                    <Variables>
                        <Variable Name="RowCountType1" DataType="Int64">0</Variable>
                        <Variable Name="RowCountType2" DataType="Int64">0</Variable>
                        <Variable Name="RowCountType3" DataType="Int64">0</Variable>
                    </Variables>
                    <Tasks>
                        <Dataflow Name="DFT Get File Header">
                            <Transformations>
                                <FlatFileSource Name="FFC SourceFile" ConnectionName ="FFC SourceFile">
                                    <Columns>
                                        <Column SourceColumn="FileInfo" TargetColumn ="FileInfo"></Column>
                                    </Columns>
                                </FlatFileSource>
                                <ConditionalSplit Name="CS FileType">
                                    <OutputPaths>
                                        <OutputPath Name ="Type1">
                                            <Expression>[FileInfo] =="PersId;FirstName"</Expression>
                                        </OutputPath>
                                        <OutputPath Name ="Type2">
                                            <Expression>[FileInfo] =="PersId;LastName"</Expression>
                                        </OutputPath>
                                        <OutputPath Name ="Type3">
                                            <Expression>[FileInfo] =="PersId;FirstName;LastName"</Expression>
                                        </OutputPath>
                                    </OutputPaths>                                   
                                </ConditionalSplit>
                                <RowCount Name ="RC Type1" VariableName="User.RowCountType1">
                                    <InputPath OutputPathName="CS FileType.Type1"></InputPath>
                                </RowCount>
                                <RowCount Name ="RC Type2" VariableName="User.RowCountType2">
                                    <InputPath OutputPathName="CS FileType.Type2"></InputPath>
                                </RowCount>
                                <RowCount Name ="RC Type3" VariableName="User.RowCountType3">
                                    <InputPath OutputPathName="CS FileType.Type3"></InputPath>
                                </RowCount>
                            </Transformations>
                        </Dataflow>
                        <Dataflow Name="DFT Type1">
                            <PrecedenceConstraints>
                                <Inputs>
                                    <Input EvaluationOperation ="ExpressionAndConstraint"
                                           EvaluationValue ="Success"
                                           Expression ="@RowCountType1!=0"
                                           OutputPathName ="DFT Get File Header.Output"></Input>
                                </Inputs>
                            </PrecedenceConstraints>
                            <Transformations>
                                <FlatFileSource Name="FFC Type1" ConnectionName ="FFC Type1"></FlatFileSource>
                                <OleDbDestination Name="ODD Persons" ConnectionName="CnOleDBAdventureWorks2012">
                                    <ExternalTableOutput Table ="Persons"></ExternalTableOutput>
                                </OleDbDestination>
                            </Transformations>
                        </Dataflow>
                        <Dataflow Name="DFT Type2">
                            <PrecedenceConstraints>
                                <Inputs>
                                    <Input EvaluationOperation ="ExpressionAndConstraint"
                                           EvaluationValue ="Success"
                                           Expression ="@RowCountType2!=0"
                                           OutputPathName ="DFT Get File Header.Output"></Input>
                                </Inputs>
                            </PrecedenceConstraints>
                            <Transformations>
                                <FlatFileSource Name="FFC Type2" ConnectionName ="FFC Type2"></FlatFileSource>
                                <OleDbDestination Name="ODD Persons" ConnectionName="CnOleDBAdventureWorks2012">
                                    <ExternalTableOutput Table ="Persons"></ExternalTableOutput>
                                </OleDbDestination>
                            </Transformations>
                        </Dataflow>
                        <Dataflow Name="DFT Type3">
                            <PrecedenceConstraints>
                                <Inputs>
                                    <Input EvaluationOperation ="ExpressionAndConstraint"
                                           EvaluationValue ="Success"
                                           Expression ="@RowCountType3!=0"
                                           OutputPathName ="DFT Get File Header.Output"></Input>
                                </Inputs>
                            </PrecedenceConstraints>
                            <Transformations>
                                <FlatFileSource Name="FFC Type3" ConnectionName ="FFC Type3"></FlatFileSource>
                                <OleDbDestination Name="ODD Persons" ConnectionName="CnOleDBAdventureWorks2012">
                                    <ExternalTableOutput Table ="Persons"></ExternalTableOutput>
                                </OleDbDestination>
                            </Transformations>
                        </Dataflow>
                    </Tasks>
                </ForEachFileLoop>
            </Tasks>
           
        </Package>
    </Packages>
</Biml>

This will get you:

image

Till Next Time

Tuesday, September 25, 2012

BIML Export to FlatFile

Demo BIML on creating an export to flat file package based on the SQL-Server metadata views:
<#@ template language="C#" hostspecific="true"#>
<#@ import namespace="System.Data"#>
<#@ import namespace="System.Data.SqlClient"#>
<#@ import namespace="System.IO"#>
<Biml xmlns="http://schemas.varigence.com/biml.xsd">
    <Annotations>
        <Annotation>
            File: Export.DataBase.Schema.biml
            Demo make a export to file package
            Enviroment:
            DB: MS-SQL2012
            BIML: 1.6 VS2010 BIDS Helper
            (c) John Minkjan biml101.blogspot.com
        </Annotation>
    </Annotations>
<#
    string CnSrcDB = "Server=.\\MSSQL2012;Initial Catalog=AdventureWorks2012;Integrated Security=SSPI;Provider=SQLNCLI11.1";
    string SrcTableCatalog = "AdventureWorks2012";
    string SrcTableSchema ="Sales";
    string TableCatalog="";
    string TableSchema ="";
    string TableName ="";
    string ColumnName ="";
    string DataType ="";
    string CharacterMaximumLength ="";
    string FlatFileFormatName ="";
    string MaxOrdinalPosition ="";
    string ColumnDelimeter =";";
   
    DataTable tFFFCol;
#>
<FileFormats>
    <# DataTable tFFF = ExternalDataAccess.GetDataTable(CnSrcDB,
    "SELECT t1.TABLE_CATALOG, t1.TABLE_SCHEMA, t1.TABLE_NAME,  max(t1.ORDINAL_POSITION) as MAX_ORDINAL_POSITION FROM  INFORMATION_SCHEMA.COLUMNS AS t1 INNER JOIN INFORMATION_SCHEMA.TABLES  AS t2 ON t1.TABLE_CATALOG = t2.TABLE_CATALOG  AND t1.TABLE_SCHEMA = t2.TABLE_SCHEMA AND t1.TABLE_NAME = t2.TABLE_NAME  WHERE        (t1.TABLE_CATALOG = '" +SrcTableCatalog + "') AND (t2.TABLE_TYPE = 'BASE TABLE')  and t2.TABLE_SCHEMA = '" + SrcTableSchema +"' group by t1.TABLE_CATALOG, t1.TABLE_SCHEMA, t1.TABLE_NAME");
    foreach (DataRow tFFFr in tFFF.Rows){
    TableCatalog =tFFFr[0].ToString();
    TableSchema = tFFFr[1].ToString();
    TableName = tFFFr[2].ToString();
    MaxOrdinalPosition = tFFFr[3].ToString();
    #>
    <FlatFileFormat FlatFileType="Delimited"     ColumnNamesInFirstDataRow="true"     RowDelimiter="CRLF"  Name="FFF <#=TableCatalog#> <#=TableSchema#> <#=TableName#>">
        <Columns>
            <# tFFFCol = ExternalDataAccess.GetDataTable(CnSrcDB,
            "SELECT t1.COLUMN_NAME, t1.DATA_TYPE, t1.CHARACTER_MAXIMUM_LENGTH, T1.ORDINAL_POSITION FROM INFORMATION_SCHEMA.COLUMNS AS t1 where t1.TABLE_CATALOG = '"+TableCatalog+"' and t1.TABLE_SCHEMA = '"+TableSchema+"' and t1.TABLE_NAME = '"+TableName+"'");
            foreach (DataRow tFFFRow in tFFFCol.Rows){
            ColumnName =tFFFRow[0].ToString();
            DataType = tFFFRow[1].ToString();
            CharacterMaximumLength = tFFFRow[2].ToString();   
                ColumnDelimeter = ";";
                if (MaxOrdinalPosition == tFFFRow[3].ToString()){ ColumnDelimeter = "CRLF"; } ;
                DataType = "String";                               
            #>
                <Column ColumnType="Delimited" DataType= "<#=DataType#>" Delimiter="<#=ColumnDelimeter#>" Length="4000" Name="<#=ColumnName#>"></Column>
            <# } #>
        </Columns>
    </FlatFileFormat>
    <#}  #>
</FileFormats>
    <Connections>
        <OleDbConnection
            Name="CnOleDBAdventureWorks2012"
            ConnectionString="Data Source=.\MSSQL2012;Initial Catalog=AdventureWorks2012;Provider=SQLNCLI10.1;Integrated Security=SSPI;Connect Timeout=30;"
            RetainSameConnection="true">
        </OleDbConnection>
        <# DataTable tFF = ExternalDataAccess.GetDataTable(CnSrcDB,
        "SELECT t1.TABLE_CATALOG, t1.TABLE_SCHEMA, t1.TABLE_NAME FROM  INFORMATION_SCHEMA.COLUMNS AS t1 INNER JOIN INFORMATION_SCHEMA.TABLES  AS t2 ON t1.TABLE_CATALOG = t2.TABLE_CATALOG  AND t1.TABLE_SCHEMA = t2.TABLE_SCHEMA AND t1.TABLE_NAME = t2.TABLE_NAME  WHERE        (t1.TABLE_CATALOG = '"
        +SrcTableCatalog+"') AND (t2.TABLE_TYPE = 'BASE TABLE') and t2.TABLE_SCHEMA = '"
        +SrcTableSchema+"' group by t1.TABLE_CATALOG, t1.TABLE_SCHEMA, t1.TABLE_NAME");
        foreach (DataRow tFFFr in tFFF.Rows){
        TableCatalog =tFFFr[0].ToString();
        TableSchema = tFFFr[1].ToString();
        TableName = tFFFr[2].ToString(); #>
            <FlatFileConnection Name="FF <#=TableCatalog#> <#=TableSchema#> <#=TableName#>"
                FileFormat="FFF <#=TableCatalog#> <#=TableSchema#> <#=TableName#>"
                FilePath="C:\\<#=TableCatalog#>.<#=TableSchema#>.<#=TableName#>.dat"></FlatFileConnection>           
            <# }#>
    </Connections>
    <Packages>
        <Package Name="Extract.All" ConstraintMode="Linear" FailParentOnFailure="false">
            <Tasks>
                <#    
DataTable t1 = ExternalDataAccess.GetDataTable(CnSrcDB,
"SELECT TABLE_CATALOG, TABLE_SCHEMA, TABLE_NAME FROM INFORMATION_SCHEMA.TABLES AS t2 WHERE (TABLE_CATALOG = '"
+SrcTableCatalog +"') AND (TABLE_TYPE = 'BASE TABLE') and t2.TABLE_SCHEMA = '"+SrcTableSchema+"' ");
foreach (DataRow row1 in t1.Rows){
    TableCatalog =row1[0].ToString();
    TableSchema = row1[1].ToString();
    TableName = row1[2].ToString(); #>
                <Dataflow  Name="DFT <#=TableCatalog#> <#=TableSchema#> <#=TableName#>">
                    <Transformations>
                        <OleDbSource
                            Name="ODS <#=TableCatalog#> <#=TableSchema#> <#=TableName#>"
                            ConnectionName="CnOleDBAdventureWorks2012">
                            <ExternalTableInput Table ="[<#=TableCatalog#>].[<#=TableSchema#>].[<#=TableName#>]">
                            </ExternalTableInput>
                        </OleDbSource>
                        <FlatFileDestination Name="DFF <#=TableCatalog#> <#=TableSchema#> <#=TableName#>"
                            ConnectionName="FF <#=TableCatalog#> <#=TableSchema#> <#=TableName#>"></FlatFileDestination>                       
                    </Transformations>
                </Dataflow>
                <# } #>
            </Tasks>
        </Package>
    </Packages>
</Biml>
Gives you:
image
Till Next Time