Search

Monday 2 May 2011

Oh No! Not Another Way to Write a CSV File??


It's one of those things I've implemented many times, usually when phrase "... and we'd also like to be able to export it as a CSV." has been tacked onto the end of some requirement at the last minute! Accordingly, each time I have had to knock up some sort of "CSV writer" at the last minute, I have invariably used a different approach.

The approach I am generally favouring at the moment, mainly for its versatility, is the use of an extension method which can be called from any strongly-typed collection. After all, in most scenarios our data is usually in the shape of some form of object collection. In brief, the extension method is as follows:

// C#
public static void ToCsv<T>(this IEnumerable<T> objects, Stream outputStream, Encoding encoding, char columnSeparator, string lineTerminator, char encapsulationCharacter, bool autoGenerateColumnHeaders, string[] columnHeaders, params Expression<Func<T, object>>[] outputValues)
{
    StreamWriter writer = new StreamWriter(outputStream, encoding);
    WriteColumnHeaders(writer, columnSeparator, lineTerminator, encapsulationCharacter, autoGenerateColumnHeaders, columnHeaders, outputValues);
    WriteData(objects, writer, columnSeparator, lineTerminator, encapsulationCharacter, outputValues);
    writer.Flush();
}
' Visual Basic
<Extension()>
Public Sub ToCsv(Of T)(ByVal objects As IEnumerable(Of T), ByVal outputStream As Stream, ByVal encoding As Encoding, ByVal columnSeparator As Char, ByVal lineTerminator As String, ByVal encapsulationCharacter As Char, ByVal autoGenerateColumns As Boolean, ByVal columnHeaders() As String, ByVal ParamArray outputValues() As Expression(Of Func(Of T, Object)))
    Dim writer As StreamWriter = New StreamWriter(outputStream, encoding)
    WriteColumnHeaders(writer, columnSeparator, lineTerminator, encapsulationCharacter, autoGenerateColumns, columnHeaders, outputValues)
    WriteData(objects, writer, columnSeparator, lineTerminator, encapsulationCharacter, outputValues)
    writer.Flush()
End Sub

The parameters of the method are as follows:

ParameterDescription
objectsThe collection of objects to be output in the CSV.
outputStreamThe Stream to output the CSV to. For example, this could be a filesystem stream or an HTTP stream.
encodingThe type of character encoding to use.
columnSeparatorThe character used to separate the columns. Traditionally a comma (',').
lineTerminatorThe character sequence to denote the end of a line, e.g.: CRLF for Windows, LF for UNIX.
encapsulationCharacterThe character used to encapsulate a value if that value contains the columnSeparator character. Traditionally double-quotes ('"').
autoGenerateColumnHeadersSpecifies whether to auto-generate the column headers.
columnHeadersSpecifies column headers. Ignored if autoGenerateColumnHeaders is true.
outputValuesA series of expressions to determine which values are to be output to the CSV.

As you can see, the extension methods calls two methods: WriteColumnHeaders() and WriteData() for writing the column headers and data respectively. Firstly, let's look at the code for WriteColumnHeaders():

// C#
private static void WriteColumnHeaders<T>(StreamWriter writer, char columnSeparator, string lineTerminator, char encapsulationCharacter, bool autoGenerateColumnHeaders, string[] columnHeaders, params Expression<Func<T, object>>[] outputValues)
{
    if (autoGenerateColumnHeaders)
    {
        for (int i = 0; i < outputValues.Length; i++)
        {
            Expression<Func<T, object>> expression = outputValues[i];
            string columnHeader;
            if (expression.Body is MemberExpression)
            {
                MemberExpression body = (MemberExpression)expression.Body;
                columnHeader = body.Member.Name;
            }
            else
                columnHeader = expression.Body.ToString();
            writer.Write(String.Format("{0}{1}", columnHeader.EncapsulateIfRequired(columnSeparator, encapsulationCharacter), columnSeparator));
        }
        writer.Write(lineTerminator);
    }
    else
    {
        if (columnHeaders != null && columnHeaders.Length > 0)
        {
            if (columnHeaders.Length == outputValues.Length)
            {
                for (int i = 0; i < columnHeaders.Length; i++)
                    writer.Write(String.Format("{0}{1}", columnHeaders[i].EncapsulateIfRequired(columnSeparator, encapsulationCharacter), columnSeparator));
                writer.Write(lineTerminator);
            }
            else
                throw new ArgumentException("The number of column headers does not match the number of output values.");
        }
    }
}
' Visual Basic
Private Sub WriteColumnHeaders(Of T)(ByVal writer As StreamWriter, ByVal columnSeparator As Char, ByVal lineTerminator As String, ByVal encapsulationCharacter As Char, ByVal autoGenerateColumns As Boolean, ByVal columnHeaders() As String, ByVal ParamArray outputValues() As Expression(Of Func(Of T, Object)))
    If autoGenerateColumns Then
        For i As Integer = 0 To outputValues.Length - 1
            Dim expression As Expression(Of Func(Of T, Object)) = outputValues(i)
            Dim columnHeader As String
            If TypeOf expression.Body Is MemberExpression Then
                Dim body As MemberExpression = DirectCast(expression.Body, MemberExpression)
                columnHeader = body.Member.Name
            Else
                columnHeader = expression.Body.ToString()
            End If
            writer.Write("{0}{1}", columnHeader.EncapsulateIfRequired(columnSeparator, encapsulationCharacter), columnSeparator)
        Next
        writer.Write(lineTerminator)
    Else
        If Not columnHeaders Is Nothing And columnHeaders.Length > 0 Then
            If columnHeaders.Length = outputValues.Length Then
                For i As Integer = 0 To columnHeaders.Length - 1
                    writer.Write(String.Format("{0}{1}", columnHeaders(i).EncapsulateIfRequired(columnSeparator, encapsulationCharacter), columnSeparator))
                Next
                writer.Write(lineTerminator)
            Else
                Throw New ArgumentException("The number of column headers does not match the number of output values.")
            End If
        End If
    End If
End Sub

If autoGenerateColumns is set to true, then this method will evaluate each of the expressions specified in outputValues. If the expression is a simple MemberExpression (i.e.: a call to a single property) then the column header will be set to the name of the member. If not, then the column header will be set to the string form of the expression.

If autoGenerateColumns is set to false, then the method will use the column headers supplied in columnHeaders, first making sure that the number of columns and column headers match.

Next, the code for the WriteData() method:

// C#
private static void WriteData<T>(this IEnumerable<T> objects, StreamWriter writer, char columnSeparator, string lineTerminator, char encapsulationCharacter, params Expression<Func<T, object>>[] outputValues)
{
    foreach (T obj in objects)
    {
        if (obj != null)
        {
            for (int i = 0; i < outputValues.Length; i++)
            {
                Func<T, object> valueFunc = outputValues[i].Compile();
                object value = valueFunc(obj);
                if (value != null)
                {
                    string valueString = value.ToString();
                    writer.Write(valueString.EncapsulateIfRequired(columnSeparator, encapsulationCharacter));
                }
                writer.Write(columnSeparator);
            }
            writer.Write(lineTerminator);
        }
    }
}
' Visual Basic
Private Sub WriteData(Of T)(ByVal objects As IEnumerable(Of T), ByVal writer As StreamWriter, ByVal columnSeparator As Char, ByVal lineTerminator As String, ByVal encapsulationCharacter As Char, ByVal ParamArray outputValues() As Expression(Of Func(Of T, Object)))
    For Each obj As T In objects
        If Not obj Is Nothing Then
            For i As Integer = 0 To outputValues.Length - 1
                Dim valueFunc As Func(Of T, Object) = outputValues(i).Compile()
                Dim value As Object = valueFunc(obj)
                If Not value Is Nothing Then
                    Dim valueString As String = value.ToString()
                    writer.Write(valueString.EncapsulateIfRequired(columnSeparator, encapsulationCharacter))
                End If
                writer.Write(columnSeparator)
            Next
            writer.Write(lineTerminator)
        End If
    Next
End Sub

This method enumerates through our collection of objects and outputs the desired values to our CSV, in the order we have specified them.

You will see that both of these methods make use of a further extension method, EncapsulateIfRequired(), for encapsulating the string values if they contain the column-separator. The code for this method is as follows:

// C#
private static string EncapsulateIfRequired(this string theString, char columnSeparator, char encapsulationCharacter)
{
    if (theString.Contains(columnSeparator))
        return String.Format("{1}{0}{1}", theString, encapsulationCharacter);
    else
        return theString;
}
' Visual Basic
<Extension()>
Private Function EncapsulateIfRequired(ByVal theString As String, ByVal columnSeparator As Char, ByVal encapsulationCharacter As Char) As String
    If theString.Contains(columnSeparator) Then
        Return String.Format("{1}{0}{1}", theString, encapsulationCharacter)
    Else
        Return theString
    End If
End Function

With all this in place you can add various overloads as required, to supply default values when calling the method. For example:

// C#
public static void ToCsv<T>(this IEnumerable<T> objects, Stream outputStream, params Expression<Func<T, object>>[] outputValues)
{
    objects.ToCsv(outputStream, Encoding.Default, ',', "\r\n", '"', true, null, outputValues);
}
' Visual Basic
<Extension()>
Public Sub ToCsv(Of T)(ByVal objects As IEnumerable(Of T), ByVal outputStream As Stream, ByVal ParamArray outputValues() As Expression(Of Func(Of T, Object)))
    objects.ToCsv(outputStream, Encoding.Default, ",", vbCrLf, """", True, Nothing, outputValues)
End Sub

An Example

In this example we are going to output an array of Person objects to a CSV file on the filesystem, with the columns in the order of LastName followed by FirstName:

// C#
public class Person
{
    public string FirstName { get; set; }
    public string LastName { get; set; }
}

Person[] people = new Person[]
{
    new Person() { FirstName = "Joe", LastName = "Bloggs" },
    new Person() { FirstName = "Fred", LastName = "Bloggs" },
    new Person() { FirstName = "John", LastName = "Smith" },
    new Person() { FirstName= "David, John", LastName = "Jones" }
};

using (Stream fileStream = new FileStream("People.csv", FileMode.Create, FileAccess.Write, FileShare.None))
 people.ToCsv(fileStream, x => x.LastName, x => x.FirstName);
' Visual Basic
Public Class Person
    Public Property FirstName As String
    Public Property LastName As String
End Class

Dim people() As Person = _
{ _
    New Person() With {.FirstName = "Joe", .LastName = "Bloggs"}, _
    New Person() With {.FirstName = "Fred", .LastName = "Bloggs"}, _
    New Person() With {.FirstName = "John", .LastName = "Smith"}, _
    New Person() With {.FirstName = "David, John", .LastName = "Jones"}
}

Using fileStream As Stream = New FileStream("People.csv", FileMode.Create, FileAccess.Write, FileShare.None)
    people.ToCsv(fileStream, Function(x) x.LastName, Function(x) x.FirstName)
End Using

No comments:

Post a Comment