CSV file parser and writer in C# (Part 2)

This is the second part of an article series exploring reading and writing CSV files with C#/.NET.

Part 1 covered converting the contents of a DataTable into CSV format; this part explains reading a CSV file back into a DataTable.

First some namespace imports and the namespace declaration for this project:

using System;
using System.Collections;
using System.Data;
using System.Text;
using System.IO;

namespace CsvParser
{

Like the CsvWriter class, all methods in the CsvParser class are static. There are four parser methods that return a DataTable: they read from either a string or a text stream, and they either expect a header line in the CSV source or not. Only one method has a real body; all the others just adapt their parameters and delegate to it. The implementation is simple: the method reads one "row" of the CSV source at a time and populates a row in the DataTable. The real meat is the private class CsvStream explained below. There is also one utility method that returns an unused column name for a DataTable, for cases where the CSV source has no headers or the headers are not unique.

        public class CsvParser
        {
                public static DataTable Parse(string data, bool headers)
                {
                        return Parse(new StringReader(data), headers);
                }
               
                public static DataTable Parse(string data)
                {
                        return Parse(new StringReader(data));
                }

                public static DataTable Parse(TextReader stream)
                {
                        return Parse(stream, false);
                }

                public static DataTable Parse(TextReader stream, bool headers)
                {
                        DataTable table = new DataTable();
                        CsvStream csv = new CsvStream(stream);
                        string[] row = csv.GetNextRow();
                        if (row == null)
                                return null;
                        if (headers)
                        {
                                foreach (string header in row)
                                {
                                        if (header != null && header.Length > 0 && !table.Columns.Contains(header))
                                                table.Columns.Add(header, typeof(string));
                                        else
                                                table.Columns.Add(GetNextColumnHeader(table), typeof(string));
                                }
                                row = csv.GetNextRow();
                        }
                        while (row != null)
                        {
                                while (row.Length > table.Columns.Count)
                                        table.Columns.Add(GetNextColumnHeader(table), typeof(string));
                                table.Rows.Add(row);
                                row = csv.GetNextRow();
                        }
                        return table;
                }

                private static string GetNextColumnHeader(DataTable table)
                {
                        int c = 1;
                        while (true)
                        {
                                string h = "Column" + c++;
                                if (!table.Columns.Contains(h))
                                        return h;
                        }
                }

The CsvStream class does the actual work: it reads the CSV source one character at a time and returns meaningful chunks of decoded data, namely data items and rows.

                private class CsvStream
                {
                        private TextReader stream;                 
                       
                        public CsvStream(TextReader s)
                        {
                                stream = s;
                        }

                        public string[] GetNextRow()
                        {
                                ArrayList row = new ArrayList();
                                while (true)
                                {
                                        string item = GetNextItem();
                                        if (item == null)
                                                return row.Count == 0 ? null : (string[])row.ToArray(typeof(string));
                                        row.Add(item);
                                }
                        }

                        private bool EOS = false;
                        private bool EOL = false;

                        private string GetNextItem()
                        {
                                if (EOL)
                                {
                                        // previous item was last in line, start new line
                                        EOL = false;
                                        return null;
                                }

                                bool quoted = false;
                                bool predata = true;
                                bool postdata = false;
                                StringBuilder item = new StringBuilder();
                               
                                while (true)
                                {
                                        char c = GetNextChar(true);
                                        if (EOS)
                                                return item.Length > 0 ? item.ToString() : null;

                                        if ((postdata || !quoted) && c == ',')
                                                // end of item, return
                                                return item.ToString();
                                       
                                        if ((predata || postdata || !quoted) && (c == '\x0A' || c == '\x0D'))
                                        {
                                                // we are at the end of the line, eat newline characters and exit
                                                EOL = true;
                                                if (c == '\x0D' && GetNextChar(false) == '\x0A')
                                                        // new line sequence is 0D0A
                                                        GetNextChar(true);
                                                return item.ToString();
                                        }

                                        if (predata && c == ' ')
                                                // whitespace preceeding data, discard
                                                continue;

                                        if (predata && c == '"')
                                        {
                                                // quoted data is starting
                                                quoted = true;
                                                predata = false;
                                                continue;
                                        }

                                        if (predata)
                                        {
                                                // data is starting without quotes
                                                predata = false;
                                                item.Append(c);
                                                continue;
                                        }

                                        if (c == '"' && quoted)
                                        {
                                                if (GetNextChar(false) == '"')
                                                        // double quotes within quoted string means add a quote       
                                                        item.Append(GetNextChar(true));
                                                else
                                                        // end-quote reached
                                                        postdata = true;
                                                continue;
                                        }

                                        // all cases covered, character must be data
                                        item.Append(c);
                                }
                        }

                        private char[] buffer = new char[4096];
                        private int pos = 0;
                        private int length = 0;

                        private char GetNextChar(bool eat)
                        {
                                if (pos >= length)
                                {
                                        length = stream.ReadBlock(buffer, 0, buffer.Length);
                                        if (length == 0)
                                        {
                                                EOS = true;
                                                return '\0';
                                        }
                                        pos = 0;
                                }
                                if (eat)
                                        return buffer[pos++];
                                else
                                        return buffer[pos];
                        }
                }
        }
}

And that's about it. In an upcoming part of this article I'll share some NUnit test cases for these classes.

CSV file parser and writer in C# (Part 1)

An issue that comes up quite frequently is how to read and write comma-separated value (CSV) files in C#. Surprisingly, the .NET libraries have no built-in support for this, and the usual solution, using an OleDb connection to the CSV file with Microsoft Excel's database driver, is convoluted and not cross-platform.

CSV files have a very simple structure (source):

  • Each record is one line (with exceptions)
  • Fields are separated with commas
  • Leading and trailing space-characters adjacent to comma field separators are ignored
  • Fields with embedded commas must be delimited with double-quote characters
  • Fields that contain double-quote characters must be surrounded by double quotes, and each embedded double quote must be represented by a pair of consecutive double quotes.
  • A field that contains embedded line breaks must be surrounded by double quotes
  • Fields with leading or trailing spaces must be delimited with double-quote characters
  • Fields may always be delimited with double quotes
  • The first record in a CSV file may be a header record containing column (field) names

In this article, I'll provide simple, but fully functional code to read and write a CSV file according to these rules. In memory, data will be represented as a DataTable, which makes it easy to process; for storage in the file system or transfer over a network the CSV data will be stored as a String or in a Stream.


First, the easy part: writing a DataTable to a CSV file.

/// <summary>
/// Writes the contents of a <see cref="DataTable"/> as CSV text: items are
/// separated by commas, each record ends with a '\n', and items are wrapped
/// in double quotes when needed (or always, on request).
/// </summary>
public class CsvWriter
{
        // Characters that force an item to be quoted: quote, comma, LF, CR.
        private static readonly char[] SpecialChars = new char[] { '"', ',', '\x0A', '\x0D' };

        /// <summary>
        /// Converts a table to CSV text and returns it as a string.
        /// </summary>
        /// <param name="table">Table whose columns and rows are written.</param>
        /// <param name="header">True to write the column captions as the first record.</param>
        /// <param name="quoteall">True to quote every item, not only those that need it.</param>
        /// <returns>The CSV text.</returns>
        public static string WriteToString(DataTable table, bool header, bool quoteall)
        {
                using (StringWriter writer = new StringWriter())
                {
                        WriteToStream(writer, table, header, quoteall);
                        return writer.ToString();
                }
        }

        /// <summary>
        /// Writes a table as CSV text to the given writer.
        /// </summary>
        /// <param name="stream">Destination for the CSV text.</param>
        /// <param name="table">Table whose columns and rows are written.</param>
        /// <param name="header">True to write the column captions as the first record.</param>
        /// <param name="quoteall">True to quote every item, not only those that need it.</param>
        public static void WriteToStream(TextWriter stream, DataTable table, bool header, bool quoteall)
        {
                // Index of the last column: items before it are followed by a
                // comma, the last one by the record terminator.
                int last = table.Columns.Count - 1;

                if (header)
                {
                        for (int i = 0; i <= last; i++)
                        {
                                WriteItem(stream, table.Columns[i].Caption, quoteall);
                                stream.Write(i < last ? ',' : '\n');
                        }
                }

                foreach (DataRow row in table.Rows)
                {
                        for (int i = 0; i <= last; i++)
                        {
                                WriteItem(stream, row[i], quoteall);
                                stream.Write(i < last ? ',' : '\n');
                        }
                }
        }

        /// <summary>
        /// Writes a single item, quoting it when requested or required.
        /// A null item writes nothing.
        /// </summary>
        /// <param name="stream">Destination for the item text.</param>
        /// <param name="item">Value to write; converted with <c>ToString()</c>.</param>
        /// <param name="quoteall">True to quote the item unconditionally.</param>
        private static void WriteItem(TextWriter stream, object item, bool quoteall)
        {
                if (item == null)
                        return;
                string s = item.ToString();

                // Leading or trailing spaces are only preserved by a CSV reader when
                // the item is quoted, so such values must be quoted as well.
                bool padded = s.Length > 0 && (s[0] == ' ' || s[s.Length - 1] == ' ');

                if (quoteall || padded || s.IndexOfAny(SpecialChars) > -1)
                        // embedded quotes are written as a pair of quotes
                        stream.Write("\"" + s.Replace("\"", "\"\"") + "\"");
                else
                        stream.Write(s);
        }
}

The methods are static since the whole conversion is done in one method call, there is no need to create object instances etc.

WriteToString will return the CSV file in a string; it is just a wrapper around the more generic WriteToStream method. Both methods take a DataTable and two boolean flags to indicate if you want to write a header line (which would use the column headers of the DataTable) and if you want to quote all values instead of only values that need to be quoted.

Since CSV files do not work well for binary data, your DataTable should not contain any, although the resulting file would still be valid and could be read back in.

WriteToStream just loops through all rows and columns of the DataTable and writes the individual data items to the output stream.

The WriteItem method finally encodes an individual data item and, if necessary or requested, adds quotes around it.

In upcoming parts of this article series, I'll provide and explain code to read a CSV file back into a DataTable, and how to use NUnit to test everything.

Using BlogEngine.net as a General Purpose Content Management System

So I keep running into the same problem – I am building a small website for somebody and I need to provide them with a way to update the content of their site so I don’t have to. Basically, I need a lightweight and flexible content management system that is easy to use.

If The Shoe Fits…

When I first thought of a lightweight CMS, I thought of Graffiti. It sounded like exactly what I needed, so I downloaded the express edition and started evaluating it. It seemed like a nice product and all, but it is not free for commercial use ($399 is the cheapest commercial licence), and I can’t afford that price tag when building small websites.

Enter BlogEngine.net. My favorite blogging platform. There, I said it.  Well, I use it to run my blog and I am constantly tinkering around with the site all of the time because I enjoy using BlogEngine.net so much.

I thought that BlogEngine.net has all of the key pieces I needed for my lightweight CMS:

  1. A WYSIWYG Editor
  2. A Metaweblog interface
  3. Tons of extensibility

Basic Idea

I decided to base my CMS implementation on the concept of pages. Most blog engines have two distinct types of content: pages and posts. Posts are the typical type of content that becomes part of your blog's feed, whereas pages are usually static content which can be anything outside of a blog post (for example an ‘About Me’ page). BlogEngine.net already has everything I need to get the content of a page created and persisted in a data store (it supports XML and SQL Server out of the box). I decided to write a web control which I can place on any web page to include the contents of a given page from the data store.

I made a control called PageViewer which you can place on the page like this:

<blog:PageViewer ID="view" runat="server" DisplayTitle="false" PageId="167eb7f3-135b-4f90-9756-be25ec10f14c" />

This control basically just looks up the page using the given id (this functionality is all provided by the existing BlogEngine.Core library) and displays its content. Here is the rendering logic

// Look the page up only when a page id has been set on the control.
if (PageId != Guid.Empty)
{
    page = BlogEngine.Core.Page.GetPage(PageId);
}

if (page != null)
{
    // Raise the serving event with the page content.
    // NOTE(review): handlers presumably may change arg.Body or set arg.Cancel — confirm against BlogEngine.Core.
    ServingEventArgs arg = new ServingEventArgs(page.Content, ServingLocation.SinglePage);
    BlogEngine.Core.Page.OnServing(page, arg);

    if (arg.Cancel)
    {
        Page.Response.Redirect("error404/", true);
    }

    // The title is optional and is written as a heading above the body.
    if (DisplayTitle)
    {
        writer.Write("<h1>");
        writer.Write(page.Title);
        writer.Write("</h1>");
    }

    writer.Write("<div>");
    writer.Write(arg.Body);
    writer.Write("</div>");
}

This code is pretty straightforward – all it does is get an instance of the page and then display its title in an <h1> tag and its body in a <div> tag. This logic is taken straight from the page retrieval code that already exists in BlogEngine.net. This web control is pretty much the only new code I had to write. The rest of the project mostly involves moving files around and removing parts of the BlogEngine.net framework that I don’t need.

Armed with this control, we are ready to start converting the static pages from the old version of the website to be BlogEngine.net pages which can be stored and retrieved using the BlogEngine.Core classes.

Themes

It’s also worth noting that there are many themes available for BlogEngine

themes4blogengine.net 

Wiki

I've been wanting to create a wiki for some time, and I think I may have found one worth looking at: 'Screw Turn'. It's not the most inspiring name, but I have read good reviews, so in my spare time I'll be having a look at what it can do and how I can implement it: www.screwturn.eu

 

Directions to Coppermill

DIRECTIONS TO 31 The Pound, Bromham, SN15 2HE
From where-ever you start drive to Junction 17 of M4
THE FOLLOWING WILL ONLY TAKE YOU ABOUT 15 MINS!
Take the A350 to Chippenham (Mostly Dual Carriageway)
Services on 1st roundabout and keep following signs ‘A350 Chippenham’
Lots of roundabouts
Reach roundabout, turn left following signs for Chippenham/Devizes; this roundabout has Sainsburys on your left (it's quite hidden)
Drive through housing area with lots of roundabouts (you're going round the outskirts of Chippenham)
Reach roundabout with ‘Town Centre left’ and ‘Devizes right’, turn right.
Drive past a small petrol station and a pub, then turn right up the hill ‘A342 Devizes’
Go up the hill past ‘Bowood Country Club’ on your left
Follow road through ‘Sandy Lane’ with lovely thatch cottages
Road becomes v straight again with sign post ‘Melksham right’ DON’T turn right! 
Keep straight on
Almost there so slow down, a few bends, sign post turn right BROMHAM
Drive past tennis courts on right with kids' play area – keep slow!
Set of trees on right, Bryan's house is 1st house on right
IT’S QUITE HIDDEN!
Turn right and park 

this and that