developer's diary

最近はc#のエントリが多いです

C# (dotnetcore) DataTableをSerialize/Deserializeする(System.Xml.SerializationとProtoBuf.Dataを比較)

DataTableをシリアライズ/デシリアライズしてみます。

また、XmlSerializerとprotobuf-net-dataのそれぞれでシリアライズして作成されたファイルのサイズ比較と、シリアライズ/デシリアライズの速度比較を行ってみました。

用語

用語 英語 意味
シリアライズ Serialize オブジェクトを文字列、バイト配列に置き換える
デシリアライズ Deserialize シリアライズ した文字列・バイト配列からオブジェクトに戻す

DataTableを作る

// Build an empty table named "TABLE": an int key column, a name column,
// and seven string columns COL1..COL7.
var dt = new DataTable("TABLE");
dt.Columns.Add("NO", typeof(int));
dt.Columns.Add("NAME", typeof(string));
for (var n = 1; n <= 7; n++)
{
    dt.Columns.Add("COL" + n, typeof(string));
}

DataTableにデータを追加

MD5関数作成(サンプルデータに使います)

/// <summary>
/// Returns the Base64-encoded MD5 hash of the decimal text of <paramref name="i"/>.
/// Used only to generate sample column data, not for any security purpose.
/// </summary>
/// <param name="i">Number whose <c>ToString()</c> text (ASCII-encoded) is hashed.</param>
/// <returns>The 16-byte MD5 digest as a 24-character Base64 string.</returns>
private static string MD5(int i)
{
    // MD5.Create() replaces the obsolete MD5CryptoServiceProvider; the hash object
    // is IDisposable, so release it as soon as the digest has been computed.
    using (var md5 = System.Security.Cryptography.MD5.Create())
    {
        byte[] input = System.Text.Encoding.ASCII.GetBytes(i.ToString());
        return System.Convert.ToBase64String(md5.ComputeHash(input));
    }
}

データ追加用の関数作成

/// <summary>
/// Appends one sample row to <paramref name="dt"/>, derived from <paramref name="i"/>.
/// </summary>
/// <param name="dt">Table with the columns NO, NAME and COL1..COL7.</param>
/// <param name="i">Seed value: stored in NO, embedded in NAME, and hashed for COL1..COL6.</param>
private static void AddRow(DataTable dt, int i)
{
    DataRow row = dt.NewRow();
    row["NO"] = i;
    row["NAME"] = "NAME:" + i;

    // COL1..COL6 hold MD5(i * 1) .. MD5(i * 6).
    for (var k = 1; k <= 6; k++)
    {
        row["COL" + k] = MD5(i * k);
    }

    // COL7 is stored as a database NULL.
    row["COL7"] = System.DBNull.Value;
    dt.Rows.Add(row);
}

データ追加の様子(i<1;の1を増やすと作成するレコード数が増えます)

// Append sample rows for i = 0 .. (upper bound - 1); raise the bound to add more rows.
for (var i = 0; i < 1; i++)
{
    AddRow(dt, i);
}

XmlSerializerでシリアライズ

// Write dt to data.xml as XML; FileMode.Create overwrites any existing file.
using (var xmlFile = new FileStream("data.xml", FileMode.Create))
{
    new XmlSerializer(dt.GetType()).Serialize(xmlFile, dt);
}

XmlSerializerでデシリアライズ

// Read data.xml back and rebuild the table into xmlSerializerDt.
using (var xmlFile = File.OpenRead("data.xml"))
{
    var xml = new XmlSerializer(dt.GetType());
    object restored = xml.Deserialize(xmlFile);
    xmlSerializerDt = (DataTable)restored;
}

ProtoBufでシリアライズ

// Write dt to data.data with protobuf-net-data; FileMode.Create overwrites any existing file.
using (var protoFile = new FileStream("data.data", FileMode.Create))
{
    DataSerializer.Serialize(protoFile, dt);
}

ProtoBufでデシリアライズ

// Read data.data back into a fresh DataTable.
var protoBufDt = new DataTable();
using (Stream stream = File.OpenRead("data.data"))
// DataTable.Load does not close the reader it consumes, so dispose it explicitly
// (before the underlying stream is closed).
using (IDataReader reader = DataSerializer.Deserialize(stream))
{
    protoBufDt.Load(reader);
}

速度比較用プログラム

using System;
using System.Data;
using System.Linq;
using System.Diagnostics;
using System.IO;
using System.Xml.Serialization;
using ProtoBuf.Data;

namespace serializeExample
{
    /// <summary>
    /// Console benchmark that serializes and deserializes the same <see cref="DataTable"/>
    /// with <see cref="XmlSerializer"/> and with protobuf-net-data's <c>DataSerializer</c>,
    /// printing elapsed time, output file size, and a row-level comparison against the original.
    /// </summary>
    class Program
    {
        /// <summary>Output file written and re-read by the XmlSerializer measurement.</summary>
        private const string XmlPath = "data.xml";

        /// <summary>Output file written and re-read by the protobuf-net-data measurement.</summary>
        private const string ProtoBufPath = "data.data";

        /// <summary>
        /// Number of rows appended before each measurement. The table is never cleared,
        /// so the measured row counts are cumulative: 1, 11, 111, ... 1,111,111.
        /// </summary>
        private static readonly int[] BatchSizes = { 1, 10, 100, 1000, 10000, 100000, 1000000 };

        /// <summary>
        /// Builds the sample table and runs one measurement after each batch of rows is appended.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            DataTable dt = new DataTable();
            dt.TableName = "TABLE";
            dt.Columns.Add("NO", typeof(int));
            dt.Columns.Add("NAME", typeof(string));
            for (var n = 1; n <= 7; n++)
            {
                dt.Columns.Add("COL" + n, typeof(string));
            }

            foreach (var batchSize in BatchSizes)
            {
                for (var i = 0; i < batchSize; i++)
                {
                    AddRow(dt, i);
                }

                Console.WriteLine("Count:{0:#,0}", dt.Rows.Count);
                Diff(dt);
            }
        }

        /// <summary>
        /// Round-trips <paramref name="dt"/> through both serializers, printing timings,
        /// file sizes, and how many original rows are missing from each deserialized copy.
        /// </summary>
        /// <param name="dt">Table to serialize; it is not modified.</param>
        private static void Diff(DataTable dt)
        {
            DataTable xmlSerializerDt = null;
            DataTable protoBufDt = null;

            // Numbers use "#,0" rather than "#,#": the latter renders zero as an empty
            // string, which would print e.g. "XML.Serialize:ms" for a sub-millisecond run.
            Stopwatch sw = Stopwatch.StartNew();
            using (Stream stream = new FileStream(XmlPath, FileMode.Create))
            {
                XmlSerializer serializer = new XmlSerializer(dt.GetType());
                serializer.Serialize(stream, dt);
            }
            sw.Stop();
            Console.WriteLine("XML.Serialize:{0:#,0}ms", sw.ElapsedMilliseconds);

            Console.WriteLine("File Size:{0:#,0} byte", new FileInfo(XmlPath).Length);

            sw.Restart();
            using (Stream stream = File.OpenRead(XmlPath))
            {
                XmlSerializer serializer = new XmlSerializer(dt.GetType());
                xmlSerializerDt = (DataTable)serializer.Deserialize(stream);
            }
            sw.Stop();
            Console.WriteLine("XML.Deserialize:{0:#,0}ms", sw.ElapsedMilliseconds);

            sw.Restart();
            using (Stream stream = new FileStream(ProtoBufPath, FileMode.Create))
            {
                DataSerializer.Serialize(stream, dt);
            }
            sw.Stop();
            Console.WriteLine("ProtoBuf.Serialize:{0:#,0}ms", sw.ElapsedMilliseconds);

            Console.WriteLine("File Size:{0:#,0} byte", new FileInfo(ProtoBufPath).Length);

            sw.Restart();
            protoBufDt = new DataTable();
            using (Stream stream = File.OpenRead(ProtoBufPath))
            // DataTable.Load does not close the reader it consumes, so dispose it explicitly.
            using (IDataReader reader = DataSerializer.Deserialize(stream))
            {
                protoBufDt.Load(reader);
            }
            sw.Stop();
            Console.WriteLine("ProtoBuf.Deserialize:{0:#,0}ms", sw.ElapsedMilliseconds);

            // Except is a one-directional set difference: it counts distinct original rows
            // (compared by value) that do not appear in the deserialized table.
            var missingFromXml = dt.AsEnumerable().Except(xmlSerializerDt.AsEnumerable(), DataRowComparer.Default);
            Console.WriteLine("Difference between original and XmlSerializer:{0}", missingFromXml.Count());
            var missingFromProtoBuf = dt.AsEnumerable().Except(protoBufDt.AsEnumerable(), DataRowComparer.Default);
            Console.WriteLine("Difference between original and ProtoBuf:{0}", missingFromProtoBuf.Count());
        }

        /// <summary>
        /// Appends one sample row to <paramref name="dt"/>, derived from <paramref name="i"/>.
        /// </summary>
        /// <param name="dt">Table with the columns NO, NAME and COL1..COL7.</param>
        /// <param name="i">Seed value: stored in NO, embedded in NAME, and hashed for COL1..COL6.</param>
        private static void AddRow(DataTable dt, int i)
        {
            var dr = dt.NewRow();
            dr["NO"] = i;
            dr["NAME"] = "NAME:" + i.ToString();

            // COL1..COL6 hold MD5(i * 1) .. MD5(i * 6).
            for (var k = 1; k <= 6; k++)
            {
                dr["COL" + k] = MD5(i * k);
            }

            // COL7 is always a database NULL, so both serializers are exercised with null values.
            dr["COL7"] = System.DBNull.Value;
            dt.Rows.Add(dr);
        }

        /// <summary>
        /// Returns the Base64-encoded MD5 hash of the decimal text of <paramref name="i"/>.
        /// Used only to generate sample column data, not for any security purpose.
        /// </summary>
        /// <param name="i">Number whose <c>ToString()</c> text (ASCII-encoded) is hashed.</param>
        /// <returns>The 16-byte MD5 digest as a 24-character Base64 string.</returns>
        private static string MD5(int i)
        {
            // MD5.Create() replaces the obsolete MD5CryptoServiceProvider; the hash object
            // is IDisposable, so release it instead of leaking one instance per call.
            using (var md5 = System.Security.Cryptography.MD5.Create())
            {
                byte[] input = System.Text.Encoding.ASCII.GetBytes(i.ToString());
                return System.Convert.ToBase64String(md5.ComputeHash(input));
            }
        }
    }
}

結果

1回目が遅いですね。JIT(Just-In-Time)コンパイルによって、2回目以降は速くなっているんでしょうか。

レコード件数 XML File Size ProtoBuf File Size XML.Serialize ProtoBuf.Serialize XML.Deserialize ProtoBuf.Deserialize
1 1,909 258 65ms 32ms 18ms 14ms
11 6,112 1,938 2ms 1ms 2ms 1ms
111 48,515 18,828 2ms 1ms 1ms 2ms
1,111 476,518 189,590 10ms 4ms 23ms 4ms
11,111 4,796,521 1,908,352 105ms 29ms 102ms 82ms
111,111 48,396,524 19,280,730 851ms 275ms 834ms 835ms
1,111,111 488,396,527 194,153,108 9,106ms 3,085ms 11,941ms 8,853ms

結果(コンソール出力)

Count:1
XML.Serialize:65ms
File Size:1,909 byte
XML.Deserialize:18ms
ProtoBuf.Serialize:32ms
File Size:258 byte
ProtoBuf.Deserialize:14ms
Difference between original and XmlSerializer:0
Difference between original and ProtoBuf:0
Count:11
XML.Serialize:2ms
File Size:6,112 byte
XML.Deserialize:2ms
ProtoBuf.Serialize:1ms
File Size:1,938 byte
ProtoBuf.Deserialize:1ms
Difference between original and XmlSerializer:0
Difference between original and ProtoBuf:0
Count:111
XML.Serialize:2ms
File Size:48,515 byte
XML.Deserialize:1ms
ProtoBuf.Serialize:1ms
File Size:18,828 byte
ProtoBuf.Deserialize:2ms
Difference between original and XmlSerializer:0
Difference between original and ProtoBuf:0
Count:1,111
XML.Serialize:10ms
File Size:476,518 byte
XML.Deserialize:23ms
ProtoBuf.Serialize:4ms
File Size:189,590 byte
ProtoBuf.Deserialize:4ms
Difference between original and XmlSerializer:0
Difference between original and ProtoBuf:0
Count:11,111
XML.Serialize:105ms
File Size:4,796,521 byte
XML.Deserialize:102ms
ProtoBuf.Serialize:29ms
File Size:1,908,352 byte
ProtoBuf.Deserialize:82ms
Difference between original and XmlSerializer:0
Difference between original and ProtoBuf:0
Count:111,111
XML.Serialize:851ms
File Size:48,396,524 byte
XML.Deserialize:834ms
ProtoBuf.Serialize:275ms
File Size:19,280,730 byte
ProtoBuf.Deserialize:835ms
Difference between original and XmlSerializer:0
Difference between original and ProtoBuf:0
Count:1,111,111
XML.Serialize:9,106ms
File Size:488,396,527 byte
XML.Deserialize:11,941ms
ProtoBuf.Serialize:3,085ms
File Size:194,153,108 byte
ProtoBuf.Deserialize:8,853ms
Difference between original and XmlSerializer:0
Difference between original and ProtoBuf:0

参考