C# (dotnetcore) DataTableをSerialize/Deserializeする(System.Xml.SerializationとProtoBuf.Dataを比較)
DataTableをシリアライズ/デシリアライズしてみます。
また、XmlSerializerとprotobuf-net-dataについて、シリアライズで作成されるファイルのサイズと、シリアライズ・デシリアライズの速度を比較してみました。
用語
用語 | 英語 | 意味 |
---|---|---|
シリアライズ | Serialize | オブジェクトを文字列、バイト配列に置き換える |
デシリアライズ | Deserialize | シリアライズした文字列・バイト配列からオブジェクトに戻す |
DataTableを作る
// Build an empty table named "TABLE": one int column ("NO") followed by eight string columns.
var dt = new DataTable { TableName = "TABLE" };
dt.Columns.Add(new DataColumn("NO", typeof(int)));
foreach (var columnName in new[] { "NAME", "COL1", "COL2", "COL3", "COL4", "COL5", "COL6", "COL7" })
{
    dt.Columns.Add(new DataColumn(columnName, typeof(string)));
}
DataTableにデータを追加
MD5関数作成(サンプルデータに使います)
/// <summary>
/// Returns the Base64-encoded MD5 hash of the decimal string form of <paramref name="i"/>.
/// Used only to generate sample data, not for any security purpose.
/// </summary>
/// <param name="i">Number whose <c>ToString()</c> text is hashed.</param>
/// <returns>The 16-byte MD5 digest encoded as a Base64 string.</returns>
private static string MD5(int i)
{
    byte[] input = System.Text.Encoding.ASCII.GetBytes(i.ToString());

    // The hash algorithm object is IDisposable; release it deterministically instead of
    // leaving one undisposed instance behind per call. The type name is fully qualified
    // because this method itself is named MD5.
    using (var md5 = System.Security.Cryptography.MD5.Create())
    {
        return System.Convert.ToBase64String(md5.ComputeHash(input));
    }
}
データ追加用の関数作成
/// <summary>
/// Appends one sample row to <paramref name="dt"/>, with every value derived from <paramref name="i"/>.
/// </summary>
/// <param name="dt">Table to append to; must contain the NO, NAME and COL1..COL7 columns.</param>
/// <param name="i">Seed for the row: stored in NO, embedded in NAME, and hashed for COL1..COL6.</param>
private static void AddRow(DataTable dt, int i)
{
    string[] hashColumns = { "COL1", "COL2", "COL3", "COL4", "COL5", "COL6" };

    DataRow row = dt.NewRow();
    row["NO"] = i;
    row["NAME"] = string.Concat("NAME:", i.ToString());

    // COLn receives MD5(i * n); for COL1 the multiplier is 1, i.e. MD5(i).
    for (int n = 1; n <= hashColumns.Length; n++)
    {
        row[hashColumns[n - 1]] = MD5(i * n);
    }

    // COL7 is stored as a database NULL.
    row["COL7"] = System.DBNull.Value;
    dt.Rows.Add(row);
}
データ追加の様子(ループ条件 i < 1 の 1 を増やすと、作成するレコード数が増えます)
// Append sample rows; the upper bound of the loop is the number of rows created.
for (var i = 0; i < 1; i++)
{
    AddRow(dt, i);
}
XmlSerializerでシリアライズ
// Serialize dt to data.xml with XmlSerializer; FileMode.Create overwrites any existing file.
using (var xmlFile = new FileStream("data.xml", FileMode.Create))
{
    var xmlWriter = new XmlSerializer(dt.GetType());
    xmlWriter.Serialize(xmlFile, dt);
}
XmlSerializerでデシリアライズ
// Read data.xml back with XmlSerializer and keep the result as a DataTable.
using (var xmlFile = File.OpenRead("data.xml"))
{
    var xmlReader = new XmlSerializer(dt.GetType());
    object restored = xmlReader.Deserialize(xmlFile);
    xmlSerializerDt = (DataTable)restored;
}
ProtoBufでシリアライズ
// Serialize dt to data.data with ProtoBuf.Data's DataSerializer; FileMode.Create overwrites any existing file.
using (var protoFile = new FileStream("data.data", FileMode.Create))
{
    DataSerializer.Serialize(protoFile, dt);
}
ProtoBufでデシリアライズ
// Read data.data back through the reader that DataSerializer.Deserialize returns and load it into a new DataTable.
var protoBufDt = new DataTable();
using (Stream stream = File.OpenRead("data.data"))
using (IDataReader reader = DataSerializer.Deserialize(stream))
{
    // The reader is IDisposable, so it is wrapped in using rather than passed inline and left undisposed.
    protoBufDt.Load(reader);
}
速度比較用プログラム
using System;
using System.Data;
using System.Linq;
using System.Diagnostics;
using System.IO;
using System.Xml.Serialization;
using ProtoBuf.Data;

namespace serializeExample
{
    /// <summary>
    /// Benchmark that serializes and deserializes a growing DataTable with XmlSerializer and
    /// with ProtoBuf.Data's DataSerializer, printing elapsed time, file size and a row
    /// comparison for each table size.
    /// </summary>
    class Program
    {
        private const string XmlFilePath = "data.xml";
        private const string ProtoBufFilePath = "data.data";

        // Number of rows appended before each measurement. The table is never cleared,
        // so the measured row counts accumulate: 1, 11, 111, ...
        private static readonly int[] BatchSizes = { 1, 10, 100, 1000, 10000, 100000, 1000000 };

        /// <summary>
        /// Grows the sample table batch by batch and runs the comparison after each batch.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            DataTable dt = CreateTable();

            foreach (int batchSize in BatchSizes)
            {
                for (var i = 0; i < batchSize; i++)
                {
                    AddRow(dt, i);
                }

                Console.WriteLine("Count:{0:#,0}", dt.Rows.Count);
                Diff(dt);
            }
        }

        /// <summary>
        /// Creates the empty sample table: an int column NO followed by eight string columns.
        /// </summary>
        private static DataTable CreateTable()
        {
            DataTable dt = new DataTable();
            dt.TableName = "TABLE";
            dt.Columns.Add("NO", typeof(int));
            dt.Columns.Add("NAME", typeof(string));
            dt.Columns.Add("COL1", typeof(string));
            dt.Columns.Add("COL2", typeof(string));
            dt.Columns.Add("COL3", typeof(string));
            dt.Columns.Add("COL4", typeof(string));
            dt.Columns.Add("COL5", typeof(string));
            dt.Columns.Add("COL6", typeof(string));
            dt.Columns.Add("COL7", typeof(string));
            return dt;
        }

        /// <summary>
        /// Round-trips <paramref name="dt"/> through both serializers, printing the elapsed
        /// time of each step, the size of each file, and how many original rows are missing
        /// from each deserialized table.
        /// </summary>
        /// <param name="dt">Table to serialize; it is not modified.</param>
        private static void Diff(DataTable dt)
        {
            DataTable xmlSerializerDt = null;
            DataTable protoBufDt = null;

            Measure("XML.Serialize", () => SerializeXml(dt));
            PrintFileSize(XmlFilePath);
            Measure("XML.Deserialize", () => xmlSerializerDt = DeserializeXml(dt.GetType()));

            Measure("ProtoBuf.Serialize", () => SerializeProtoBuf(dt));
            PrintFileSize(ProtoBufFilePath);
            Measure("ProtoBuf.Deserialize", () => protoBufDt = DeserializeProtoBuf());

            Console.WriteLine("Difference between original and XmlSerializer:{0}", CountMissingRows(dt, xmlSerializerDt));
            Console.WriteLine("Difference between original and ProtoBuf:{0}", CountMissingRows(dt, protoBufDt));
        }

        /// <summary>
        /// Runs <paramref name="action"/> once and prints its wall-clock duration as "label:Nms".
        /// </summary>
        private static void Measure(string label, Action action)
        {
            Stopwatch sw = Stopwatch.StartNew();
            action();
            sw.Stop();

            // "#,0" rather than "#,#": the latter renders a value of 0 as an empty string,
            // which would print e.g. "XML.Serialize:ms" for a sub-millisecond run.
            Console.WriteLine("{0}:{1:#,0}ms", label, sw.ElapsedMilliseconds);
        }

        /// <summary>Prints the size in bytes of the file at <paramref name="path"/>.</summary>
        private static void PrintFileSize(string path)
        {
            Console.WriteLine("File Size:{0:#,0} byte", new FileInfo(path).Length);
        }

        /// <summary>Writes <paramref name="dt"/> to the XML file, overwriting any existing file.</summary>
        private static void SerializeXml(DataTable dt)
        {
            using (Stream stream = new FileStream(XmlFilePath, FileMode.Create))
            {
                XmlSerializer serializer = new XmlSerializer(dt.GetType());
                serializer.Serialize(stream, dt);
            }
        }

        /// <summary>Reads the XML file back into a DataTable of the given runtime type.</summary>
        private static DataTable DeserializeXml(Type tableType)
        {
            using (Stream stream = File.OpenRead(XmlFilePath))
            {
                XmlSerializer serializer = new XmlSerializer(tableType);
                return (DataTable)serializer.Deserialize(stream);
            }
        }

        /// <summary>Writes <paramref name="dt"/> to the ProtoBuf file, overwriting any existing file.</summary>
        private static void SerializeProtoBuf(DataTable dt)
        {
            using (Stream stream = new FileStream(ProtoBufFilePath, FileMode.Create))
            {
                DataSerializer.Serialize(stream, dt);
            }
        }

        /// <summary>Reads the ProtoBuf file back into a new DataTable.</summary>
        private static DataTable DeserializeProtoBuf()
        {
            DataTable table = new DataTable();
            using (Stream stream = File.OpenRead(ProtoBufFilePath))
            using (IDataReader reader = DataSerializer.Deserialize(stream))
            {
                // The reader is IDisposable; dispose it once the table has been loaded.
                table.Load(reader);
            }
            return table;
        }

        /// <summary>
        /// Counts the distinct rows of <paramref name="original"/> that have no value-equal
        /// row in <paramref name="restored"/>. Except is a set operation, so duplicate rows
        /// and rows present only in <paramref name="restored"/> are not detected.
        /// </summary>
        private static int CountMissingRows(DataTable original, DataTable restored)
        {
            return original.AsEnumerable().Except(restored.AsEnumerable(), DataRowComparer.Default).Count();
        }

        /// <summary>
        /// Appends one sample row to <paramref name="dt"/>, with every value derived from <paramref name="i"/>.
        /// </summary>
        private static void AddRow(DataTable dt, int i)
        {
            var dr = dt.NewRow();
            dr["NO"] = i;
            dr["NAME"] = "NAME:" + i.ToString();
            dr["COL1"] = MD5(i);
            dr["COL2"] = MD5(i * 2);
            dr["COL3"] = MD5(i * 3);
            dr["COL4"] = MD5(i * 4);
            dr["COL5"] = MD5(i * 5);
            dr["COL6"] = MD5(i * 6);
            dr["COL7"] = System.DBNull.Value;
            dt.Rows.Add(dr);
        }

        /// <summary>
        /// Returns the Base64-encoded MD5 hash of the decimal string form of <paramref name="i"/>.
        /// Used only to generate sample data.
        /// </summary>
        private static string MD5(int i)
        {
            byte[] input = System.Text.Encoding.ASCII.GetBytes(i.ToString());

            // AddRow calls this six times per row, so an undisposed hash object per call adds up
            // to millions of leaked instances. The type name is fully qualified because this
            // method itself is named MD5.
            using (var md5 = System.Security.Cryptography.MD5.Create())
            {
                return System.Convert.ToBase64String(md5.ComputeHash(input));
            }
        }
    }
}
結果
1回目が遅いですね。JIT(Just-In-Time)コンパイルによって、2回目以降は速くなっているのでしょうか。
レコード件数 | XML.Serialize File Size | ProtoBuf File Size | XML.Serialize | ProtoBuf.Serialize | XML.Deserialize | ProtoBuf.Deserialize |
---|---|---|---|---|---|---|
1 | 1,909 | 258 | 65ms | 32ms | 18ms | 14ms |
11 | 6,112 | 1,938 | 2ms | 1ms | 2ms | 1ms |
111 | 48,515 | 18,828 | 2ms | 1ms | 1ms | 2ms |
1,111 | 476,518 | 189,590 | 10ms | 4ms | 23ms | 4ms |
11,111 | 4,796,521 | 1,908,352 | 105ms | 29ms | 102ms | 82ms |
111,111 | 48,396,524 | 19,280,730 | 851ms | 275ms | 834ms | 835ms |
1,111,111 | 488,396,527 | 194,153,108 | 9,106ms | 3,085ms | 11,941ms | 8,853ms |
結果(コンソール出力)
Count:1 XML.Serialize:65ms File Size:1,909 byte XML.Deserialize:18ms ProtoBuf.Serialize:32ms File Size:258 byte ProtoBuf.Deserialize:14ms Difference between original and XmlSerializer:0 Difference between original and ProtoBuf:0 Count:11 XML.Serialize:2ms File Size:6,112 byte XML.Deserialize:2ms ProtoBuf.Serialize:1ms File Size:1,938 byte ProtoBuf.Deserialize:1ms Difference between original and XmlSerializer:0 Difference between original and ProtoBuf:0 Count:111 XML.Serialize:2ms File Size:48,515 byte XML.Deserialize:1ms ProtoBuf.Serialize:1ms File Size:18,828 byte ProtoBuf.Deserialize:2ms Difference between original and XmlSerializer:0 Difference between original and ProtoBuf:0 Count:1,111 XML.Serialize:10ms File Size:476,518 byte XML.Deserialize:23ms ProtoBuf.Serialize:4ms File Size:189,590 byte ProtoBuf.Deserialize:4ms Difference between original and XmlSerializer:0 Difference between original and ProtoBuf:0 Count:11,111 XML.Serialize:105ms File Size:4,796,521 byte XML.Deserialize:102ms ProtoBuf.Serialize:29ms File Size:1,908,352 byte ProtoBuf.Deserialize:82ms Difference between original and XmlSerializer:0 Difference between original and ProtoBuf:0 Count:111,111 XML.Serialize:851ms File Size:48,396,524 byte XML.Deserialize:834ms ProtoBuf.Serialize:275ms File Size:19,280,730 byte ProtoBuf.Deserialize:835ms Difference between original and XmlSerializer:0 Difference between original and ProtoBuf:0 Count:1,111,111 XML.Serialize:9,106ms File Size:488,396,527 byte XML.Deserialize:11,941ms ProtoBuf.Serialize:3,085ms File Size:194,153,108 byte ProtoBuf.Deserialize:8,853ms Difference between original and XmlSerializer:0 Difference between original and ProtoBuf:0