Entity Framework是.NET平台下的一种简单易用的ORM框架,它既便于Domain Model和持久层的OO设计,也提高了代码的可维护性。但在使用中发现,有几类业务场景是EF不太擅长的,比如批量写入大量同类数据,为此本人做了一些对比测试,以供大家参考。
现假设我们需要做一个用户批量导入的功能,需要从某处导入1w个User到数据库,本人听说过的常见做法有如下几种:
数据库准备工作:
1 CREATE DATABASE [BulkInsertTest] 2 GO 3 4 USE [BulkInsertTest] 5 GO 6 7 CREATE TABLE [dbo].[User]( 8 [Id] [int] IDENTITY(1,1) NOT NULL, 9 [Name] [nvarchar](50) NOT NULL, 10 [Birthday] [date] NOT NULL, 11 [Gender] [char](1) NOT NULL, 12 [Email] [nvarchar](50) NOT NULL, 13 [Deleted] [bit] NOT NULL, 14 CONSTRAINT [PK_User] PRIMARY KEY CLUSTERED 15 ( 16 [Id] ASC 17 )WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY] 18 ) ON [PRIMARY] 19 20 GO 21 22 CREATE PROCEDURE [dbo].[InsertUser] 23 @Name nvarchar(50) 24 ,@Birthday date 25 ,@Gender char(1) 26 ,@Email nvarchar(50) 27 ,@Deleted bit 28 AS 29 BEGIN 30 INSERT INTO [BulkInsertTest].[dbo].[User] 31 ([Name] 32 ,[Birthday] 33 ,[Gender] 34 ,[Email] 35 ,[Deleted]) 36 VALUES 37 (@Name,@Birthday,@Gender,@Email,@Deleted) 38 39 END 40 41 GOView Code
C#代码:
1 using System; 2 using System.Data; 3 using System.Data.SqlClient; 4 using System.Diagnostics; 5 using System.Linq; 6 using System.Text; 7 8 namespace ConsoleApplication5 9 { 10 class Program 11 { 12 private const string ConnectionString = "Data Source=.;Integrated Security=True;Initial Catalog=BulkInsertTest;"; 13 14 static void Main(string[] args) 15 { 16 for (int i = 0; i < 3; i++) 17 { 18 InsertBySqlBulkCopy(); 19 InsertBySqlCmd(); 20 InsertByStoreProcedure(); 21 InsertByEntityFramework(); 22 } 23 Console.ReadLine(); 24 } 25 26 private static void InsertBySqlCmd() 27 { 28 Stopwatch stopwatch = Stopwatch.StartNew(); 29 using (var connection = new SqlConnection(ConnectionString)) 30 { 31 SqlTransaction transaction = null; 32 connection.Open(); 33 try 34 { 35 transaction = connection.BeginTransaction(); 36 StringBuilder sb = new StringBuilder(); 37 for (int j = 0; j < 10000; j++) 38 { 39 sb.AppendFormat(@"INSERT INTO dbo.[User] ([Name],[Birthday],[Gender],[Email],[Deleted]) 40 VALUES('{0}','{1:yyyy-MM-dd}','{2}','{3}',{4});", "name" + j, DateTime.Now.AddDays(j), 'M', "user" + j + "@abc.com", 0); 41 } 42 var sqlCmd = connection.CreateCommand(); 43 sqlCmd.CommandText = sb.ToString(); 44 sqlCmd.Transaction = transaction; 45 sqlCmd.ExecuteNonQuery(); 46 transaction.Commit(); 47 } 48 catch 49 { 50 if (transaction != null) 51 { 52 transaction.Rollback(); 53 } 54 throw; 55 } 56 } 57 stopwatch.Stop(); 58 Console.WriteLine("SqlCommand time cost: {0}ms", stopwatch.ElapsedMilliseconds); 59 } 60 61 private static void InsertByStoreProcedure() 62 { 63 Stopwatch stopwatch = Stopwatch.StartNew(); 64 using (var connection = new SqlConnection(ConnectionString)) 65 { 66 SqlTransaction transaction = null; 67 connection.Open(); 68 for (int i = 0; i < 10; i++) 69 { 70 try 71 { 72 transaction = connection.BeginTransaction(); 73 StringBuilder sb = new StringBuilder(); 74 for (int j = 0; j < 1000; j++) 75 { 76 sb.AppendFormat(@"EXECUTE [dbo].[InsertUser] '{0}','{1:yyyy-MM-dd}','{2}','{3}',{4};", 77 "name" + j, DateTime.Now.AddDays(j), 'M', "user" + j + "@abc.com", 0); 78 } 79 var sqlCmd = connection.CreateCommand(); 80 sqlCmd.CommandText = sb.ToString(); 81 sqlCmd.Transaction = transaction; 82 sqlCmd.ExecuteNonQuery(); 83 transaction.Commit(); 84 } 85 catch 86 { 87 if (transaction != null) 88 { 89 transaction.Rollback(); 90 } 91 throw; 92 } 93 } 94 } 95 stopwatch.Stop(); 96 Console.WriteLine("StoreProcedure time cost: {0}ms", stopwatch.ElapsedMilliseconds); 97 } 98 99 private static void InsertBySqlBulkCopy() 100 { 101 Stopwatch stopwatch = Stopwatch.StartNew(); 102 103 DataTable table = new DataTable(); 104 table.Columns.Add("Name", typeof (string)); 105 table.Columns.Add("Birthday", typeof (DateTime)); 106 table.Columns.Add("Gender", typeof (char)); 107 table.Columns.Add("Email", typeof (string)); 108 table.Columns.Add("Deleted", typeof (bool)); 109 for (int i = 0; i < 10000; i++) 110 { 111 var row = table.NewRow(); 112 row["Name"] = "name" + i; 113 row["Birthday"] = DateTime.Now.AddDays(i); 114 row["Gender"] = 'M'; 115 row["Email"] = "user" + i + "@abc.com"; 116 row["Deleted"] = false; 117 table.Rows.Add(row); 118 } 119 120 InsertIntoMembers(table); 121 stopwatch.Stop(); 122 Console.WriteLine("SqlBulkCopy time cost: {0}ms", stopwatch.ElapsedMilliseconds); 123 } 124 125 private static void InsertIntoMembers(DataTable dataTable) 126 { 127 using (var connection = new SqlConnection(ConnectionString)) 128 { 129 SqlTransaction transaction = null; 130 connection.Open(); 131 try 132 { 133 transaction = connection.BeginTransaction(); 134 using (var sqlBulkCopy = new SqlBulkCopy(connection, SqlBulkCopyOptions.TableLock, transaction)) 135 { 136 sqlBulkCopy.DestinationTableName = "[User]"; 137 //sqlBulkCopy.ColumnMappings.Add("Id", "Id"); 138 sqlBulkCopy.ColumnMappings.Add("Name", "Name"); 139 sqlBulkCopy.ColumnMappings.Add("Birthday", "Birthday"); 140 sqlBulkCopy.ColumnMappings.Add("Gender", "Gender"); 141 sqlBulkCopy.ColumnMappings.Add("Email", "Email"); 142 sqlBulkCopy.ColumnMappings.Add("Deleted", "Deleted"); 143 144 sqlBulkCopy.WriteToServer(dataTable); 145 } 146 transaction.Commit(); 147 } 148 catch 149 { 150 if (transaction!=null) 151 { 152 transaction.Rollback(); 153 } 154 throw; 155 } 156 } 157 } 158 159 private static void InsertByEntityFramework() 160 { 161 Stopwatch stopwatch = Stopwatch.StartNew(); 162 using (MyDbContext context = new MyDbContext(ConnectionString)) 163 { 164 context.Configuration.AutoDetectChangesEnabled = false; 165 context.Configuration.ValidateOnSaveEnabled = false; 166 for (int i = 0; i < 10000; i++) 167 { 168 context.Users.Add(new User() 169 { 170 Name = "name" + i, 171 Birthday = DateTime.Now.AddDays(i), 172 Gender = "F", 173 Email = "user" + i + "@abc.com", 174 Deleted = false 175 }); 176 } 177 context.SaveChanges(); 178 } 179 180 stopwatch.Stop(); 181 Console.WriteLine("EntityFramework time cost: {0}ms", stopwatch.ElapsedMilliseconds); 182 } 183 } 184 }View Code
测试结果:
结论:从性能上来说,SqlBulkCopy和StoreProcedure胜于拼Sql,前三者可以很好的解决Entity Framework批量操作的性能问题。但另一方面,我们还要从插入的数据总量、代码可维护性等方面加以权衡,比如插入数据量较小或是低压力时间段自动执行的话,EF仍然是不错的选择。而代码可维护性方面ADO.NET的实现明显可读性和重构友好型都弱于EF实现,所以对于需求变动较多的领域模型而言需要更多的抽象和单元测试,以此来确保产品的持续发展。