Note that there are some explanatory texts on larger screens.

plurals
  1. PO
    text
    copied!<p>For more fun, here's a new answer - isolated independent test that's pulling some amazing numbers for write performance on production and does a hell of a lot better avoiding IO blocking and connection management. I'm very interested to see how this works for you as we are getting ridiculous write speeds ( > 7kps).</p> <p>webconfig</p> <pre><code> &lt;system.net&gt; &lt;connectionManagement&gt; &lt;add address="*" maxconnection="48"/&gt; &lt;/connectionManagement&gt; &lt;/system.net&gt; </code></pre> <p>For the test I was using parameters based on volume, so like 25000 items, 24 partitions, batchsize of 100 seems to always be the best, and ref count of 20. This is using TPL dataflow (<a href="http://www.nuget.org/packages/Microsoft.Tpl.Dataflow/" rel="noreferrer">http://www.nuget.org/packages/Microsoft.Tpl.Dataflow/</a>) for BufferBlock which provides a nice awaitable thread safe table reference pulling. </p> <pre><code>public class DyanmicBulkInsertTestPooledRefsAndAsynch : WebTest, IDynamicWebTest { private int _itemCount; private int _partitionCount; private int _batchSize; private List&lt;TestTableEntity&gt; _items; private GuidIdPartitionSplitter&lt;TestTableEntity&gt; _partitionSplitter; private string _tableName; private CloudStorageAccount _account; private CloudTableClient _tableClient; private Dictionary&lt;string, List&lt;TestTableEntity&gt;&gt; _itemsByParition; private int _maxRefCount; private BufferBlock&lt;CloudTable&gt; _tableRefs; public DyanmicBulkInsertTestPooledRefsAndAsynch() { Properties = new List&lt;ItemProp&gt;(); Properties.Add(new ItemProp("ItemCount", typeof(int))); Properties.Add(new ItemProp("PartitionCount", typeof(int))); Properties.Add(new ItemProp("BatchSize", typeof(int))); Properties.Add(new ItemProp("MaxRefs", typeof(int))); } public List&lt;ItemProp&gt; Properties { get; set; } public void SetProps(Dictionary&lt;string, object&gt; propValuesByPropName) { _itemCount = (int)propValuesByPropName["ItemCount"]; 
_partitionCount = (int)propValuesByPropName["PartitionCount"]; _batchSize = (int)propValuesByPropName["BatchSize"]; _maxRefCount = (int)propValuesByPropName["MaxRefs"]; } protected override void SetupTest() { base.SetupTest(); ThreadPool.SetMinThreads(1024, 256); ServicePointManager.DefaultConnectionLimit = 256; ServicePointManager.UseNagleAlgorithm = false; ServicePointManager.Expect100Continue = false; _account = CloudStorageAccount.Parse(CloudConfigurationManager.GetSetting("DataConnectionString")); _tableClient = _account.CreateCloudTableClient(); _tableName = "testtable" + new Random().Next(100000); //create the refs _tableRefs = new BufferBlock&lt;CloudTable&gt;(); for (int i = 0; i &lt; _maxRefCount; i++) { _tableRefs.Post(_tableClient.GetTableReference(_tableName)); } var tableRefTask = GetTableRef(); tableRefTask.Wait(); var tableRef = tableRefTask.Result; tableRef.CreateIfNotExists(); ReleaseRef(tableRef); _items = TestUtils.GenerateTableItems(_itemCount); _partitionSplitter = new GuidIdPartitionSplitter&lt;TestTableEntity&gt;(); _partitionSplitter.BuildPartitions(_partitionCount); _items.ForEach(o =&gt; { o.ETag = "*"; o.Timestamp = DateTime.Now; o.PartitionKey = _partitionSplitter.GetPartition(o); }); _itemsByParition = _partitionSplitter.SplitIntoPartitionedSublists(_items); } private async Task&lt;CloudTable&gt; GetTableRef() { return await _tableRefs.ReceiveAsync(); } private void ReleaseRef(CloudTable tableRef) { _tableRefs.Post(tableRef); } protected override void ExecuteTest() { Task.WaitAll(_itemsByParition.Keys.Select(parition =&gt; Task.Factory.StartNew(() =&gt; InsertParitionItems(_itemsByParition[parition]))).ToArray()); } private void InsertParitionItems(List&lt;TestTableEntity&gt; items) { var tasks = new List&lt;Task&gt;(); for (int i = 0; i &lt; items.Count; i += _batchSize) { int i1 = i; var task = Task.Factory.StartNew(async () =&gt; { var batchItems = items.Skip(i1).Take(_batchSize).ToList(); if (batchItems.Select(o =&gt; 
o.PartitionKey).Distinct().Count() &gt; 1) { throw new Exception("Multiple partitions batch"); } var batchOp = new TableBatchOperation(); batchItems.ForEach(batchOp.InsertOrReplace); var tableRef = GetTableRef().Result; tableRef.ExecuteBatch(batchOp); ReleaseRef(tableRef); }); tasks.Add(task); } Task.WaitAll(tasks.ToArray()); } protected override void CleanupTest() { var tableRefTask = GetTableRef(); tableRefTask.Wait(); var tableRef = tableRefTask.Result; tableRef.DeleteIfExists(); ReleaseRef(tableRef); } </code></pre> <p>We are currently working on a version that can handle multiple storage accounts to hopefully get some insane speeds. Also, we are running these on 8 core virtual machines for large datasets, but with the new non blocking IO it should run great on a limited vm. Good luck!</p> <pre><code> public class SimpleGuidIdPartitionSplitter&lt;T&gt; where T : IUniqueId { private ConcurrentDictionary&lt;string, string&gt; _partitionByKey = new ConcurrentDictionary&lt;string, string&gt;(); private List&lt;string&gt; _partitions; private bool _bPartitionsBuilt; public SimpleGuidIdPartitionSplitter() { } public void BuildPartitions(int iPartCount) { BuildPartitionIndentifiers(iPartCount); } public string GetPartition(T item) { if (_bPartitionsBuilt == false) { throw new Exception("Partitions Not Built"); } var partKey = item.Id.ToString().Substring(34, 2); return _partitionByKey[partKey]; } public string GetPartition(Guid id) { if (_bPartitionsBuilt == false) { throw new Exception("Partitions Not Built"); } var partKey = id.ToString().Substring(34, 2); return _partitionByKey[partKey]; } #region Helpers private void BuildPartitionIndentifiers(int partitonCount) { var chars = new char[] { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' }.ToList(); var keys = new List&lt;string&gt;(); for (int i = 0; i &lt; chars.Count; i++) { var keyA = chars[i]; for (int j = 0; j &lt; chars.Count; j++) { var keyB = chars[j]; 
keys.Add(string.Concat(keyA, keyB)); } } var keySetMaxSize = Math.Max(1, (int)Math.Floor((double)keys.Count / ((double)partitonCount))); var keySets = new List&lt;List&lt;string&gt;&gt;(); if (partitonCount &gt; keys.Count) { partitonCount = keys.Count; } //Build the key sets var index = 0; while (index &lt; keys.Count) { var keysSet = keys.Skip(index).Take(keySetMaxSize).ToList(); keySets.Add(keysSet); index += keySetMaxSize; } //build the lookups and datatable for each key set _partitions = new List&lt;string&gt;(); for (int i = 0; i &lt; keySets.Count; i++) { var partitionName = String.Concat("subSet_", i); foreach (var key in keySets[i]) { _partitionByKey[key] = partitionName; } _partitions.Add(partitionName); } _bPartitionsBuilt = true; } #endregion } internal static List&lt;TestTableEntity&gt; GenerateTableItems(int count) { var items = new List&lt;TestTableEntity&gt;(); var random = new Random(); for (int i = 0; i &lt; count; i++) { var itemId = Guid.NewGuid(); items.Add(new TestTableEntity() { Id = itemId, TestGuid = Guid.NewGuid(), RowKey = itemId.ToString(), TestBool = true, TestDateTime = DateTime.Now, TestDouble = random.Next() * 1000000, TestInt = random.Next(10000), TestString = Guid.NewGuid().ToString(), }); } var dupRowKeys = items.GroupBy(o =&gt; o.RowKey).Where(o =&gt; o.Count() &gt; 1).Select(o =&gt; o.Key).ToList(); if (dupRowKeys.Count &gt; 0) { throw new Exception("Duplicate Row Keys"); } return items; } </code></pre> <p>and one more thing - your timing and how our framework was affected point to this <a href="http://blogs.msdn.com/b/windowsazurestorage/archive/2013/08/08/net-clients-encountering-port-exhaustion-after-installing-kb2750149-or-kb2805227.aspx" rel="noreferrer">http://blogs.msdn.com/b/windowsazurestorage/archive/2013/08/08/net-clients-encountering-port-exhaustion-after-installing-kb2750149-or-kb2805227.aspx</a></p>
 

Querying!

 
Guidance

SQuiL has stopped working due to an internal error.

If you are curious you may find further information in the browser console, which is accessible through the devtools (F12).

Reload