-
-
Notifications
You must be signed in to change notification settings - Fork 51
/
Copy pathWriteThrough.cs
249 lines (207 loc) · 9.14 KB
/
WriteThrough.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
/*
* Copyright 2021 James Courtney
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using System.Buffers.Binary;
using System.Diagnostics.CodeAnalysis;
using System.Runtime.Intrinsics.Arm;
using System.Security.Cryptography;
using System.Text;
using static System.Net.Mime.MediaTypeNames;
namespace Samples.WriteThrough;
/// <summary>
/// Write Through allows you to make updates to an already-serialized FlatBuffer in-place without a full
/// parse or re-serialize. This is extremely performant, especially for large buffers as it avoids
/// copies and allows in-place updates.
///
/// FlatSharp supports Write-Through in limited cases:
///
/// For reference structs, Write-Through is supported on fields inside the struct when:
/// - Serialization method is Progressive or Lazy.
/// - The struct field has been opted into write-through.
/// - The struct field is virtual.
///
/// For value structs, Write-Through is supported when:
/// - Serialization method is Progressive or Lazy.
/// - When in a table: The containing table field is required and enabled for write through.
/// - When in a vector: The containing vector is enabled for write through.
/// </summary>
public class WriteThroughSample : IFlatSharpSample
{
public void Run()
{
ReferenceStructWriteThrough();
ValueStructWriteThrough();
}
/// <summary>
/// Write through using reference structs. In this case, we use a Bloom Filter. Note that this can also be done
/// using value structs.
/// </summary>
public static void ReferenceStructWriteThrough()
{
using SHA256 sha256 = SHA256.Create();
byte[] rawData;
{
// Each block is 128 bytes. For a 1MB filter, we need 8192 blocks.
BloomFilter filter = new BloomFilter(1024 * 1024 / 128);
// Write our bloom filter out to an array. It should be about 1MB in size.
rawData = new byte[BloomFilter.Serializer.GetMaxSize(filter)];
// write our empty bloom filter into the rawData buffer.
BloomFilter.Serializer.Write(rawData, filter);
byte[] sha = sha256.ComputeHash(rawData);
Console.WriteLine($"Original Hash: {Convert.ToBase64String(sha)}");
}
// This bloom filter will let us do in-place updates to the 'rawData' array,
// eliminating the need to re-serialize on each update. If you want extra credit,
// consider using memory mapped files here to automatically flush to disk!
BloomFilter writeThroughFilter = BloomFilter.Serializer.Parse(rawData);
// Add a bunch of random keys to our bloom filter. Keep in mind that we're
// doing this *in place*. No Parse->Update->Re-serialize here. All of these operations
// are happening directly into the 'rawData' array up above.
List<string> keysInFilter = new List<string>();
for (int i = 0; i < 25_000; ++i)
{
string key = Guid.NewGuid().ToString();
keysInFilter.Add(key);
Assert.True(!writeThroughFilter.MightContain(key), "Filter shouldn't contain any keys yet");
writeThroughFilter.Add(key);
Assert.True(writeThroughFilter.MightContain(key), "Filter should contain that key now.");
}
Console.WriteLine($"Final Hash: {Convert.ToBase64String(sha256.ComputeHash(rawData))}");
// Let's prove the writethrough worked now. Let's re-parse the data and verify that our keys
// are still there!
writeThroughFilter = BloomFilter.Serializer.Parse(rawData);
foreach (var key in keysInFilter)
{
Assert.True(
writeThroughFilter.MightContain(key),
"Filter still contains the key! WriteThrough worked :)");
}
// For some fun, we can also test some keys that aren't in there.
for (int i = 0; i < 1000; ++i)
{
Assert.True(
!writeThroughFilter.MightContain(Guid.NewGuid().ToString()),
"Filter doesn't contain what we don't expect.");
}
}
/// <summary>
/// Shows value struct write through. In this case, we define a path and add points to it.
/// </summary>
public static void ValueStructWriteThrough()
{
byte[] data;
{
// Build our "empty" path with capacity for 10K points.
// Note that the vector has 10,000 items in it, but we
// track the "Used" length ourselves. This is because
// writethrough cannot add to the end of a list. You can
// only update items that are already in the list.
Path path = new Path()
{
NumPoints = new MutableInt { Value = 0 },
Points = new List<Point>(),
};
// Give capacity for 10,000 points.
for (int i = 0; i < 10000; ++i)
{
path.Points.Add(new Point());
}
data = new byte[Path.Serializer.GetMaxSize(path)];
Path.Serializer.Write(data, path);
}
Path parsed = Path.Serializer.Parse(data, FlatBufferDeserializationOption.Progressive);
// Add 100 points.
MutableInt length = parsed.NumPoints;
for (int i = 0; i < 100; ++i)
{
// Update the vector. This operation writes through to the underlying buffer.
parsed.Points![length.Value++] = new Point { X = i, Y = i, Z = i };
}
// After the loop, update the length:
parsed.NumPoints = length;
// Reparse the buffer to show that we actually wrote some points.
parsed = Path.Serializer.Parse(data, FlatBufferDeserializationOption.Lazy);
Assert.True(parsed.NumPoints.Value == 100, "100 points, right?");
// Print the last point we wrote, followed by the next empty slot in the vector.
for (int i = 97; i <= 103; ++i)
{
Point point = parsed.Points![i];
Console.WriteLine($"Point {i}: ({point.X}, {point.Y}, {point.Z})");
}
}
}
/// <summary>
/// FlatSharp generates the data definitions. We are adding the methods in this partial declaration.
/// </summary>
public partial class BloomFilter
{
// Bloom filters use multiple hash functions to figure out which bits to set.
// These were chosen arbitrarily by what was available in .NET. Ideally, you'd
// use murmur3 or something appropriate for this use case :)
private readonly HashAlgorithm[] hashAlgorithms = new HashAlgorithm[] { SHA256.Create(), SHA1.Create(), MD5.Create() };
[SetsRequiredMembers]
public BloomFilter(int blockCount)
{
this.Blocks = new List<Block>(blockCount);
for (int i = 0; i < blockCount; ++i)
{
this.Blocks.Add(new Block());
}
}
public int BlockCount => this.Blocks.Count;
public bool MightContain(string key)
{
byte[] keyBytes = this.GetKeyBytes(key);
for (int i = 0; i < this.hashAlgorithms.Length; ++i)
{
this.GetBlockAddress(keyBytes, this.hashAlgorithms[i], out Block block, out int blockIndex, out ulong blockMask);
if ((block.Data[blockIndex] & blockMask) == 0)
{
return false;
}
}
return true;
}
public bool Add(string key)
{
byte[] keyBytes = this.GetKeyBytes(key);
for (int i = 0; i < this.hashAlgorithms.Length; ++i)
{
this.GetBlockAddress(keyBytes, this.hashAlgorithms[i], out Block block, out int blockIndex, out ulong blockMask);
block.Data[blockIndex] |= blockMask;
}
return true;
}
private byte[] GetKeyBytes(string key)
{
// Not efficient. But hey, this is sample code!
return Encoding.UTF8.GetBytes(key);
}
private void GetBlockAddress(byte[] key, HashAlgorithm hashAlgorithm, out Block block, out int blockIndex, out ulong blockMask)
{
byte[] hash = hashAlgorithm.ComputeHash(key);
// Use the hash to get the address of the bit we're looking for.
// First 4 bytes finds the block.
int hash1 = BinaryPrimitives.ReadInt32LittleEndian(hash) & int.MaxValue; // positive-valued int32 hash.
// Second 4 bytes finds the offset within the block.
int hash2 = BinaryPrimitives.ReadInt32LittleEndian(hash.AsSpan().Slice(4)) & int.MaxValue;
// Third 4 bytes finds the bit mask within the ulong.
int hash3 = BinaryPrimitives.ReadInt32LittleEndian(hash.AsSpan().Slice(8)) & int.MaxValue;
block = this.Blocks[hash1 % this.BlockCount];
blockIndex = hash2 % block.Data.Count;
int positionInBlock = hash3 % 64;
blockMask = ((ulong)1) << positionInBlock;
}
}