Code snippet from an evening spent playing with Parallel.For to speed up image processing on an 8-core PC.
// Rolling window holding the most recent conversion timings, in Stopwatch ticks.
static long[] averageTicks = new long[100];

// Slot in averageTicks that receives the next timing; wraps back to 0 at the end of the window.
static int currentOffset = 0;

// Number of slots in averageTicks that hold a real measurement (saturates at the window size).
// Slots are filled in order from index 0, so the first recordedSamples entries are the valid ones.
static int recordedSamples = 0;

// Output buffer, reused across calls for as long as the padded dimensions stay the same.
byte[] byteArray;

/// <summary>
/// Converts the float samples in <c>left</c> into a 4-bytes-per-pixel byte buffer whose row
/// stride and row count are padded up to the next power of two, and prints the rolling
/// average conversion time (in Stopwatch ticks) to the console.
/// </summary>
/// <remarks>
/// Only the first three bytes of each pixel are written; the fourth byte is left untouched,
/// so it stays 0 in a freshly allocated buffer. Each sample is multiplied by 255 and cast to
/// <see cref="byte"/> without clamping (presumably samples are normalized to [0, 1] - confirm
/// against the code that fills <c>left</c>).
/// The timing statistics live in static fields and are not synchronized.
/// </remarks>
/// <returns>
/// The internal buffer (not a copy). The same array instance is returned on subsequent calls
/// until the padded dimensions change, at which point a new buffer is allocated.
/// </returns>
public byte[] GetOpenGLBuffer()
{
    int powWidth = Utility.GetNextPowerOfTwo(width);
    int powHeight = Utility.GetNextPowerOfTwo(height);

    // Reallocate when the padded size changed; reusing a buffer sized for different
    // dimensions would either overrun it or leave stale pixels in the padding.
    int requiredLength = powWidth * 4 * powHeight;
    if (byteArray == null || byteArray.Length != requiredLength)
    {
        byteArray = new byte[requiredLength];
    }

    Stopwatch sw = Stopwatch.StartNew();

    // Timing notes from the original experiment (8 core PC):
    //   serial nested loop over every component of every row: ~17500 ticks
    //   Parallel.For over rows, three casts per pixel:         ~4400 ticks
    //   Parallel.For over rows, all four casts per pixel:      ~6500 ticks

    // Hoist loop invariants and field reads into locals so the lambda captures plain values.
    int sourceStride = width * 4;
    int destStride = powWidth * 4;
    int pixelsPerRow = width;
    var source = left;
    byte[] dest = byteArray;

    // Rows write to disjoint ranges of dest, so they can be converted independently.
    Parallel.For(0, height, y =>
    {
        int offset = destStride * y;
        int floatOffset = sourceStride * y;

        for (int x = 0; x < pixelsPerRow; x++)
        {
            dest[offset] = (byte)(source[floatOffset] * 255.0f);
            dest[offset + 1] = (byte)(source[floatOffset + 1] * 255.0f);
            dest[offset + 2] = (byte)(source[floatOffset + 2] * 255.0f);
            // The fourth component is intentionally skipped (see timing notes above).

            offset += 4;
            floatOffset += 4;
        }
    });

    sw.Stop();
    averageTicks[currentOffset] = sw.ElapsedTicks;
    if (recordedSamples < averageTicks.Length)
    {
        ++recordedSamples;
    }

    // Average only the slots that hold real measurements; dividing by the full window
    // size before it has filled up would understate the average.
    long av = 0;
    for (int i = 0; i < recordedSamples; ++i)
    {
        av += averageTicks[i];
    }
    av /= recordedSamples;
    Console.WriteLine(av);

    currentOffset = (currentOffset + 1) % averageTicks.Length;

    return byteArray;
}