Safe Haskell | None |
---|---|

Language | Haskell98 |

Low level interface to parallel array filling operators.

## Synopsis

- class Elt a where
- class Target r e where
- class (Source r1 e, Shape sh) => Load r1 sh e where
- class (Source r1 e, Shape sh) => LoadRange r1 sh e where
- loadRangeS :: Target r2 e => Array r1 sh e -> MVec r2 e -> sh -> sh -> IO ()
- loadRangeP :: Target r2 e => Array r1 sh e -> MVec r2 e -> sh -> sh -> IO ()

- fromList :: (Shape sh, Target r e) => sh -> [e] -> Array r sh e
- computeS :: (Load r1 sh e, Target r2 e) => Array r1 sh e -> Array r2 sh e
- computeP :: (Load r1 sh e, Target r2 e, Source r2 e, Monad m) => Array r1 sh e -> m (Array r2 sh e)
- suspendedComputeP :: (Load r1 sh e, Target r2 e) => Array r1 sh e -> Array r2 sh e
- copyS :: (Source r1 e, Load D sh e, Target r2 e) => Array r1 sh e -> Array r2 sh e
- copyP :: (Source r1 e, Source r2 e, Load D sh e, Target r2 e, Monad m) => Array r1 sh e -> m (Array r2 sh e)
- suspendedCopyP :: (Source r1 e, Load D sh e, Target r2 e) => Array r1 sh e -> Array r2 sh e
- now :: (Shape sh, Source r e, Monad m) => Array r sh e -> m (Array r sh e)
- fillLinearS :: Int -> (Int -> a -> IO ()) -> (Int -> a) -> IO ()
- fillChunkedP :: Int -> (Int -> a -> IO ()) -> (Int -> a) -> IO ()
- fillChunkedIOP :: Int -> (Int -> a -> IO ()) -> (Int -> IO (Int -> IO a)) -> IO ()
- fillInterleavedP :: Int -> (Int -> a -> IO ()) -> (Int -> a) -> IO ()
- fillBlock2P :: Elt a => (Int -> a -> IO ()) -> (DIM2 -> a) -> Int# -> Int# -> Int# -> Int# -> Int# -> IO ()
- fillBlock2S :: (Int -> a -> IO ()) -> (DIM2 -> a) -> Int# -> Int# -> Int# -> Int# -> Int# -> IO ()
- fillCursoredBlock2S :: Elt a => (Int -> a -> IO ()) -> (DIM2 -> cursor) -> (DIM2 -> cursor -> cursor) -> (cursor -> a) -> Int# -> Int# -> Int# -> Int# -> Int# -> IO ()
- fillCursoredBlock2P :: Elt a => (Int -> a -> IO ()) -> (DIM2 -> cursor) -> (DIM2 -> cursor -> cursor) -> (cursor -> a) -> Int# -> Int# -> Int# -> Int# -> Int# -> IO ()
- selectChunkedS :: Shape sh => (sh -> a -> IO ()) -> (sh -> Bool) -> (sh -> a) -> sh -> IO Int
- selectChunkedP :: forall a. Unbox a => (Int -> Bool) -> (Int -> a) -> Int -> IO [IOVector a]

# Element types

Element types that can be used with the blockwise filling functions.

This class is mainly used to define the `touch` method. This is used internally
in the implementation of Repa to prevent let-bindings from being floated
inappropriately by the GHC simplifier. Doing a `seq` sometimes isn't enough,
because the GHC simplifier can erase these, and still move around the bindings.

Minimal complete definition: Nothing

Place a demand on a value at a particular point in an IO computation.

touch :: (Generic a, GElt (Rep a)) => a -> IO () #

Place a demand on a value at a particular point in an IO computation.

Generic zero value, helpful for debugging.

zero :: (Generic a, GElt (Rep a)) => a #

Generic zero value, helpful for debugging.

Generic one value, helpful for debugging.

one :: (Generic a, GElt (Rep a)) => a #

Generic one value, helpful for debugging.

## Instances

Elt Bool # | |

Elt Double # | |

Elt Float # | |

Elt Int # | |

Elt Int8 # | |

Elt Int16 # | |

Elt Int32 # | |

Elt Int64 # | |

Elt Word # | |

Elt Word8 # | |

Elt Word16 # | |

Elt Word32 # | |

Elt Word64 # | |

(Elt a, Elt b) => Elt (a, b) # | |

(Elt a, Elt b, Elt c) => Elt (a, b, c) # | |

(Elt a, Elt b, Elt c, Elt d) => Elt (a, b, c, d) # | |

(Elt a, Elt b, Elt c, Elt d, Elt e) => Elt (a, b, c, d, e) # | |

(Elt a, Elt b, Elt c, Elt d, Elt e, Elt f) => Elt (a, b, c, d, e, f) # | |

# Parallel array filling

Class of manifest array representations that can be constructed in parallel.

newMVec :: Int -> IO (MVec r e) #

Allocate a new mutable array of the given size.

unsafeWriteMVec :: MVec r e -> Int -> e -> IO () #

Write an element into the mutable array.

unsafeFreezeMVec :: sh -> MVec r e -> IO (Array r sh e) #

Freeze the mutable array into an immutable Repa array.

deepSeqMVec :: MVec r e -> a -> a #

Ensure the structure of a mutable array is fully evaluated.

touchMVec :: MVec r e -> IO () #

Ensure the array is still live at this point. Needed when the mutable array is a ForeignPtr with a finalizer.

class (Source r1 e, Shape sh) => Load r1 sh e where #

Compute all elements defined by an array and write them to a manifest target representation.

Note that instances require the source array to have a delayed
representation such as `D` or `C`. If you want to use a pre-existing
manifest array as the source then `delay` it first.

loadS :: Target r2 e => Array r1 sh e -> MVec r2 e -> IO () #

Fill an entire array sequentially.

loadP :: Target r2 e => Array r1 sh e -> MVec r2 e -> IO () #

Fill an entire array in parallel.

## Instances

Shape sh => Load D sh e # | Compute all elements in an array. |

Shape sh => Load X sh e # | |

Elt e => Load C DIM2 e # | Compute all elements in a rank-2 array. |

(Shape sh, Load r1 sh e) => Load (S r1) sh e # | |

(Shape sh, Load D sh e) => Load (I D) sh e # | |

(LoadRange r1 sh e, Load r2 sh e) => Load (P r1 r2) sh e # | |

class (Source r1 e, Shape sh) => LoadRange r1 sh e where #

Compute a range of elements defined by an array and write them to a fillable representation.

loadRangeS :: Target r2 e => Array r1 sh e -> MVec r2 e -> sh -> sh -> IO () #

Fill a range of an array sequentially.

loadRangeP :: Target r2 e => Array r1 sh e -> MVec r2 e -> sh -> sh -> IO () #

Fill a range of an array in parallel.

fromList :: (Shape sh, Target r e) => sh -> [e] -> Array r sh e #

O(n). Construct a manifest array from a list.

# Converting between representations

computeS :: (Load r1 sh e, Target r2 e) => Array r1 sh e -> Array r2 sh e #

Sequential computation of array elements.

computeP :: (Load r1 sh e, Target r2 e, Source r2 e, Monad m) => Array r1 sh e -> m (Array r2 sh e) #

Parallel computation of array elements.

suspendedComputeP :: (Load r1 sh e, Target r2 e) => Array r1 sh e -> Array r2 sh e #

Suspended parallel computation of array elements.

This version creates a thunk that will evaluate the array on demand.
If you force it when another parallel computation is already running
then you will get a runtime warning and evaluation will be sequential.
Use `deepSeqArray` and `now` to ensure that each array is evaluated
before proceeding to the next one.

If unsure then just use the monadic version `computeP`. This one ensures
that each array is fully evaluated before continuing.

copyS :: (Source r1 e, Load D sh e, Target r2 e) => Array r1 sh e -> Array r2 sh e #

Sequential copying of arrays.

copyP :: (Source r1 e, Source r2 e, Load D sh e, Target r2 e, Monad m) => Array r1 sh e -> m (Array r2 sh e) #

Parallel copying of arrays.

- This is a wrapper that delays an array before calling `computeP`.
- You can use it to copy manifest arrays between representations.

suspendedCopyP :: (Source r1 e, Load D sh e, Target r2 e) => Array r1 sh e -> Array r2 sh e #

Suspended parallel copy of array elements.

now :: (Shape sh, Source r e, Monad m) => Array r sh e -> m (Array r sh e) #

Monadic version of `deepSeqArray`.

Forces a suspended array computation to be completed at this point in a monadic computation.

```haskell
do  let arr2 = suspendedComputeP arr1
    ...
    arr3 <- now $ arr2
    ...
```

# Chunked filling

:: Int | Number of elements. |

-> (Int -> a -> IO ()) | Update function to write into result buffer. |

-> (Int -> a) | Fn to get the value at a given index. |

-> IO () |

Fill something sequentially.

- The array is filled linearly from start to finish.

:: Int | Number of elements. |

-> (Int -> a -> IO ()) | Update function to write into result buffer. |

-> (Int -> a) | Fn to get the value at a given index. |

-> IO () |

Fill something in parallel.

- The array is split into linear chunks, and each thread linearly fills one chunk.

:: Int | Number of elements. |

-> (Int -> a -> IO ()) | Update fn to write into result buffer. |

-> (Int -> IO (Int -> IO a)) | Create a fn to get the value at a given index.
The first `Int` is the thread number, so you can do some per-thread initialisation. |

-> IO () |

Fill something in parallel, using a separate IO action for each thread.

- The array is split into linear chunks, and each thread linearly fills one chunk.

# Interleaved filling

:: Int | Number of elements. |

-> (Int -> a -> IO ()) | Update function to write into result buffer. |

-> (Int -> a) | Fn to get the value at a given index. |

-> IO () |

Fill something in parallel.

- The elements are interleaved between the threads: with n threads, thread t fills elements t, t + n, t + 2n, and so on.

# Blockwise filling

:: Elt a | |

=> (Int -> a -> IO ()) | Update function to write into result buffer. |

-> (DIM2 -> a) | Function to evaluate the element at an index. |

-> Int# | Width of the whole array. |

-> Int# | x0 lower left corner of block to fill |

-> Int# | y0 |

-> Int# | w0 width of block to fill. |

-> Int# | h0 height of block to fill. |

-> IO () |

Fill a block in a rank-2 array in parallel.

- Blockwise filling can be more cache-efficient than linear filling for rank-2 arrays.
- Coordinates given are of the filled edges of the block.
- We divide the block into columns, and give one column to each thread.
- Each column is filled in row major order from top to bottom.

:: (Int -> a -> IO ()) | Update function to write into result buffer. |

-> (DIM2 -> a) | Fn to get the value at the given index. |

-> Int# | Width of the whole array. |

-> Int# | x0 lower left corner of block to fill. |

-> Int# | y0 |

-> Int# | w0 width of block to fill |

-> Int# | h0 height of block to fill |

-> IO () |

Fill a block in a rank-2 array, sequentially.

- Blockwise filling can be more cache-efficient than linear filling for rank-2 arrays.
- The block is filled in row major order from top to bottom.

# Cursored blockwise filling

:: Elt a | |

=> (Int -> a -> IO ()) | Update function to write into result buffer. |

-> (DIM2 -> cursor) | Make a cursor to a particular element. |

-> (DIM2 -> cursor -> cursor) | Shift the cursor by an offset. |

-> (cursor -> a) | Function to evaluate an element at the given index. |

-> Int# | Width of the whole array. |

-> Int# | x0 lower left corner of block to fill. |

-> Int# | y0 |

-> Int# | w0 width of block to fill |

-> Int# | h0 height of block to fill |

-> IO () |

Fill a block in a rank-2 array, sequentially.

- Blockwise filling can be more cache-efficient than linear filling for rank-2 arrays.
- Using cursor functions can help to expose inter-element indexing computations to the GHC and LLVM optimisers.
- Coordinates given are of the filled edges of the block.
- The block is filled in row major order from top to bottom.

:: Elt a | |

=> (Int -> a -> IO ()) | Update function to write into result buffer. |

-> (DIM2 -> cursor) | Make a cursor to a particular element. |

-> (DIM2 -> cursor -> cursor) | Shift the cursor by an offset. |

-> (cursor -> a) | Function to evaluate the element at an index. |

-> Int# | Width of the whole array. |

-> Int# | x0 lower left corner of block to fill |

-> Int# | y0 |

-> Int# | w0 width of block to fill |

-> Int# | h0 height of block to fill |

-> IO () |

Fill a block in a rank-2 array in parallel.

- Blockwise filling can be more cache-efficient than linear filling for rank-2 arrays.
- Using cursor functions can help to expose inter-element indexing computations to the GHC and LLVM optimisers.
- Coordinates given are of the filled edges of the block.
- We divide the block into columns, and give one column to each thread.
- Each column is filled in row major order from top to bottom.

# Chunked selection

:: Shape sh | |

=> (sh -> a -> IO ()) | Update function to write into result. |

-> (sh -> Bool) | See if this predicate matches. |

-> (sh -> a) | .. and apply fn to the matching index |

-> sh | Extent of indices to apply to predicate. |

-> IO Int | Number of elements written to destination array. |

Select indices matching a predicate.

- This primitive can be useful for writing filtering functions.

Select indices matching a predicate, in parallel.

- This primitive can be useful for writing filtering functions.
- The array is split into linear chunks, with one chunk being given to each thread.
- The number of elements in the result array depends on how many threads you're running the program with.