Programming Example

This sample program uses the F32vec4 class to compute the average of the elements of a 20-element floating-point array.

// Include Streaming SIMD Extension Class Definitions

#include <fvec.h>

 

// SHUFFLE(a, b, i): build a new F32vec4 from two source vectors.
// The two low single-precision elements of the result are selected
// from a, and the two high elements are selected from b. The 8-bit
// immediate i holds four 2-bit element indices, lowest result
// element first.
// The whole expansion is parenthesized so the cast always applies to
// the intrinsic's result, whatever expression the macro is used in.
#define SHUFFLE(a,b,i) ((F32vec4)_mm_shuffle_ps((a), (b), (i)))

#include <stdio.h>

#define SIZE 20

 

// Global variables

float result;

_MM_ALIGN 16 float array[SIZE];

 

//*****************************************************

// Function: Add20ArrayElements

// Add all the elements of a 20 element array

//*****************************************************

 

void Add20ArrayElements (F32vec4 *array, float *result)

{

   F32vec4 vec0, vec1;

   vec0 = _mm_load_ps ((float *) array); // Load array's first 4 floats

 

   //*****************************************************

   // Add all elements of the array, 4 elements at a time

   //******************************************************

 

   vec0 += array[1]; // Add elements 5-8

   vec0 += array[2]; // Add elements 9-12

   vec0 += array[3]; // Add elements 13-16

   vec0 += array[4]; // Add elements 17-20

 

   //*****************************************************

   // There are now 4 partial sums.

   // Add the 2 lowers to the 2 raises,

   // then add those 2 results together

   //*****************************************************

 

   vec1 = SHUFFLE(vec1, vec0, 0x40);

   vec0 += vec1;

   vec1 = SHUFFLE(vec1, vec0, 0x30);

   vec0 += vec1;

   vec0 = SHUFFLE(vec0, vec0, 2);

   _mm_store_ss (result, vec0); // Store the final sum

}

 

void main(int argc, char *argv[])

{

   int i;

   // Initialize the array

   for (i=0; i < SIZE; i++)

   {

      array[i] = (float) i;

   }

 

   // Call function to add all array elements

   Add20ArrayElements (array, &result);

 

   // Print average array element value

   printf ("Average of all array values = %f\n", result/20.);

   printf ("The correct answer is %f\n\n\n", 9.5);

}