// g++ -Wall -Wextra -O3 -D_REENTRANT source.cc

#include <vector>

#include <stdlib.h>
#include <unistd.h>
#include <assert.h>
#include <pthread.h>
#include <inttypes.h>


typedef sig_atomic_t atomic_t;


inline atomic_t atomic_swap_smp( const atomic_t new_value, atomic_t * the_value )
{
   atomic_t nrv;

   asm volatile ( "\n0:\tlwarx %0,0,%2\n\tstwcx. %1,0,%2\n\tbne- 0b"
		  : "=r" ( nrv )
		  : "r" ( new_value ), "r" ( the_value )
		  : "memory", "cc" );
   return nrv;
}


atomic_t dummy[ 64 ];


inline bool atomic_cas_smp( const atomic_t old_value, const atomic_t new_value, atomic_t * the_value )
{
   atomic_t tmp;

   asm volatile ( "lwarx %0,0,%1"
		  : "=r" ( tmp )
		  : "r" ( the_value )
		  : );

   if ( tmp != old_value )
      return false;

   atomic_t sto;

   asm volatile ( "stwcx. %1,0,%2\n\tmfcr %0"
		  : "=r" ( sto )
		  : "r" ( new_value ), "r" ( the_value )
		  : "memory", "cc" );

   return sto & 0x20000000;
}
   

struct element
{
   union
   {
      element * next;
      atomic_t value;
   };
};


class list_fixed
{
public:
   explicit list_fixed( unsigned prefill )
	 : inList( 0 )
   {
      while ( prefill-- )
	 enqueue( new element );
   }

   void enqueue( element * const inNewLink )
   {
      element * oldListHead;
        
      do {
	 oldListHead = inList;

	 inNewLink->next = oldListHead;
	 asm volatile ( "sync" );  // make write visible

      } while ( ! atomic_cas_smp( ( int32_t )oldListHead, 
				  ( int32_t )inNewLink,
				  ( int32_t * )( & inList ) ) );
   }


   element * dequeue()
   {
      element * nrv = 0;

      while ( true )
      {
	 // obtain the whole list

	 if ( ( nrv = ( element * )atomic_swap_smp( 0, ( atomic_t * )( & inList ) ) ) == 0 )
	    continue;  // try again if empty

	 element * next = nrv->next;

	 if ( next == 0 )
	    return nrv;  // nrv is the only element

	 // atomically put back the rest of the list

	 //	 asm volatile ( "isync" ); // required for "real" operation

	 if ( atomic_cas_smp( 0, ( atomic_t )next, ( atomic_t * )( & inList ) ) )
	    return nrv;  // no other element were enqueued in the meantime

	 // atomic put back failed because inList was modified, do one-by-one

	 do {
	    element * tmp = next->next;
	    enqueue( next );
	    next = tmp;

	 } while ( next );

	 return nrv;
      }
   }

private:
   element * inList;
} __attribute__ (( aligned ( 128 ) ));


const unsigned SIZE = 16;


void * thread( void * arg )
{
   unsigned seed = getpid() + time( 0 ) + unsigned( pthread_self() );

   list_fixed & the_list( * reinterpret_cast< list_fixed * >( arg ) );

   element * elements[ SIZE ];

   memset( elements, 0, sizeof( elements ) );

   while ( true )
   {
      unsigned index = rand_r( & seed ) % SIZE;

      if ( elements[ index ] )
      {
	 the_list.enqueue( elements[ index ] );
	 elements[ index ] = 0;
      }
      else // no element there yet.
      {
	 elements[ index ] = the_list.dequeue();
	 elements[ index ]->value = 1;
      }
   }
   return 0;
}


int main( int, char ** )
{
   pthread_t dummy;

   list_fixed the_list( 256 );

   for ( unsigned i = 0; i < 3; ++i )
      assert( pthread_create( & dummy, 0, & thread, & the_list ) == 0 );

   ::sleep( 1000 );
   _exit( 0 );
   return 0;
}
