// blob: e37a0130aa3fd9cefe2bc771df7d2dc22de8b5ea [file] [log] [blame]
#ifndef LRT__FRAMEBUFFER_HXX
#define LRT__FRAMEBUFFER_HXX
/* Debug switch: with the active definition DBG_DISPLAY(a) expands to
   nothing, so whatever is wrapped in it is compiled out. Swap it with the
   commented-out definition below to make the wrapped code take effect. */
#define DBG_DISPLAY(a)
// #define DBG_DISPLAY(a) a
#include "RTTL/common/RTInclude.hxx"
#include "RTTL/common/RTVec.hxx"
#define GL_GLEXT_PROTOTYPES
#ifdef THIS_IS_APPLE
#include <glut.h>
#else
#include <GL/glut.h>
#endif
#if defined(_WIN32)
extern "C" {
#include "glext_win.h"
}
#else
#if !defined(sun) && !defined(__sun) && !defined(__sun__)
#include "GL/glext.h"
#endif
#endif
/* Brace macros used to open and close a namespace body, written as
   `namespace X BEGIN_NAMESPACE ... END_NAMESPACE`. They expand to plain
   `{` and `}`. */
#define BEGIN_NAMESPACE {
#define END_NAMESPACE }
using namespace std;
namespace LRT BEGIN_NAMESPACE
/* Short local aliases for the RTTL two-component vector types
   (presumably int and float components, judging by the names -- see
   RTTL/common/RTVec.hxx to confirm). */
typedef RTTL::RTVec2i vec2i;
typedef RTTL::RTVec2f vec2f;
/*! Abstract base class of a frame buffer: a plain old array of pixels
  stored as RGBA, RGBA, RGBA, etc. No Z-ordering or anything like that.

  This class only provides the interface for resizing the buffer and the
  functionality for fast writing to it. It never allocates 'fb' itself;
  memory allocation is expected to be done by the derived classes
  (mapping fb to a PBO, or allocating it, depending on implementation).

  \note to allow fast writing, we currently force the frame buffer
  resolution to be a multiple of 4 in each direction (see resize()).
*/
struct RGBAucharFrameBuffer
{
  /*! class that encapsulates all framebuffer related options */
  struct Options {
    /*! if set to true, FrameBuffer::create() will try to allocate a
      PBO framebuffer. This may still fail (e.g., if PBO support is not
      compiled in, or if the HW doesn't support it), but at least it
      will try. If set to false, we're not even trying, and will
      directly revert to a non-PBO framebuffer */
    static bool usePBOs;
    /*! if set to true, FrameBuffer::create() will use a linear region
      in main memory as framebuffer, mostly useful for debugging or
      no-display rendering */
    static bool useMemoryFB;
    /*! default resolution of frame buffer. to be used by the
      application ... */
    static vec2i defaultRes;
  };

  vec2i res;         /*!< resolution (width x height) of frame buffer */
  unsigned char *fb; /*!< pixel storage; initialised to NULL here and
                       never allocated by this base class */

  /*! start out with an empty (0x0) frame buffer and no pixel storage */
  RGBAucharFrameBuffer()
    : res(0,0), fb(NULL)
  {}

  /*! virtual destructor: instances are handed out through a base-class
    pointer (see create()), so destroying one through that pointer must
    reach the derived class' destructor. */
  virtual ~RGBAucharFrameBuffer()
  {}

  /*! write a block of pixels. */
  _INLINE void writeBlock(const int x0, const int y0,
                          const int dx, const int dy,
                          const sse_i *four4x8PixelsEach) const;

  /*! prefetch counterpart of writeBlock() for the same block
    coordinates. */
  _INLINE void prefetchBlock(const int x0, const int y0,
                             const int dx, const int dy) const;

  /*! factory returning a frame buffer through a base-class pointer;
    defined outside this header. */
  static RGBAucharFrameBuffer *create();

  /*! resize the frame buffer.

    The requested size is passed through nextMultipleOf<4>() and clamped
    to a minimum of 32 in each dimension (a multiple of 4 is needed to
    write in SIMD). This base implementation only records the resulting
    resolution in 'res'; it does not touch 'fb'.

    \param newX requested width in pixels
    \param newY requested height in pixels
  */
  virtual void resize(int newX, int newY)
  {
    // framebuffer must be at least 32x32, and at least a multiple of
    // 4 in each dimension ... (else can't write in SIMD ...)
    newX = max(32,nextMultipleOf<4>(newX));
    newY = max(32,nextMultipleOf<4>(newY));
    vec2i newRes(newX,newY);
    if (res != newRes) {
      res = newRes;
    }
  }

  /*! called at the start of a frame. The original note here reads
    "re-allocate fb pointer", i.e. implementations presumably
    (re-)establish 'fb' at this point -- confirm against the derived
    classes. */
  virtual void startNewFrame() = 0;
  /*! called once all writes for the current frame are done */
  virtual void doneWithFrame() = 0;
  /*! present the frame buffer contents */
  virtual void display() = 0;
};
/*! just easier to type: FrameBuffer is a shorthand alias for
  RGBAucharFrameBuffer */
typedef RGBAucharFrameBuffer FrameBuffer;
// =======================================================
// =======================================================
// =======================================================
// =======================================================
// IMPLEMENTATION:
// =======================================================
// =======================================================
// =======================================================
// =======================================================
/*! write a rectangular block of pixels into the frame buffer.

  The code is the same for both the PBO and the non-PBO version, just
  what 'fb' points to will be different ...

  The source pixels are consumed row by row, one SSE vector (four 32-bit
  pixels) per four destination pixels, i.e. dx/4 vectors per row. All
  stores use _mm_stream_ps, so every destination address must be 16-byte
  aligned: 'fb' must be 16-byte aligned and x0 and dx must be multiples
  of 4 (res.x is forced to a multiple of 4 by resize()).

  \param x0,y0 top-left pixel of the block inside the frame buffer
  \param dx,dy width and height of the block in pixels
  \param four4x8PixelsEach source pixels, four per SSE vector, row-major
*/
_INLINE void RGBAucharFrameBuffer::writeBlock(const int x0, const int y0,
                                              const int dx, const int dy,
                                              const sse_i *four4x8PixelsEach)
  const
{
  // assert(fb);
  // assert(is_aligned<16>(fb));
  // assert(is_divisible<4>(dx));
  // assert(is_divisible<4>(dy));
  unsigned int *const fb_as_int32 = (unsigned int *)fb;

  if (__builtin_expect(dx == 8 && dy == 8,1))
    {
      // fast path for the expected 8x8 packet: two vectors per row.
      unsigned int *start = &fb_as_int32[y0*res.x+x0];
#pragma unroll(8)
      for (int y=0;y<8;y++,start+=res.x)
        {
          _mm_stream_ps((float*)&start[0],cast(four4x8PixelsEach[y*2+0]));
          _mm_stream_ps((float*)&start[4],cast(four4x8PixelsEach[y*2+1]));
        }
      return;
    }

  // generic path for any other block size: previously such blocks were
  // silently dropped (only their dimensions were printed). Copy four
  // pixels at a time, consuming the source vectors in order.
  for (int y=y0; y<y0+dy; y++)
    {
      float *const line = (float *)&fb_as_int32[y*res.x];
      for (int x=x0; x<x0+dx; x+=4)
        {
          _mm_stream_ps(&line[x],cast(*four4x8PixelsEach++));
        }
    }
}
/*! prefetch hook for the block that writeBlock() would write with the
  same arguments.

  Currently a deliberate no-op: the actual prefetch is disabled and only
  kept below for reference. The previous body left a '#pragma unroll'
  without a loop after it (rejected or warned about by compilers) and
  computed addresses from 'fb' that were never used.

  \param x0,y0 top-left pixel of the block inside the frame buffer
  \param dx,dy width and height of the block in pixels
*/
_INLINE void RGBAucharFrameBuffer::prefetchBlock(const int x0, const int y0,
                                                 const int dx, const int dy)
  const
{
  // disabled prefetch (was only ever meant for the 8x8 case):
  //   unsigned int *start = &((unsigned int *)fb)[y0*res.x+x0];
  //   for (int y=0;y<8;y++,start+=res.x) _mm_prefetch((char*)start,_MM_HINT_NTA);
  (void)x0;
  (void)y0;
  (void)dx;
  (void)dy;
}
END_NAMESPACE
#endif