CUDA Fluid Simulation: Fluid.cu Source File

00001 
00057 /*
00058  * Copyright 2009 StingNET.  All rights reserved.
00059  *
00060  * Any use of this source code in individual and commercial software must
00061  * include, in the user documentation and internal comments to the code,
00062  * credit to the author Dirk Hekhuis.
00063  *
00064  * The code in this program is based on code from 
00065  * Jos Stam's paper 'Real-Time Fluid Dynamics for Games'.
00066  */
00067 
00068 #include <stdio.h>
00069 #include <stdlib.h>
00070 #include <cutil.h>
00071 #include "glut.h"
00072 
00073 #define IX(i,j) ((i)+(N+2)*(j)) 
00074 #define SWAP(x0,x) {float *tmp = x0; x0 = x; x = tmp;} 
00075 #define DIM 128 
00076 #define SIZE DIM*DIM 
00077 #define max(x,y) x>y?x:y 
00078 #define BLOCK_SIZE 32 
00079 
00080 
00090 void printArray (int N, float *x);
00091 
00092 
00100 void init (void);
00101 
00102 
00109 void display (void);
00110 
00111 
00117 void idle (void);
00118 
00119 
00140 void reshape (int x, int y);
00141 
00142 
00153 void keyboard (unsigned char key, int x, int y);
00154 
00155 
00167 void click (int button, int state, int x, int y);
00168 
00169 
00180 void motion (int x, int y);
00181 
00182 
00192 void timer (int value);
00193 
00194 
00207 void updateFluid (int button, int mouseMove, int state);
00208 
00209 
00221 __global__ void gpuKernelAdd_Source (int N, float *gpuX, float *gpuS, float dt);
00222 
00223 
00243 __global__ void gpuKernelDiffuse(int N,int b, float *gpuX, float *gpuX0, float diff,float dt,int *gpuBoundary);
00244 
00245 
00262 __global__ void gpuKernelAdvect(int N, int b, float *gpuD, float *gpuD0, float *gpuU, float *gpuV, float dt, int *gpuBoundary);
00263 
00264 
00279 __global__ void gpuKernelProjectStep1(int N, float *gpuU, float *gpuV, float *gpuP, float *gpuDiv, int *gpuBoundary);
00280 
00281 
00294 __global__ void gpuKernelProjectStep2(int N, float *gpuP, float *gpuDiv, int *gpuBoundary);
00295 
00296 
00310 __global__ void gpuKernelProjectStep3(int N, float *gpuU, float *gpuV, float *gpuP, int *gpuBoundary);
00311 
00312 
00319 void gpuCalcFluid(void);
00320 
00321 
00322 float u[SIZE]; 
00323 float v[SIZE]; 
00324 float u_prev[SIZE]; 
00325 float v_prev[SIZE]; 
00326 float dens[SIZE]; 
00327 float dens_prev[SIZE]; 
00328 int boundary[SIZE]; 
00329 int wWidth = max(512,DIM); 
00330 int wHeight = max(512,DIM); 
00331 float cellSize; 
00332 float diff = 0.5f; 
00333 float dt = .2f; 
00334 float visc = 0.0f; 
00335 int stepToggle = 0; 
00336 int runToggle = 1; 
00337 int consoleToggle = 0; 
00338 int fluidToggle = 0; 
00339 int densityToggle = 1; 
00340 int velocityToggle = 0; 
00341 int gridToggle = 0; 
00342 int boundaryToggle = 1; 
00343 int drawBoundaryToggle = 0; 
00344 int id; 
00345 int lastX = 0; 
00346 int lastY = 0; 
00347 int currentX = 0; 
00348 int currentY = 0; 
00349 int clicked = 0; 
00350 int leftClick = 0; 
00351 int N = DIM-2; 
00352 int emitterLoc = DIM/2; 
00353 int frameRate = 60; 
00354 int updown = 0; 
00355 int lastCellX; 
00356 int lastCellY; 
00357 float totalDensity = 0; 
00358 
00359 
00360 int main(int argc, char *argv[]) {
00361         int i;
00362 
00363         // Initialize arrays to 0
00364         for (i=0; i<SIZE; i++)
00365         {
00366                 u[i] = 0.0f;
00367                 u_prev[i] = 0.0f;
00368                 v[i] = 0.0f;
00369                 v_prev[i] = 0.0f;
00370                 dens[i] = 0.0f;
00371                 dens_prev[i] = 0.0f;
00372                 boundary[i] = 0;
00373         }
00374         
00375         // Initialize GLUT
00376         glutInit(&argc, argv);
00377         glutInitDisplayMode(GLUT_RGBA | GLUT_DOUBLE);
00378         glutInitWindowSize(wWidth, wHeight);
00379         glutCreateWindow("Compute Stable Fluids");
00380         init();
00381         glutDisplayFunc(display);
00382         glutReshapeFunc(reshape);
00383         glutIdleFunc(idle);
00384         glutKeyboardFunc(keyboard);
00385         glutMouseFunc(click);
00386         glutMotionFunc(motion);
00387         glutTimerFunc(100, timer, frameRate);
00388 
00389         CUDA_SAFE_CALL(cudaSetDevice(2));
00390 
00391         // Start GLUTs main looop
00392         glutMainLoop();
00393 
00394         return 0;
00395 }
00396 
00397 void printArray (int N, float *x)
00398 {
00399         int i, j;
00400 
00401         // Outer loop for the row
00402         for (i=N+1; i>=0; i--)
00403         {
00404                 // Inner loop for the column
00405                 for (j=0; j<N+2; j++)
00406                 {
00407                         printf("%f ", x[IX(i,j)]);
00408                 }
00409 
00410                 // Star printing on the next line
00411                 printf("\n");
00412         }
00413 }
00414 
00415 void init (void)
00416 {
00417         // Set the size of the each cell to be drawn
00418         cellSize = wWidth/N;
00419 
00420         // Create a list for OpenGL
00421         id = glGenLists(1);
00422 
00423         // Create a display list to hold the cell to be drawn
00424         glNewList(id, GL_COMPILE);
00425         {
00426                 glBegin(GL_QUADS);
00427                 {
00428                         glVertex3f(-cellSize/2.0f, cellSize/2.0f, 0);
00429                         glVertex3f(-cellSize/2.0f, -cellSize/2.0f, 0);
00430                         glVertex3f(cellSize/2.0f, -cellSize/2.0f, 0);
00431                         glVertex3f(cellSize/2.0f, cellSize/2.0f, 0);
00432                 }
00433                 glEnd();
00434         }
00435         glEndList();
00436 
00437         // Set the clear color to black
00438         glClearColor(0x00, 0x00, 0x00, 1);
00439 }
00440 
00441 void display (void)
00442 {
00443         int i, j;
00444         float c;
00445 
00446         // Clear the screen for the next drawing
00447         glClear (GL_COLOR_BUFFER_BIT);
00448 
00449         // The program is running or being stepped thru
00450         if(runToggle || stepToggle)
00451         {
00452                 // Calculations
00453                 gpuCalcFluid();
00454 
00455                 // Calculate the total density
00456                 totalDensity = 0;
00457                 for (i=N; i>=1; i--)
00458                 {
00459                         for (j=1; j<=N; j++)
00460                         {
00461                                 totalDensity += dens[IX(i,j)];
00462                         }
00463                 }
00464 
00465                 /* Print to the console */
00466                 if (consoleToggle)
00467                 {
00468                         // Print out the densisty field
00469                         printf("\n\nDensity Field\n");
00470                         printArray(N, dens);
00471 
00472                         // Print out the velocity field in the x direction
00473                         printf("\n\nu Field\n");
00474                         printArray(N, u);
00475 
00476                         // Print out the velocity field in the y direction
00477                         printf("\n\nv Field\n");
00478                         printArray(N, v);
00479 
00480                         // Print out the total density in the simulation
00481                         printf("\n\nTotal Density: %f\n", totalDensity);
00482                 }
00483 
00484                 // Reset the step toggle
00485                 stepToggle = 0;
00486         }
00487 
00488         // Loop thru all the elements of the arrays
00489         for (i=N; i>=1; i--)
00490         {
00491                 for (j=1; j<=N; j++)
00492                 {
00493                         /* Draws the boundary */
00494                         if(boundaryToggle)
00495                         {
00496                                 // If there is a boundary make the cell blue
00497                                 if(boundary[IX(i,j)] == 1)
00498                                         glColor3ub(0x00, 0x00, 0xFF);
00499                                 // If there is no boundary make the cell black
00500                                 else
00501                                         glColor3ub(0x00, 0x00, 0x00);
00502 
00503                                 glPushMatrix();
00504                                 {
00505                                         // Move the cell into the correct position on the screen
00506                                         glTranslatef(((j)*cellSize)+(cellSize/2), ((i)*cellSize)+(cellSize/2), 0);
00507 
00508                                         // Draw the cell
00509                                         glCallList(id);
00510                                 }
00511                                 glPopMatrix();
00512                         }
00513 
00514                         /* Draws the density field */
00515                         if(densityToggle)
00516                         {
00517                                 if(dens[IX(i,j)] > 0)
00518                                 {
00519                                         // Set the color of the cell based on the amount of density in the cell
00520                                         c = (dens[IX(i,j)]);
00521                                         if(c == 0)
00522                                                 glColor3f(0,0,0);
00523                                         else if(c < .33)
00524                                                 glColor3f(0,0,c);
00525                                         else if(c < .66)
00526                                                 glColor3f(0,c,0);
00527                                         else
00528                                                 glColor3f(c,0,0); 
00529                                         
00530                                         glPushMatrix();
00531                                         {
00532                                                 // Move the cell into the correct position on the screen
00533                                                 glTranslatef(((j)*cellSize)+(cellSize/2), ((i)*cellSize)+(cellSize/2), 0);
00534 
00535                                                 // Draw the cell
00536                                                 glCallList(id);
00537                                         }
00538                                         glPopMatrix();
00539                                 }
00540                         }
00541 
00542                         /* Draws the velocity field */
00543                         if(velocityToggle)
00544                         {
00545                                 // Set the color of the velocity lines to red
00546                                 glColor3ub(0xFF, 0x00, 0x00);
00547                                 
00548                                 glPushMatrix();
00549                                 {
00550                                         float x, y;
00551 
00552                                         // The x position of the end of the line
00553                                         x = u[IX(i,j)] * cellSize * 10;
00554 
00555                                         // The y position of the end of the line
00556                                         y = v[IX(i,j)] * cellSize * 10;
00557 
00558                                         // Move the line to the center of the cell
00559                                         glTranslatef(((j)*cellSize)+(cellSize/2), ((i)*cellSize)+(cellSize/2), 0);
00560 
00561                                         // Draw the line
00562                                         glBegin(GL_LINES);
00563                                         {
00564                                                 glVertex3f(0, 0, 0);
00565                                                 glVertex3f(x, y, 0);
00566                                         }
00567                                         glEnd();
00568                                 }
00569                                 glPopMatrix();
00570                         }
00571 
00572                         /* Draws the grid */
00573                         if(gridToggle)
00574                         {
00575                                 // Set the color of the grid to green
00576                                 glColor3ub(0x00, 0xFF, 0x00);
00577 
00578                                 glPushMatrix();
00579                                 {
00580                                         glPushAttrib(GL_POLYGON_BIT);
00581                                         {
00582                                                 // Show the outline of the cell instead of filling it in
00583                                                 glPolygonMode (GL_FRONT, GL_LINE);
00584 
00585                                                 // Move the cell into the correct position on the screen
00586                                                 glTranslatef(((j)*cellSize)+(cellSize/2), ((i)*cellSize)+(cellSize/2), 0);
00587 
00588                                                 // Draw the cell
00589                                                 glCallList(id);
00590                                         }
00591                                         glPopAttrib();
00592                                 }
00593                                 glPopMatrix();
00594                         }
00595                 }
00596         }
00597 
00598         // Swap the back buffer with the top buffer to draw the new image
00599         glutSwapBuffers();
00600 }
00601 
00602 void idle (void)
00603 {
00604         // Calls display()
00605         glutPostRedisplay();
00606 }
00607 
00608 
00609 void reshape (int x, int y)
00610 {
00611         // Set the width to the new window width
00612         wWidth = x;
00613 
00614         // Set the height to the new window height
00615         wHeight = y;
00616 
00617         // Delete the current display list
00618         glDeleteLists(id,0);
00619 
00620         // Called to create a new display list with the new cell size
00621         init();
00622 
00623         // Set up the camera
00624         glViewport(0, 0, x, y);
00625         glMatrixMode(GL_PROJECTION);
00626         glLoadIdentity();
00627         glOrtho(0, wWidth, 0, wHeight, 0, 1);
00628         glMatrixMode(GL_MODELVIEW);
00629         glLoadIdentity();
00630 
00631         // Calls display()
00632         glutPostRedisplay();
00633 }
00634 
00635 void keyboard (unsigned char key,int x, int y)
00636 {
00637         int i;
00638 
00639         switch(key)
00640         {
00641                 // Esc key was pressed
00642                 case 27:
00643                         exit(0);
00644 
00645                 // c key was pressed
00646                 case 'c':
00647                         consoleToggle = !consoleToggle;
00648                         break;
00649 
00650                 // s key was pressed
00651                 case 's':
00652                         runToggle = !runToggle;
00653                         break;
00654 
00655                 // f key was pressed
00656                 case 'f':
00657                         dens_prev[IX(1,emitterLoc)] = N * cellSize;
00658                         v_prev[IX(1,emitterLoc)] = cellSize;
00659                         break;
00660 
00661                 // d key was pressed
00662                 case 'd':
00663                         densityToggle = !densityToggle;
00664                         break;
00665 
00666                 // v key was pressed
00667                 case 'v':
00668                         velocityToggle = !velocityToggle;
00669                         break;
00670 
00671                 // g key was pressed
00672                 case 'g':
00673                         gridToggle = !gridToggle;
00674                         break;
00675 
00676                 // b key was pressed
00677                 case 'b':
00678                         boundaryToggle = !boundaryToggle;
00679                         break;
00680 
00681                 // shift b key was pressed
00682                 case 'B':
00683                         drawBoundaryToggle = !drawBoundaryToggle;
00684                         break;
00685 
00686                 // r key was pressed
00687                 case 'r':
00688                         // Initialize arrays to 0
00689                         for (i=0; i<SIZE; i++)
00690                         {
00691                                 u[i] = 0.0f;
00692                                 u_prev[i] = 0.0f;
00693                                 v[i] = 0.0f;
00694                                 v_prev[i] = 0.0f;
00695                                 dens[i] = 0.0f;
00696                                 dens_prev[i] = 0.0f;
00697                                 boundary[i] = 0;
00698                         }
00699                         break;
00700 
00701                 // Spacebar was pressed
00702                 case 32:
00703                         stepToggle = 1;
00704                         break;
00705         }
00706 
00707         // Calls display()
00708         glutPostRedisplay();
00709 }
00710 
00711 void click (int button, int state, int x, int y)
00712 {
00713         updown = state;
00714 
00715         // The left mouse button is pushed down
00716         if(state == 0)
00717         {
00718                 lastCellX = -1;
00719                 lastCellY = -1;
00720         }
00721 
00722         // Set the last position of the mouse
00723         lastX = currentX;
00724         lastY = currentY;
00725 
00726         // Set the current position of the mouse
00727         currentX = x;
00728         currentY = y;
00729 
00730         // Call to interact with gui and fluid
00731         updateFluid(button, 0, updown);
00732 }
00733 
00734 void motion (int x, int y)
00735 {
00736         // The left mouse button is pushed down
00737         if(updown == 0)
00738         {
00739                 lastX = currentX;
00740                 lastY = currentY;
00741 
00742                 currentX = x;
00743                 currentY = y;
00744 
00745                 // Call to interact with gui and fluid
00746                 updateFluid(0, 1, updown);
00747         }
00748 }
00749 
00750 void timer(int value) 
00751 {
00752         // Calls display()
00753         glutPostRedisplay();
00754 
00755         // Create a new timer to replace the current timer
00756         glutTimerFunc(1000/frameRate, timer, value);
00757 }
00758 
00759 void updateFluid (int button, int mouseMove, int state)
00760 {
00761         float fx, fy;
00762         int nx, ny;
00763 
00764         // Find the cell that was clicked on
00765         fx = currentX / (float)wWidth;
00766         fy = (wHeight - currentY) / (float)wHeight;
00767         nx = (int)(fx * DIM);
00768         ny = (int)(fy * DIM);
00769 
00770         // If the mouse goes out of fluid box set it to the edge of the box
00771         if(nx < 1)
00772                 nx = 1;
00773         else if(nx > N)
00774                 nx = N;
00775         if(ny < 1)
00776                 ny = 1;
00777         else if(ny > N)
00778                 ny = N;
00779 
00780         // Right mouse button was clicked
00781         if(button == 2)
00782         {
00783                 // Add density to the cell that was clicked
00784                 dens_prev[IX(ny,nx)] = N * cellSize;
00785 
00786                 // Add velocity to the cells around the cell that
00787                 // density was just added to, to move the inserted density
00788                 u_prev[IX(ny,nx-1)] = -cellSize;
00789                 u_prev[IX(ny,nx+1)] = cellSize;
00790                 v_prev[IX(ny-1,nx)] = -cellSize;
00791                 v_prev[IX(ny+1,nx)] = cellSize;
00792         }
00793 
00794         // The left mouse button is pushed down, the mouse is moving and the user
00795         // isn't drawing boundaries
00796         if(button == 0 && mouseMove && drawBoundaryToggle == 0 && updown == 0)
00797         {
00798                 // Add velocity to the velocity fields of the cell that was clicked on
00799                 // The amount of velocity to add is based on the how far the mouse moves
00800                 u_prev[IX(ny,nx)] += (currentX-lastX) * cellSize;
00801                 v_prev[IX(ny,nx)] += (lastY-currentY) * cellSize;
00802         }
00803 
00804         // The left mouse button is pushed down and the user is drawing boundaries
00805         if(button == 0 && drawBoundaryToggle && updown == 0)
00806         {
00807                 // There is not a boundary at the cell that is selected
00808                 if(boundary[IX(ny,nx)] == 0)
00809                 {
00810                         // The selected cell hasn't been modified in the last step
00811                         if(nx != lastCellX || ny != lastCellY)
00812                         {
00813                                 // Make the selected cell a boundary
00814                                 boundary[IX(ny,nx)] = 1;
00815 
00816                                 // Set the selected cell as the last cell to be modified
00817                                 lastCellX = nx;
00818                                 lastCellY = ny;
00819                         }
00820                 }
00821                 
00822                 // There is a boundary at the cell that is selected
00823                 else
00824                 {
00825                         // The selected cell hasn't been modified in the last step
00826                         if(lastCellX != nx || lastCellY != ny)
00827                         {
00828                                 // Make the selected cell not a boundary
00829                                 boundary[IX(ny,nx)] = 0;
00830 
00831                                 // Set the selected cell as the last cell to be modified
00832                                 lastCellX = nx;
00833                                 lastCellY = ny;
00834                         }
00835                 }
00836         }
00837 
00838         // Calls display()
00839         glutPostRedisplay();
00840 }
00841 
00842 __global__ void gpuKernelAdd_Source (int N, float *gpuX, float *gpuS, float dt)
00843 {
00844         int i, size = (N+2)*(N+2);
00845         int thisEntry;
00846 
00847         thisEntry = blockIdx.x*BLOCK_SIZE + threadIdx.x;
00848 
00849         i = thisEntry % size;
00850 
00851         // Add the value from each position of the array gpuS multiplied by
00852         // the time step to the corresponding position in array gpuX
00853         gpuX[i] += dt*gpuS[i];
00854 }
00855 
00856 __global__ void gpuKernelDiffuse(int N,int b, float *gpuX, float *gpuX0, float diff,float dt,int *gpuBoundary)
00857 {
00858         int i,j,k;
00859         float a = dt*diff*N*N;
00860         int thisEntry; 
00861 
00862         // Perform this step multiple times for increased accuracy
00863         for(k = 0;k < 20;k++) 
00864         {
00865                 thisEntry = blockIdx.x*BLOCK_SIZE + threadIdx.x;
00866 
00867                 j = thisEntry / (N+2);
00868                 i = thisEntry % (N+2);
00869 
00870                 // If the element in array x is a boundary then no fluid
00871                 // should be at that spot so set the density at that spot to 0
00872                 // Also don't calculate the values for the invisible boundaries,
00873                 // just set them to 0
00874                 if (i == 0 || i == N+1 || j == 0 || j == N+1 || gpuBoundary[thisEntry]) 
00875                 {
00876                         gpuX[thisEntry] = 0.0f;
00877                 }
00878 
00879                 // Set the density at the current position to the density of
00880                 // the temporary density field at the current position plus
00881                 // the density of the cells adjacent to the current cell.
00882                 else
00883                 {
00884                         gpuX[thisEntry] = 
00885                         (gpuX0[thisEntry] + a*(gpuX[IX(i-1,j)] +
00886                                                                         gpuX[IX(i+1,j)] +
00887                                                                         gpuX[IX(i,j-1)] +
00888                                                                         gpuX[IX(i,j+1)])) / (1+4*a);
00889                 }
00890 
00891                 // Set the values in the invisible boundary
00892                 // for the velocity field
00893                 if(b == 0)
00894                 {
00895                         if (i == 0) 
00896                         {
00897                                 gpuX[thisEntry] = gpuX[IX(1,j)];
00898                         }
00899                         if (i == (N+1))
00900                         { 
00901                                 gpuX[thisEntry] = gpuX[IX(N,i)];
00902                         }
00903                         if (j == 0) 
00904                         {
00905                                 gpuX[thisEntry] = gpuX[IX(i,1)];
00906                         }
00907                         if (j == (N+1))
00908                         { 
00909                                 gpuX[thisEntry] = gpuX[IX(i,N)];
00910                         }
00911         
00912                         if ((i == 0) && (j == 0))
00913                         {
00914                                 gpuX[thisEntry] = 0.5f*(gpuX[IX(1,0  )] + gpuX[IX(0  ,1)]);
00915                         }
00916                         if ((i == 0) && (j == (N+1)))
00917                         {
00918                                 gpuX[thisEntry] = 0.5f*(gpuX[IX(1,N+1)] + gpuX[IX(0  ,N)]);
00919                         }
00920                         if ((i == (N+1)) && (j == 0))
00921                         {
00922                                 gpuX[thisEntry] = 0.5f*(gpuX[IX(N,0  )] + gpuX[IX(N+1,1)]);
00923                         }
00924                         if ((i == (N+1)) && (j == (N+1)))
00925                         {
00926                                 gpuX[thisEntry] = 0.5f*(gpuX[IX(N,N+1)] + gpuX[IX(N+1,N)]);
00927                         }
00928                 }
00929 
00930                 // Set the values in the invisible boundary
00931                 // for the density field
00932                 else
00933                 {
00934                         if (i == 0) 
00935                         {
00936                                 gpuX[thisEntry] = 0;
00937                 }
00938                         if (i == (N+1))
00939                 { 
00940                                 gpuX[thisEntry] = 0;
00941                         }
00942                         if (j == 0) 
00943                         {
00944                                 gpuX[thisEntry] = 0;
00945                         }               
00946                         if (j == (N+1))
00947                         { 
00948                                 gpuX[thisEntry] = 0;
00949                         }
00950                 }
00951         }
00952 }
00953 
00954 __global__ void gpuKernelAdvect(int N, int b, float *gpuD, float *gpuD0, float *gpuU, float *gpuV, float dt, int *gpuBoundary)
00955 {
00956         int i, j, i0, j0, i1, j1;
00957         float x, y, s0, t0, s1, t1, dt0, temp;
00958         int thisEntry;
00959 
00960         // The time step multiplied by the size of the array
00961         dt0 = dt*N;
00962 
00963         thisEntry = blockIdx.x*BLOCK_SIZE + threadIdx.x;
00964         j = thisEntry / (N+2);
00965         i = thisEntry % (N+2);
00966 
00967         x = i-dt0*gpuU[IX(i,j)];
00968         y = j-dt0*gpuV[IX(i,j)];
00969 
00970         if (x<0.5f)
00971                 x = 0.5f;
00972         if (x>N+0.5f)
00973                 x = N+0.5f;
00974         i0 = (int)x;
00975         i1 = i0+1;
00976 
00977         if (y<0.5f)
00978                 y = 0.5f;
00979         if (y>N+0.5f)
00980                 y = N+0.5f;
00981         j0 = (int)y;
00982         j1 = j0+1;
00983 
00984         s1 = x-i0;
00985         s0 = 1-s1;
00986 
00987         t1 = y-j0;
00988         t0 = 1-t1;
00989 
00990         // If the element in array x is a boundary then no fluid
00991         // should be at that spot so set the density at that spot to 0
00992         // Also don't calculate the values for the invisible boundaries,
00993         // just set them to 0
00994         if (i == 0 || i == N+1 || j == 0 || j == N+1 || gpuBoundary[thisEntry])
00995                 gpuD[thisEntry] = 0.0f;
00996 
00997         // Set the current cells new velocity value
00998         else
00999         {
01000                 // Find the new velocity value for the current cell
01001                 temp = s0 * (t0*gpuD0[IX(i0,j0)] + t1*gpuD0[IX(i0,j1)]) +
01002                                         s1 * (t0*gpuD0[IX(i1,j0)] + t1*gpuD0[IX(i1,j1)]);
01003 
01004                 // If velocity is moving in the horizontal direction and is moving left
01005                 // and there is a boundary to the left of the current cell
01006                 if(b == 1 && temp < 0 && gpuBoundary[IX(i,j-1)])
01007                         // Make the direction of the velocity go in the opposite direction
01008                         gpuD[IX(i,j)] = -temp;
01009 
01010                 // If the velocity is moving in the horizontal direction and is moving right
01011                 // and there is a boundary to the right of the current cell
01012                 else if(b == 1 && temp > 0 && gpuBoundary[IX(i,j+1)])
01013                         // Make the direction of the velocity go in the opposite direction
01014                         gpuD[IX(i,j)] = -temp;
01015 
01016                 // If the velocity is moving in the vertical direction and is moving down
01017                 // and there is a boundary below the current cell
01018                 else if(b == 2 && temp < 0 && gpuBoundary[IX(i-1,j)])
01019                         // Make the direction of the velocity go in the opposite direction
01020                         gpuD[IX(i,j)] = -temp;
01021 
01022                 // If the velocity is moving in the verticle direction and is moving down
01023                 // and there is a boundary above the current cell
01024                 else if(b == 2 && temp > 0 && gpuBoundary[IX(i+1,j)])
01025                         // Make the direction of the velocity go in the opposite direction
01026                         gpuD[IX(i,j)] = -temp;
01027 
01028                 // Set the new velocity value to the current cell
01029                 else
01030                         gpuD[IX(i,j)] = temp;   
01031         }
01032 
01033         // Set the values in the invisible boundary
01034         // for the velocity field
01035         if(b == 0)
01036         {
01037                 if (i == 0) 
01038                 {
01039                         gpuD[thisEntry] = gpuD[IX(1,j)];
01040         }
01041                 if (i == (N+1)) 
01042                 { 
01043                         gpuD[thisEntry] = gpuD[IX(N,i)];
01044         }
01045                 if (j == 0) 
01046                 {
01047                         gpuD[thisEntry] = gpuD[IX(i,1)];
01048         }
01049                 if (j == (N+1))
01050         { 
01051                         gpuD[thisEntry] = gpuD[IX(i,N)];
01052         }
01053 
01054         if ((i == 0) && (j == 0)) 
01055                 {
01056                         gpuD[thisEntry] = 0.5f*(gpuD[IX(1,0  )] + gpuD[IX(0  ,1)]);
01057                 }
01058         if ((i == 0) && (j == (N+1))) 
01059                 {
01060                         gpuD[thisEntry] = 0.5f*(gpuD[IX(1,N+1)] + gpuD[IX(0  ,N)]);
01061                 }
01062         if ((i == (N+1)) && (j == 0)) 
01063                 {
01064                         gpuD[thisEntry] = 0.5f*(gpuD[IX(N,0  )] + gpuD[IX(N+1,1)]);
01065                 }
01066         if ((i == (N+1)) && (j == (N+1))) 
01067                 {
01068                         gpuD[thisEntry] = 0.5f*(gpuD[IX(N,N+1)] + gpuD[IX(N+1,N)]);
01069                 }
01070         }
01071 
01072         // Set the values in the invisible boundary
01073         // for the density field
01074         else
01075         {
01076                 if (i == 0) 
01077                 {
01078                         gpuD[thisEntry] = 0;
01079         }
01080                 if (i == (N+1))
01081         { 
01082                         gpuD[thisEntry] = 0;
01083         }
01084                 if (j == 0) 
01085                 {
01086                         gpuD[thisEntry] = 0;
01087         }
01088                 if (j == (N+1))
01089         { 
01090                         gpuD[thisEntry] = 0;
01091         }
01092         }
01093 }
01094 
01095 __global__ void gpuKernelProjectStep1(int N, float *gpuU, float *gpuV, float *gpuP, float *gpuDiv, int *gpuBoundary)
01096 {
01097         int i, j;
01098         float h, temp;
01099         int thisEntry;
01100 
01101         // 1 divided by the size of the array
01102         h = 1.0f/N;
01103 
01104         thisEntry = blockIdx.x*BLOCK_SIZE + threadIdx.x;
01105         i = thisEntry / (N+2);
01106         j = thisEntry % (N+2);
01107 
01108         // If the element in array x is a boundary then no fluid
01109         // should be at that spot so set the density at that spot to 0
01110         // Also don't calculate the values for the invisible boundaries,
01111         // just set them to 0
01112         if (i == 0 || i == N+1 || j == 0 || j == N+1 || gpuBoundary[thisEntry])
01113                 gpuDiv[thisEntry] = 0.0f;
01114         else
01115         {       
01116                 // Find the new velocity value of the current cell
01117                 temp = -0.5f*h*(gpuU[IX(i+1,j)] - gpuU[IX(i-1,j)] + gpuV[IX(i,j+1)] - gpuV[IX(i,j-1)]);
01118 
01119                 // If the velocity is moving in the vertical direction and is moving down
01120                 // and there is a boundary below the current cell
01121                 if(temp < 0 && gpuBoundary[IX(i-1,j)])
01122                         // Make the direction of the velocity go in the opposite direction
01123                         gpuDiv[thisEntry] = -temp;
01124 
01125                 // If the velocity is moving in the verticle direction and is moving down
01126                 // and there is a boundary above the current cell
01127                 else if(temp > 0 && gpuBoundary[IX(i+1,j)])
01128                         // Make the direction of the velocity go in the opposite direction
01129                         gpuDiv[thisEntry] = -temp;
01130                         
01131                 // Set the new velocity value to the current cell
01132                 else
01133                         gpuDiv[thisEntry] = temp;
01134         }
01135 
01136         // Set the horizontal velocity of the current cell to 0
01137         gpuP[thisEntry] = 0.0f;
01138 
01139         __syncthreads();
01140         
01141 
01142         // Set the values in the invisible boundary
01143         if (i == 0) 
01144         {
01145                 gpuDiv[thisEntry] = gpuDiv[IX(1,j)];
01146     }
01147         if (i == (N+1)) 
01148         { 
01149                 gpuDiv[thisEntry] = gpuDiv[IX(N,i)];
01150     }
01151         if (j == 0) 
01152         {
01153                 gpuDiv[thisEntry] = gpuDiv[IX(i,1)];
01154     }
01155         if (j == (N+1))
01156     { 
01157                 gpuDiv[thisEntry] = gpuDiv[IX(i,N)];
01158     }
01159 
01160     if ((i == 0) && (j == 0)) 
01161         {
01162                 gpuDiv[thisEntry] = 0.5f*(gpuDiv[IX(1,0  )] + gpuDiv[IX(0  ,1)]);
01163         }
01164     if ((i == 0) && (j == (N+1))) 
01165         {
01166                 gpuDiv[thisEntry] = 0.5f*(gpuDiv[IX(1,N+1)] + gpuDiv[IX(0  ,N)]);
01167         }
01168     if ((i == (N+1)) && (j == 0)) 
01169         {
01170                 gpuDiv[thisEntry] = 0.5f*(gpuDiv[IX(N,0  )] + gpuDiv[IX(N+1,1)]);
01171         }
01172     if ((i == (N+1)) && (j == (N+1))) 
01173         {
01174                 gpuDiv[thisEntry] = 0.5f*(gpuDiv[IX(N,N+1)] + gpuDiv[IX(N+1,N)]);
01175         }
01176 
01177 
01178         // Set the values in the invisible boundary
01179         if (i == 0) 
01180         {
01181                 gpuP[thisEntry] = gpuP[IX(1,j)];
01182     }
01183         if (i == (N+1)) 
01184         { 
01185                 gpuP[thisEntry] = gpuP[IX(N,i)];
01186     }
01187         if (j == 0) 
01188         {
01189                 gpuP[thisEntry] = gpuP[IX(i,1)];
01190     }
01191         if (j == (N+1))
01192     { 
01193                 gpuP[thisEntry] = gpuP[IX(i,N)];
01194     }
01195 
01196     if ((i == 0) && (j == 0)) 
01197         {
01198                 gpuP[thisEntry] = 0.5f*(gpuP[IX(1,0  )] + gpuP[IX(0  ,1)]);
01199         }
01200     if ((i == 0) && (j == (N+1))) 
01201         {
01202                 gpuP[thisEntry] = 0.5f*(gpuP[IX(1,N+1)] + gpuP[IX(0  ,N)]);
01203         }
01204     if ((i == (N+1)) && (j == 0)) 
01205         {
01206                 gpuP[thisEntry] = 0.5f*(gpuP[IX(N,0  )] + gpuP[IX(N+1,1)]);
01207         }
01208     if ((i == (N+1)) && (j == (N+1))) 
01209         {
01210                 gpuP[thisEntry] = 0.5f*(gpuP[IX(N,N+1)] + gpuP[IX(N+1,N)]);
01211         }
01212 }
01213 
01214 __global__ void gpuKernelProjectStep2(int N, float *gpuP, float *gpuDiv, int *gpuBoundary)
01215 {
01216         int i, j, k;
01217         float temp;
01218         int thisEntry;
01219 
01220         // Perform this step multiple times for increased accuracy
01221         for (k=0; k<20; k++)
01222         {                       
01223                 thisEntry = blockIdx.x*BLOCK_SIZE + threadIdx.x;
01224                 i = thisEntry / (N+2);
01225                 j = thisEntry % (N+2);
01226 
01227                 // If the element in array x is a boundary then no fluid
01228                 // should be at that spot so set the density at that spot to 0
01229                 // Also don't calculate the values for the invisible boundaries,
01230                 // just set them to 0
01231                 if (i == 0 || i == N+1 || j == 0 || j == N+1 || gpuBoundary[thisEntry])
01232                         gpuP[thisEntry] = 0.0f;
01233                 else
01234                 {
01235                         // Find the new velocity value of the current cell
01236                         temp = (gpuDiv[thisEntry] + gpuP[IX(i-1,j)] + gpuP[IX(i+1,j)] + gpuP[IX(i,j-1)] + gpuP[IX(i,j+1)]) / 4;
01237 
01238                         // If velocity is moving in the horizontal direction and is moving left
01239                         // and there is a boundary to the left of the current cell
01240                         if(temp < 0 && gpuBoundary[IX(i,j-1)])
01241                                 // Make the direction of the velocity go in the opposite direction
01242                                 gpuP[thisEntry] = -temp;
01243                         
01244                         // If the velocity is moving in the horizontal direction and is moving right
01245                         // and there is a boundary to the right of the current cell
01246                         else if(temp > 0 && gpuBoundary[IX(i,j+1)])
01247                                 // Make the direction of the velocity go in the opposite direction
01248                                 gpuP[thisEntry] = -temp;
01249 
01250                         // Set the new velocity value to the current cell
01251                         else
01252                                 gpuP[thisEntry] = temp;
01253                 }
01254                 __syncthreads();
01255                 
01256                 // Set the values in the invisible boundary
01257                 if (i == 0) 
01258                 {
01259                         gpuP[thisEntry] = gpuP[IX(1,j)];
01260                 }
01261                 if (i == (N+1)) 
01262                 { 
01263                         gpuP[thisEntry] = gpuP[IX(N,i)];
01264                 }
01265                 if (j == 0) 
01266                 {
01267                         gpuP[thisEntry] = gpuP[IX(i,1)];
01268                 }
01269                 if (j == (N+1))
01270                 { 
01271                         gpuP[thisEntry] = gpuP[IX(i,N)];
01272                 }
01273 
01274                 if ((i == 0) && (j == 0)) 
01275                 {
01276                         gpuP[thisEntry] = 0.5f*(gpuP[IX(1,0  )] + gpuP[IX(0  ,1)]);
01277                 }
01278                 if ((i == 0) && (j == (N+1))) 
01279                 {
01280                         gpuP[thisEntry] = 0.5f*(gpuP[IX(1,N+1)] + gpuP[IX(0  ,N)]);
01281                 }
01282                 if ((i == (N+1)) && (j == 0)) 
01283                 {
01284                         gpuP[thisEntry] = 0.5f*(gpuP[IX(N,0  )] + gpuP[IX(N+1,1)]);
01285                 }
01286                 if ((i == (N+1)) && (j == (N+1))) 
01287                 {
01288                         gpuP[thisEntry] = 0.5f*(gpuP[IX(N,N+1)] + gpuP[IX(N+1,N)]);
01289                 }               
01290         }
01291 }
01292 
01293 __global__ void gpuKernelProjectStep3(int N, float *gpuU, float *gpuV, float *gpuP, int *gpuBoundary)
01294 {
01295         int i, j;
01296         float h, temp;
01297         int thisEntry;
01298 
01299         // 1 divided by the size of the array
01300         h = 1.0f/N;
01301 
01302         thisEntry = blockIdx.x*BLOCK_SIZE + threadIdx.x;
01303         i = thisEntry / (N+2);
01304         j = thisEntry % (N+2);
01305 
01306         // If the element in array x is a boundary then no fluid
01307         // should be at that spot so set the density at that spot to 0
01308         // Also don't calculate the values for the invisible boundaries,
01309         // just set them to 0
01310         if (i == 0 || i == N+1 || j == 0 || j == N+1 || gpuBoundary[thisEntry])
01311         {
01312                 gpuU[thisEntry] = 0.0f;
01313                 gpuV[thisEntry] = 0.0f;
01314         }
01315 
01316         // The current cell is not a boundary
01317         else
01318         {       
01319                 // Find the new horizontal velocity value of the current cell
01320                 temp = 0.5f * (gpuP[IX(i+1,j)] - gpuP[IX(i-1,j)]) / h;
01321 
01322                 // If velocity is moving in the horizontal direction and is moving left
01323                 // and there is a boundary to the left of the current cell
01324                 if(temp < 0 && gpuBoundary[IX(i,j-1)])
01325                         // Make the direction of the velocity go in the opposite direction
01326                         gpuU[thisEntry] -= -temp;
01327 
01328                 // If the velocity is moving in the horizontal direction and is moving right
01329                 // and there is a boundary to the right of the current cell
01330                 else if(temp > 0 && gpuBoundary[IX(i,j+1)])
01331                         // Make the direction of the velocity go in the opposite direction
01332                         gpuU[thisEntry] -= -temp;
01333 
01334                 // Set the new horizontal velocity value to the current cell
01335                 else
01336                         gpuU[thisEntry] -= temp;
01337 
01338                 // Find the new vertical velocity value of the current cell
01339                 temp = 0.5f * (gpuP[IX(i,j+1)] - gpuP[IX(i,j-1)]) / h;
01340 
01341                 // If the velocity is moving in the vertical direction and is moving down
01342                 // and there is a boundary below the current cell
01343                 if(temp < 0 && gpuBoundary[IX(i-1,j)])
01344                         // Make the direction of the velocity go in the opposite direction
01345                         gpuV[thisEntry] -= -temp;
01346 
01347                 // If the velocity is moving in the verticle direction and is moving down
01348                 // and there is a boundary above the current cell
01349                 else if(temp > 0 && gpuBoundary[IX(i+1,j)])
01350                         // Make the direction of the velocity go in the opposite direction
01351                         gpuV[thisEntry] -= -temp;
01352 
01353                 // Set the new vertical velocity value to the current cell
01354                 else
01355                         gpuV[thisEntry] -= temp;
01356         }
01357 
01358         __syncthreads();
01359 
01360         
01361         // Set the values in the invisible boundary
01362         if (i == 0) 
01363         {
01364                 gpuU[thisEntry] = gpuU[IX(1,j)];
01365         }
01366         if (i == (N+1)) 
01367         { 
01368                 gpuU[thisEntry] = gpuU[IX(N,i)];
01369         }
01370         if (j == 0) 
01371         {
01372                 gpuU[thisEntry] = gpuU[IX(i,1)];
01373         }
01374         if (j == (N+1))
01375         { 
01376                 gpuU[thisEntry] = gpuU[IX(i,N)];
01377         }
01378 
01379         if ((i == 0) && (j == 0)) 
01380         {
01381                 gpuU[thisEntry] = 0.5f*(gpuU[IX(1,0  )] + gpuU[IX(0  ,1)]);
01382         }
01383         if ((i == 0) && (j == (N+1))) 
01384         {
01385                 gpuU[thisEntry] = 0.5f*(gpuU[IX(1,N+1)] + gpuU[IX(0  ,N)]);
01386         }
01387         if ((i == (N+1)) && (j == 0)) 
01388         {
01389                 gpuU[thisEntry] = 0.5f*(gpuU[IX(N,0  )] + gpuU[IX(N+1,1)]);
01390         }
01391         if ((i == (N+1)) && (j == (N+1))) 
01392         {
01393                 gpuU[thisEntry] = 0.5f*(gpuU[IX(N,N+1)] + gpuU[IX(N+1,N)]);
01394         }
01395 
01396 
01397         // Set the values in the invisible boundary
01398         if (i == 0) 
01399         {
01400                 gpuV[thisEntry] = gpuV[IX(1,j)];
01401         }
01402         if (i == (N+1)) 
01403         { 
01404                 gpuV[thisEntry] = gpuV[IX(N,i)];
01405         }
01406         if (j == 0) 
01407         {
01408                 gpuV[thisEntry] = gpuV[IX(i,1)];
01409         }
01410         if (j == (N+1))
01411         { 
01412                 gpuV[thisEntry] = gpuV[IX(i,N)];
01413         }
01414 
01415         if ((i == 0) && (j == 0)) 
01416         {
01417                 gpuV[thisEntry] = 0.5f*(gpuV[IX(1,0  )] + gpuV[IX(0  ,1)]);
01418         }
01419         if ((i == 0) && (j == (N+1))) 
01420         {
01421                 gpuV[thisEntry] = 0.5f*(gpuV[IX(1,N+1)] + gpuV[IX(0  ,N)]);
01422         }
01423         if ((i == (N+1)) && (j == 0)) 
01424         {
01425                 gpuV[thisEntry] = 0.5f*(gpuV[IX(N,0  )] + gpuV[IX(N+1,1)]);
01426         }
01427         if ((i == (N+1)) && (j == (N+1))) 
01428         {
01429                 gpuV[thisEntry] = 0.5f*(gpuV[IX(N,N+1)] + gpuV[IX(N+1,N)]);
01430         }
01431 }
01432 
01433 void gpuCalcFluid()
01434 {
01435         float *gpuU;
01436         float *gpuV;
01437         float *gpuUPrev;
01438         float *gpuVPrev;
01439         float *gpuDens;
01440         float *gpuDensPrev;
01441         int *gpuBoundary;
01442         int result;
01443 
01444         // Allocate the arrays in the GPU memory
01445         result = cudaMalloc((void**)&gpuU, sizeof(float)*SIZE);
01446         if (result != cudaSuccess) {
01447                 printf("In gpuCalcFluid: cudaMalloc for gpuU failed.\n");
01448                 exit(1);
01449         }
01450         result = cudaMalloc((void**)&gpuV, sizeof(float)*SIZE);
01451         if (result != cudaSuccess) {
01452                 printf("In gpuCalcFluid: cudaMalloc for gpuV failed.\n");
01453                 exit(1);
01454         }
01455         result = cudaMalloc((void**)&gpuUPrev, sizeof(float)*SIZE);
01456         if (result != cudaSuccess) {
01457                 printf("In gpuCalcFluid: cudaMalloc for gpuUPrev failed.\n");
01458                 exit(1);
01459         }
01460         result = cudaMalloc((void**)&gpuVPrev, sizeof(float)*SIZE);
01461         if (result != cudaSuccess) {
01462                 printf("In gpuCalcFluid: cudaMalloc for gpuVPrev failed.\n");
01463                 exit(1);
01464         }
01465         result = cudaMalloc((void**)&gpuDens, sizeof(float)*SIZE);
01466         if (result != cudaSuccess) {
01467                 printf("In gpuCalcFluid: cudaMalloc for gpuDens failed.\n");
01468                 exit(1);
01469         }
01470         result = cudaMalloc((void**)&gpuDensPrev, sizeof(float)*SIZE);
01471         if (result != cudaSuccess) {
01472                 printf("In gpuCalcFluid: cudaMalloc for gpuDensPrev failed.\n");
01473                 exit(1);
01474         }
01475         result = cudaMalloc((void**)&gpuBoundary, sizeof(int)*SIZE);
01476         if (result != cudaSuccess) {
01477                 printf("In gpuCalcFluid: cudaMalloc for gpuBoundary failed.\n");
01478                 exit(1);
01479         }
01480 
01481         // Copy the arrays from main memory to the memory of the GPU
01482         result = cudaMemcpy(gpuU, u, sizeof(float)*SIZE, cudaMemcpyHostToDevice);
01483         if (result != cudaSuccess) {
01484                 printf("In gpuCalcFluid: cudaMemcpy for gpuU failed.\n");
01485                 exit(1);
01486         }
01487         result = cudaMemcpy(gpuV, v, sizeof(float)*SIZE, cudaMemcpyHostToDevice);
01488         if (result != cudaSuccess) {
01489                 printf("In gpuCalcFluid: cudaMemcpy for gpuV failed.\n");
01490                 exit(1);
01491         }
01492         result = cudaMemcpy(gpuUPrev, u_prev, sizeof(float)*SIZE, cudaMemcpyHostToDevice);
01493         if (result != cudaSuccess) {
01494                 printf("In gpuCalcFluid: cudaMemcpy for gpuUPrev failed.\n");
01495                 exit(1);
01496         }
01497         result = cudaMemcpy(gpuVPrev, v_prev, sizeof(float)*SIZE, cudaMemcpyHostToDevice);
01498         if (result != cudaSuccess) {
01499                 printf("In gpuCalcFluid: cudaMemcpy for gpuVPrev failed.\n");
01500                 exit(1);
01501         }
01502         result = cudaMemcpy(gpuDens, dens, sizeof(float)*SIZE, cudaMemcpyHostToDevice);
01503         if (result != cudaSuccess) {
01504                 printf("In gpuCalcFluid: cudaMemcpy for gpuDens failed.\n");
01505                 exit(1);
01506         }
01507         result = cudaMemcpy(gpuDensPrev, dens_prev, sizeof(float)*SIZE, cudaMemcpyHostToDevice);
01508         if (result != cudaSuccess) {
01509                 printf("In gpuCalcFluid: cudaMemcpy for gpuDensPrev failed.\n");
01510                 exit(1);
01511         }
01512         result = cudaMemcpy(gpuBoundary, boundary, sizeof(int)*SIZE, cudaMemcpyHostToDevice);
01513         if (result != cudaSuccess) {
01514                 printf("In gpuProject: cudaMemcpy for gpuBoundary failed.\n");
01515                 exit(1);
01516         }
01517 
01518         // Indicate the dimension of the block
01519         dim3 dimblock(BLOCK_SIZE);
01520         // Indicate the dimension of the grid measured in blocks
01521         dim3 dimgrid(SIZE/BLOCK_SIZE);
01522 
01523         // Densisty Step
01524         gpuKernelAdd_Source<<<dimgrid,dimblock>>>(N,gpuDens,gpuDensPrev,dt);
01525         SWAP (gpuDensPrev, gpuDens);
01526         gpuKernelDiffuse<<<dimgrid,dimblock>>>(N,1,gpuDens,gpuDensPrev,diff,dt,gpuBoundary);
01527         SWAP (gpuDensPrev, gpuDens);
01528         gpuKernelAdvect<<<dimgrid,dimblock>>>(N,1,gpuDens,gpuDensPrev,gpuU,gpuV,dt,gpuBoundary);
01529 
01530         // Velocity Step
01531         gpuKernelAdd_Source<<<dimgrid,dimblock>>>(N,gpuU,gpuUPrev,dt);
01532         gpuKernelAdd_Source<<<dimgrid,dimblock>>>(N,gpuV,gpuVPrev,dt);
01533         SWAP (gpuUPrev,gpuU);
01534         gpuKernelDiffuse<<<dimgrid,dimblock>>>(N,0,gpuU,gpuUPrev,visc,dt,gpuBoundary);
01535         SWAP (gpuVPrev,gpuV);
01536         gpuKernelDiffuse<<<dimgrid,dimblock>>>(N,0,gpuV,gpuVPrev,visc,dt,gpuBoundary);
01537         gpuKernelProjectStep1<<<dimgrid,dimblock>>>(N, gpuU, gpuV, gpuUPrev, gpuVPrev, gpuBoundary);
01538         gpuKernelProjectStep2<<<dimgrid,dimblock>>>(N, gpuUPrev, gpuVPrev, gpuBoundary);
01539         gpuKernelProjectStep3<<<dimgrid,dimblock>>>(N, gpuU, gpuV, gpuUPrev, gpuBoundary);
01540         SWAP (gpuUPrev,gpuU);
01541         SWAP (gpuVPrev,gpuV);
01542         gpuKernelAdvect<<<dimgrid,dimblock>>>(N,0,gpuU,gpuUPrev,gpuUPrev,gpuVPrev,dt,gpuBoundary);
01543         gpuKernelAdvect<<<dimgrid,dimblock>>>(N,0,gpuV,gpuVPrev,gpuUPrev,gpuVPrev,dt,gpuBoundary);
01544         gpuKernelProjectStep1<<<dimgrid,dimblock>>>(N, gpuU, gpuV, gpuUPrev, gpuVPrev, gpuBoundary);
01545         gpuKernelProjectStep2<<<dimgrid,dimblock>>>(N, gpuUPrev, gpuVPrev, gpuBoundary);
01546         gpuKernelProjectStep3<<<dimgrid,dimblock>>>(N, gpuU, gpuV, gpuUPrev, gpuBoundary);
01547         
01548         // Copy the results back from the GPU memory to the CPU memory
01549         result = cudaMemcpy(u, gpuU, sizeof(float)*SIZE, cudaMemcpyDeviceToHost);
01550         if (result != cudaSuccess) {
01551                 printf("In gpuCalcFluid: cudaMemcpy back to host for u failed.\n");
01552                 exit(1);
01553         }
01554         result = cudaMemcpy(v, gpuV, sizeof(float)*SIZE, cudaMemcpyDeviceToHost);
01555         if (result != cudaSuccess) {
01556                 printf("In gpuCalcFluid: cudaMemcpy back to host for v failed.\n");
01557                 exit(1);
01558         }
01559         result = cudaMemcpy(u_prev, gpuUPrev, sizeof(float)*SIZE, cudaMemcpyDeviceToHost);
01560         if (result != cudaSuccess) {
01561                 printf("In gpuCalcFluid: cudaMemcpy back to host for u_prev failed.\n");
01562                 exit(1);
01563         }
01564         result = cudaMemcpy(v_prev, gpuVPrev, sizeof(float)*SIZE, cudaMemcpyDeviceToHost);
01565         if (result != cudaSuccess) {
01566                 printf("In gpuCalcFluid: cudaMemcpy back to host for v_prev failed.\n");
01567                 exit(1);
01568         }
01569                 result = cudaMemcpy(dens, gpuDens, sizeof(float)*SIZE, cudaMemcpyDeviceToHost);
01570         if (result != cudaSuccess) {
01571                 printf("In gpuCalcFluid: cudaMemcpy back to host for dens failed.\n");
01572                 exit(1);
01573         }
01574         result = cudaMemcpy(dens_prev, gpuDensPrev, sizeof(float)*SIZE, cudaMemcpyDeviceToHost);
01575         if (result != cudaSuccess) {
01576                 printf("In gpuCalcFluid: cudaMemcpy back to host for dens_prev failed.\n");
01577                 exit(1);
01578         }
01579 
01580         // Free the arrays
01581         result = cudaFree(gpuU);
01582         if (result != cudaSuccess) {
01583                 printf("In gpuCalcFluid: cudaFree gpuU failed.\n");
01584                 exit(1);
01585         }
01586         result = cudaFree(gpuV);
01587         if (result != cudaSuccess) {
01588                 printf("In gpuCalcFluid: cudaFree gpuV failed.\n");
01589                 exit(1);
01590         }
01591         result = cudaFree(gpuUPrev);
01592         if (result != cudaSuccess) {
01593                 printf("In gpuCalcFluid: cudaFree gpuUPrev failed.\n");
01594                 exit(1);
01595         }
01596         result = cudaFree(gpuVPrev);
01597         if (result != cudaSuccess) {
01598                 printf("In gpuCalcFluid: cudaFree gpuVPrev failed.\n");
01599                 exit(1);
01600         }
01601         result = cudaFree(gpuDens);
01602         if (result != cudaSuccess) {
01603                 printf("In gpuCalcFluid: cudaFree gpuDens failed.\n");
01604                 exit(1);
01605         }
01606         result = cudaFree(gpuDensPrev);
01607         if (result != cudaSuccess) {
01608                 printf("In gpuCalcFluid: cudaFree gpuDensPrev failed.\n");
01609                 exit(1);
01610         }
01611         result = cudaFree(gpuBoundary);
01612         if (result != cudaSuccess) {
01613                 printf("In gpuProject: cudaFree gpuBoundary failed.\n");
01614                 exit(1);
01615         }
01616 }