00001
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068 #include <stdio.h>
00069 #include <stdlib.h>
00070 #include <cutil.h>
00071 #include "glut.h"
00072
00073 #define IX(i,j) ((i)+(N+2)*(j))
00074 #define SWAP(x0,x) {float *tmp = x0; x0 = x; x = tmp;}
00075 #define DIM 128
00076 #define SIZE DIM*DIM
00077 #define max(x,y) x>y?x:y
00078 #define BLOCK_SIZE 32
00079
00080
00090 void printArray (int N, float *x);
00091
00092
00100 void init (void);
00101
00102
00109 void display (void);
00110
00111
00117 void idle (void);
00118
00119
00140 void reshape (int x, int y);
00141
00142
00153 void keyboard (unsigned char key, int x, int y);
00154
00155
00167 void click (int button, int state, int x, int y);
00168
00169
00180 void motion (int x, int y);
00181
00182
00192 void timer (int value);
00193
00194
00207 void updateFluid (int button, int mouseMove, int state);
00208
00209
00221 __global__ void gpuKernelAdd_Source (int N, float *gpuX, float *gpuS, float dt);
00222
00223
00243 __global__ void gpuKernelDiffuse(int N,int b, float *gpuX, float *gpuX0, float diff,float dt,int *gpuBoundary);
00244
00245
00262 __global__ void gpuKernelAdvect(int N, int b, float *gpuD, float *gpuD0, float *gpuU, float *gpuV, float dt, int *gpuBoundary);
00263
00264
00279 __global__ void gpuKernelProjectStep1(int N, float *gpuU, float *gpuV, float *gpuP, float *gpuDiv, int *gpuBoundary);
00280
00281
00294 __global__ void gpuKernelProjectStep2(int N, float *gpuP, float *gpuDiv, int *gpuBoundary);
00295
00296
00310 __global__ void gpuKernelProjectStep3(int N, float *gpuU, float *gpuV, float *gpuP, int *gpuBoundary);
00311
00312
00319 void gpuCalcFluid(void);
00320
00321
00322 float u[SIZE];
00323 float v[SIZE];
00324 float u_prev[SIZE];
00325 float v_prev[SIZE];
00326 float dens[SIZE];
00327 float dens_prev[SIZE];
00328 int boundary[SIZE];
00329 int wWidth = max(512,DIM);
00330 int wHeight = max(512,DIM);
00331 float cellSize;
00332 float diff = 0.5f;
00333 float dt = .2f;
00334 float visc = 0.0f;
00335 int stepToggle = 0;
00336 int runToggle = 1;
00337 int consoleToggle = 0;
00338 int fluidToggle = 0;
00339 int densityToggle = 1;
00340 int velocityToggle = 0;
00341 int gridToggle = 0;
00342 int boundaryToggle = 1;
00343 int drawBoundaryToggle = 0;
00344 int id;
00345 int lastX = 0;
00346 int lastY = 0;
00347 int currentX = 0;
00348 int currentY = 0;
00349 int clicked = 0;
00350 int leftClick = 0;
00351 int N = DIM-2;
00352 int emitterLoc = DIM/2;
00353 int frameRate = 60;
00354 int updown = 0;
00355 int lastCellX;
00356 int lastCellY;
00357 float totalDensity = 0;
00358
00359
00360 int main(int argc, char *argv[]) {
00361 int i;
00362
00363
00364 for (i=0; i<SIZE; i++)
00365 {
00366 u[i] = 0.0f;
00367 u_prev[i] = 0.0f;
00368 v[i] = 0.0f;
00369 v_prev[i] = 0.0f;
00370 dens[i] = 0.0f;
00371 dens_prev[i] = 0.0f;
00372 boundary[i] = 0;
00373 }
00374
00375
00376 glutInit(&argc, argv);
00377 glutInitDisplayMode(GLUT_RGBA | GLUT_DOUBLE);
00378 glutInitWindowSize(wWidth, wHeight);
00379 glutCreateWindow("Compute Stable Fluids");
00380 init();
00381 glutDisplayFunc(display);
00382 glutReshapeFunc(reshape);
00383 glutIdleFunc(idle);
00384 glutKeyboardFunc(keyboard);
00385 glutMouseFunc(click);
00386 glutMotionFunc(motion);
00387 glutTimerFunc(100, timer, frameRate);
00388
00389 CUDA_SAFE_CALL(cudaSetDevice(2));
00390
00391
00392 glutMainLoop();
00393
00394 return 0;
00395 }
00396
00397 void printArray (int N, float *x)
00398 {
00399 int i, j;
00400
00401
00402 for (i=N+1; i>=0; i--)
00403 {
00404
00405 for (j=0; j<N+2; j++)
00406 {
00407 printf("%f ", x[IX(i,j)]);
00408 }
00409
00410
00411 printf("\n");
00412 }
00413 }
00414
00415 void init (void)
00416 {
00417
00418 cellSize = wWidth/N;
00419
00420
00421 id = glGenLists(1);
00422
00423
00424 glNewList(id, GL_COMPILE);
00425 {
00426 glBegin(GL_QUADS);
00427 {
00428 glVertex3f(-cellSize/2.0f, cellSize/2.0f, 0);
00429 glVertex3f(-cellSize/2.0f, -cellSize/2.0f, 0);
00430 glVertex3f(cellSize/2.0f, -cellSize/2.0f, 0);
00431 glVertex3f(cellSize/2.0f, cellSize/2.0f, 0);
00432 }
00433 glEnd();
00434 }
00435 glEndList();
00436
00437
00438 glClearColor(0x00, 0x00, 0x00, 1);
00439 }
00440
00441 void display (void)
00442 {
00443 int i, j;
00444 float c;
00445
00446
00447 glClear (GL_COLOR_BUFFER_BIT);
00448
00449
00450 if(runToggle || stepToggle)
00451 {
00452
00453 gpuCalcFluid();
00454
00455
00456 totalDensity = 0;
00457 for (i=N; i>=1; i--)
00458 {
00459 for (j=1; j<=N; j++)
00460 {
00461 totalDensity += dens[IX(i,j)];
00462 }
00463 }
00464
00465
00466 if (consoleToggle)
00467 {
00468
00469 printf("\n\nDensity Field\n");
00470 printArray(N, dens);
00471
00472
00473 printf("\n\nu Field\n");
00474 printArray(N, u);
00475
00476
00477 printf("\n\nv Field\n");
00478 printArray(N, v);
00479
00480
00481 printf("\n\nTotal Density: %f\n", totalDensity);
00482 }
00483
00484
00485 stepToggle = 0;
00486 }
00487
00488
00489 for (i=N; i>=1; i--)
00490 {
00491 for (j=1; j<=N; j++)
00492 {
00493
00494 if(boundaryToggle)
00495 {
00496
00497 if(boundary[IX(i,j)] == 1)
00498 glColor3ub(0x00, 0x00, 0xFF);
00499
00500 else
00501 glColor3ub(0x00, 0x00, 0x00);
00502
00503 glPushMatrix();
00504 {
00505
00506 glTranslatef(((j)*cellSize)+(cellSize/2), ((i)*cellSize)+(cellSize/2), 0);
00507
00508
00509 glCallList(id);
00510 }
00511 glPopMatrix();
00512 }
00513
00514
00515 if(densityToggle)
00516 {
00517 if(dens[IX(i,j)] > 0)
00518 {
00519
00520 c = (dens[IX(i,j)]);
00521 if(c == 0)
00522 glColor3f(0,0,0);
00523 else if(c < .33)
00524 glColor3f(0,0,c);
00525 else if(c < .66)
00526 glColor3f(0,c,0);
00527 else
00528 glColor3f(c,0,0);
00529
00530 glPushMatrix();
00531 {
00532
00533 glTranslatef(((j)*cellSize)+(cellSize/2), ((i)*cellSize)+(cellSize/2), 0);
00534
00535
00536 glCallList(id);
00537 }
00538 glPopMatrix();
00539 }
00540 }
00541
00542
00543 if(velocityToggle)
00544 {
00545
00546 glColor3ub(0xFF, 0x00, 0x00);
00547
00548 glPushMatrix();
00549 {
00550 float x, y;
00551
00552
00553 x = u[IX(i,j)] * cellSize * 10;
00554
00555
00556 y = v[IX(i,j)] * cellSize * 10;
00557
00558
00559 glTranslatef(((j)*cellSize)+(cellSize/2), ((i)*cellSize)+(cellSize/2), 0);
00560
00561
00562 glBegin(GL_LINES);
00563 {
00564 glVertex3f(0, 0, 0);
00565 glVertex3f(x, y, 0);
00566 }
00567 glEnd();
00568 }
00569 glPopMatrix();
00570 }
00571
00572
00573 if(gridToggle)
00574 {
00575
00576 glColor3ub(0x00, 0xFF, 0x00);
00577
00578 glPushMatrix();
00579 {
00580 glPushAttrib(GL_POLYGON_BIT);
00581 {
00582
00583 glPolygonMode (GL_FRONT, GL_LINE);
00584
00585
00586 glTranslatef(((j)*cellSize)+(cellSize/2), ((i)*cellSize)+(cellSize/2), 0);
00587
00588
00589 glCallList(id);
00590 }
00591 glPopAttrib();
00592 }
00593 glPopMatrix();
00594 }
00595 }
00596 }
00597
00598
00599 glutSwapBuffers();
00600 }
00601
00602 void idle (void)
00603 {
00604
00605 glutPostRedisplay();
00606 }
00607
00608
00609 void reshape (int x, int y)
00610 {
00611
00612 wWidth = x;
00613
00614
00615 wHeight = y;
00616
00617
00618 glDeleteLists(id,0);
00619
00620
00621 init();
00622
00623
00624 glViewport(0, 0, x, y);
00625 glMatrixMode(GL_PROJECTION);
00626 glLoadIdentity();
00627 glOrtho(0, wWidth, 0, wHeight, 0, 1);
00628 glMatrixMode(GL_MODELVIEW);
00629 glLoadIdentity();
00630
00631
00632 glutPostRedisplay();
00633 }
00634
00635 void keyboard (unsigned char key,int x, int y)
00636 {
00637 int i;
00638
00639 switch(key)
00640 {
00641
00642 case 27:
00643 exit(0);
00644
00645
00646 case 'c':
00647 consoleToggle = !consoleToggle;
00648 break;
00649
00650
00651 case 's':
00652 runToggle = !runToggle;
00653 break;
00654
00655
00656 case 'f':
00657 dens_prev[IX(1,emitterLoc)] = N * cellSize;
00658 v_prev[IX(1,emitterLoc)] = cellSize;
00659 break;
00660
00661
00662 case 'd':
00663 densityToggle = !densityToggle;
00664 break;
00665
00666
00667 case 'v':
00668 velocityToggle = !velocityToggle;
00669 break;
00670
00671
00672 case 'g':
00673 gridToggle = !gridToggle;
00674 break;
00675
00676
00677 case 'b':
00678 boundaryToggle = !boundaryToggle;
00679 break;
00680
00681
00682 case 'B':
00683 drawBoundaryToggle = !drawBoundaryToggle;
00684 break;
00685
00686
00687 case 'r':
00688
00689 for (i=0; i<SIZE; i++)
00690 {
00691 u[i] = 0.0f;
00692 u_prev[i] = 0.0f;
00693 v[i] = 0.0f;
00694 v_prev[i] = 0.0f;
00695 dens[i] = 0.0f;
00696 dens_prev[i] = 0.0f;
00697 boundary[i] = 0;
00698 }
00699 break;
00700
00701
00702 case 32:
00703 stepToggle = 1;
00704 break;
00705 }
00706
00707
00708 glutPostRedisplay();
00709 }
00710
00711 void click (int button, int state, int x, int y)
00712 {
00713 updown = state;
00714
00715
00716 if(state == 0)
00717 {
00718 lastCellX = -1;
00719 lastCellY = -1;
00720 }
00721
00722
00723 lastX = currentX;
00724 lastY = currentY;
00725
00726
00727 currentX = x;
00728 currentY = y;
00729
00730
00731 updateFluid(button, 0, updown);
00732 }
00733
00734 void motion (int x, int y)
00735 {
00736
00737 if(updown == 0)
00738 {
00739 lastX = currentX;
00740 lastY = currentY;
00741
00742 currentX = x;
00743 currentY = y;
00744
00745
00746 updateFluid(0, 1, updown);
00747 }
00748 }
00749
00750 void timer(int value)
00751 {
00752
00753 glutPostRedisplay();
00754
00755
00756 glutTimerFunc(1000/frameRate, timer, value);
00757 }
00758
00759 void updateFluid (int button, int mouseMove, int state)
00760 {
00761 float fx, fy;
00762 int nx, ny;
00763
00764
00765 fx = currentX / (float)wWidth;
00766 fy = (wHeight - currentY) / (float)wHeight;
00767 nx = (int)(fx * DIM);
00768 ny = (int)(fy * DIM);
00769
00770
00771 if(nx < 1)
00772 nx = 1;
00773 else if(nx > N)
00774 nx = N;
00775 if(ny < 1)
00776 ny = 1;
00777 else if(ny > N)
00778 ny = N;
00779
00780
00781 if(button == 2)
00782 {
00783
00784 dens_prev[IX(ny,nx)] = N * cellSize;
00785
00786
00787
00788 u_prev[IX(ny,nx-1)] = -cellSize;
00789 u_prev[IX(ny,nx+1)] = cellSize;
00790 v_prev[IX(ny-1,nx)] = -cellSize;
00791 v_prev[IX(ny+1,nx)] = cellSize;
00792 }
00793
00794
00795
00796 if(button == 0 && mouseMove && drawBoundaryToggle == 0 && updown == 0)
00797 {
00798
00799
00800 u_prev[IX(ny,nx)] += (currentX-lastX) * cellSize;
00801 v_prev[IX(ny,nx)] += (lastY-currentY) * cellSize;
00802 }
00803
00804
00805 if(button == 0 && drawBoundaryToggle && updown == 0)
00806 {
00807
00808 if(boundary[IX(ny,nx)] == 0)
00809 {
00810
00811 if(nx != lastCellX || ny != lastCellY)
00812 {
00813
00814 boundary[IX(ny,nx)] = 1;
00815
00816
00817 lastCellX = nx;
00818 lastCellY = ny;
00819 }
00820 }
00821
00822
00823 else
00824 {
00825
00826 if(lastCellX != nx || lastCellY != ny)
00827 {
00828
00829 boundary[IX(ny,nx)] = 0;
00830
00831
00832 lastCellX = nx;
00833 lastCellY = ny;
00834 }
00835 }
00836 }
00837
00838
00839 glutPostRedisplay();
00840 }
00841
00842 __global__ void gpuKernelAdd_Source (int N, float *gpuX, float *gpuS, float dt)
00843 {
00844 int i, size = (N+2)*(N+2);
00845 int thisEntry;
00846
00847 thisEntry = blockIdx.x*BLOCK_SIZE + threadIdx.x;
00848
00849 i = thisEntry % size;
00850
00851
00852
00853 gpuX[i] += dt*gpuS[i];
00854 }
00855
00856 __global__ void gpuKernelDiffuse(int N,int b, float *gpuX, float *gpuX0, float diff,float dt,int *gpuBoundary)
00857 {
00858 int i,j,k;
00859 float a = dt*diff*N*N;
00860 int thisEntry;
00861
00862
00863 for(k = 0;k < 20;k++)
00864 {
00865 thisEntry = blockIdx.x*BLOCK_SIZE + threadIdx.x;
00866
00867 j = thisEntry / (N+2);
00868 i = thisEntry % (N+2);
00869
00870
00871
00872
00873
00874 if (i == 0 || i == N+1 || j == 0 || j == N+1 || gpuBoundary[thisEntry])
00875 {
00876 gpuX[thisEntry] = 0.0f;
00877 }
00878
00879
00880
00881
00882 else
00883 {
00884 gpuX[thisEntry] =
00885 (gpuX0[thisEntry] + a*(gpuX[IX(i-1,j)] +
00886 gpuX[IX(i+1,j)] +
00887 gpuX[IX(i,j-1)] +
00888 gpuX[IX(i,j+1)])) / (1+4*a);
00889 }
00890
00891
00892
00893 if(b == 0)
00894 {
00895 if (i == 0)
00896 {
00897 gpuX[thisEntry] = gpuX[IX(1,j)];
00898 }
00899 if (i == (N+1))
00900 {
00901 gpuX[thisEntry] = gpuX[IX(N,i)];
00902 }
00903 if (j == 0)
00904 {
00905 gpuX[thisEntry] = gpuX[IX(i,1)];
00906 }
00907 if (j == (N+1))
00908 {
00909 gpuX[thisEntry] = gpuX[IX(i,N)];
00910 }
00911
00912 if ((i == 0) && (j == 0))
00913 {
00914 gpuX[thisEntry] = 0.5f*(gpuX[IX(1,0 )] + gpuX[IX(0 ,1)]);
00915 }
00916 if ((i == 0) && (j == (N+1)))
00917 {
00918 gpuX[thisEntry] = 0.5f*(gpuX[IX(1,N+1)] + gpuX[IX(0 ,N)]);
00919 }
00920 if ((i == (N+1)) && (j == 0))
00921 {
00922 gpuX[thisEntry] = 0.5f*(gpuX[IX(N,0 )] + gpuX[IX(N+1,1)]);
00923 }
00924 if ((i == (N+1)) && (j == (N+1)))
00925 {
00926 gpuX[thisEntry] = 0.5f*(gpuX[IX(N,N+1)] + gpuX[IX(N+1,N)]);
00927 }
00928 }
00929
00930
00931
00932 else
00933 {
00934 if (i == 0)
00935 {
00936 gpuX[thisEntry] = 0;
00937 }
00938 if (i == (N+1))
00939 {
00940 gpuX[thisEntry] = 0;
00941 }
00942 if (j == 0)
00943 {
00944 gpuX[thisEntry] = 0;
00945 }
00946 if (j == (N+1))
00947 {
00948 gpuX[thisEntry] = 0;
00949 }
00950 }
00951 }
00952 }
00953
00954 __global__ void gpuKernelAdvect(int N, int b, float *gpuD, float *gpuD0, float *gpuU, float *gpuV, float dt, int *gpuBoundary)
00955 {
00956 int i, j, i0, j0, i1, j1;
00957 float x, y, s0, t0, s1, t1, dt0, temp;
00958 int thisEntry;
00959
00960
00961 dt0 = dt*N;
00962
00963 thisEntry = blockIdx.x*BLOCK_SIZE + threadIdx.x;
00964 j = thisEntry / (N+2);
00965 i = thisEntry % (N+2);
00966
00967 x = i-dt0*gpuU[IX(i,j)];
00968 y = j-dt0*gpuV[IX(i,j)];
00969
00970 if (x<0.5f)
00971 x = 0.5f;
00972 if (x>N+0.5f)
00973 x = N+0.5f;
00974 i0 = (int)x;
00975 i1 = i0+1;
00976
00977 if (y<0.5f)
00978 y = 0.5f;
00979 if (y>N+0.5f)
00980 y = N+0.5f;
00981 j0 = (int)y;
00982 j1 = j0+1;
00983
00984 s1 = x-i0;
00985 s0 = 1-s1;
00986
00987 t1 = y-j0;
00988 t0 = 1-t1;
00989
00990
00991
00992
00993
00994 if (i == 0 || i == N+1 || j == 0 || j == N+1 || gpuBoundary[thisEntry])
00995 gpuD[thisEntry] = 0.0f;
00996
00997
00998 else
00999 {
01000
01001 temp = s0 * (t0*gpuD0[IX(i0,j0)] + t1*gpuD0[IX(i0,j1)]) +
01002 s1 * (t0*gpuD0[IX(i1,j0)] + t1*gpuD0[IX(i1,j1)]);
01003
01004
01005
01006 if(b == 1 && temp < 0 && gpuBoundary[IX(i,j-1)])
01007
01008 gpuD[IX(i,j)] = -temp;
01009
01010
01011
01012 else if(b == 1 && temp > 0 && gpuBoundary[IX(i,j+1)])
01013
01014 gpuD[IX(i,j)] = -temp;
01015
01016
01017
01018 else if(b == 2 && temp < 0 && gpuBoundary[IX(i-1,j)])
01019
01020 gpuD[IX(i,j)] = -temp;
01021
01022
01023
01024 else if(b == 2 && temp > 0 && gpuBoundary[IX(i+1,j)])
01025
01026 gpuD[IX(i,j)] = -temp;
01027
01028
01029 else
01030 gpuD[IX(i,j)] = temp;
01031 }
01032
01033
01034
01035 if(b == 0)
01036 {
01037 if (i == 0)
01038 {
01039 gpuD[thisEntry] = gpuD[IX(1,j)];
01040 }
01041 if (i == (N+1))
01042 {
01043 gpuD[thisEntry] = gpuD[IX(N,i)];
01044 }
01045 if (j == 0)
01046 {
01047 gpuD[thisEntry] = gpuD[IX(i,1)];
01048 }
01049 if (j == (N+1))
01050 {
01051 gpuD[thisEntry] = gpuD[IX(i,N)];
01052 }
01053
01054 if ((i == 0) && (j == 0))
01055 {
01056 gpuD[thisEntry] = 0.5f*(gpuD[IX(1,0 )] + gpuD[IX(0 ,1)]);
01057 }
01058 if ((i == 0) && (j == (N+1)))
01059 {
01060 gpuD[thisEntry] = 0.5f*(gpuD[IX(1,N+1)] + gpuD[IX(0 ,N)]);
01061 }
01062 if ((i == (N+1)) && (j == 0))
01063 {
01064 gpuD[thisEntry] = 0.5f*(gpuD[IX(N,0 )] + gpuD[IX(N+1,1)]);
01065 }
01066 if ((i == (N+1)) && (j == (N+1)))
01067 {
01068 gpuD[thisEntry] = 0.5f*(gpuD[IX(N,N+1)] + gpuD[IX(N+1,N)]);
01069 }
01070 }
01071
01072
01073
01074 else
01075 {
01076 if (i == 0)
01077 {
01078 gpuD[thisEntry] = 0;
01079 }
01080 if (i == (N+1))
01081 {
01082 gpuD[thisEntry] = 0;
01083 }
01084 if (j == 0)
01085 {
01086 gpuD[thisEntry] = 0;
01087 }
01088 if (j == (N+1))
01089 {
01090 gpuD[thisEntry] = 0;
01091 }
01092 }
01093 }
01094
01095 __global__ void gpuKernelProjectStep1(int N, float *gpuU, float *gpuV, float *gpuP, float *gpuDiv, int *gpuBoundary)
01096 {
01097 int i, j;
01098 float h, temp;
01099 int thisEntry;
01100
01101
01102 h = 1.0f/N;
01103
01104 thisEntry = blockIdx.x*BLOCK_SIZE + threadIdx.x;
01105 i = thisEntry / (N+2);
01106 j = thisEntry % (N+2);
01107
01108
01109
01110
01111
01112 if (i == 0 || i == N+1 || j == 0 || j == N+1 || gpuBoundary[thisEntry])
01113 gpuDiv[thisEntry] = 0.0f;
01114 else
01115 {
01116
01117 temp = -0.5f*h*(gpuU[IX(i+1,j)] - gpuU[IX(i-1,j)] + gpuV[IX(i,j+1)] - gpuV[IX(i,j-1)]);
01118
01119
01120
01121 if(temp < 0 && gpuBoundary[IX(i-1,j)])
01122
01123 gpuDiv[thisEntry] = -temp;
01124
01125
01126
01127 else if(temp > 0 && gpuBoundary[IX(i+1,j)])
01128
01129 gpuDiv[thisEntry] = -temp;
01130
01131
01132 else
01133 gpuDiv[thisEntry] = temp;
01134 }
01135
01136
01137 gpuP[thisEntry] = 0.0f;
01138
01139 __syncthreads();
01140
01141
01142
01143 if (i == 0)
01144 {
01145 gpuDiv[thisEntry] = gpuDiv[IX(1,j)];
01146 }
01147 if (i == (N+1))
01148 {
01149 gpuDiv[thisEntry] = gpuDiv[IX(N,i)];
01150 }
01151 if (j == 0)
01152 {
01153 gpuDiv[thisEntry] = gpuDiv[IX(i,1)];
01154 }
01155 if (j == (N+1))
01156 {
01157 gpuDiv[thisEntry] = gpuDiv[IX(i,N)];
01158 }
01159
01160 if ((i == 0) && (j == 0))
01161 {
01162 gpuDiv[thisEntry] = 0.5f*(gpuDiv[IX(1,0 )] + gpuDiv[IX(0 ,1)]);
01163 }
01164 if ((i == 0) && (j == (N+1)))
01165 {
01166 gpuDiv[thisEntry] = 0.5f*(gpuDiv[IX(1,N+1)] + gpuDiv[IX(0 ,N)]);
01167 }
01168 if ((i == (N+1)) && (j == 0))
01169 {
01170 gpuDiv[thisEntry] = 0.5f*(gpuDiv[IX(N,0 )] + gpuDiv[IX(N+1,1)]);
01171 }
01172 if ((i == (N+1)) && (j == (N+1)))
01173 {
01174 gpuDiv[thisEntry] = 0.5f*(gpuDiv[IX(N,N+1)] + gpuDiv[IX(N+1,N)]);
01175 }
01176
01177
01178
01179 if (i == 0)
01180 {
01181 gpuP[thisEntry] = gpuP[IX(1,j)];
01182 }
01183 if (i == (N+1))
01184 {
01185 gpuP[thisEntry] = gpuP[IX(N,i)];
01186 }
01187 if (j == 0)
01188 {
01189 gpuP[thisEntry] = gpuP[IX(i,1)];
01190 }
01191 if (j == (N+1))
01192 {
01193 gpuP[thisEntry] = gpuP[IX(i,N)];
01194 }
01195
01196 if ((i == 0) && (j == 0))
01197 {
01198 gpuP[thisEntry] = 0.5f*(gpuP[IX(1,0 )] + gpuP[IX(0 ,1)]);
01199 }
01200 if ((i == 0) && (j == (N+1)))
01201 {
01202 gpuP[thisEntry] = 0.5f*(gpuP[IX(1,N+1)] + gpuP[IX(0 ,N)]);
01203 }
01204 if ((i == (N+1)) && (j == 0))
01205 {
01206 gpuP[thisEntry] = 0.5f*(gpuP[IX(N,0 )] + gpuP[IX(N+1,1)]);
01207 }
01208 if ((i == (N+1)) && (j == (N+1)))
01209 {
01210 gpuP[thisEntry] = 0.5f*(gpuP[IX(N,N+1)] + gpuP[IX(N+1,N)]);
01211 }
01212 }
01213
01214 __global__ void gpuKernelProjectStep2(int N, float *gpuP, float *gpuDiv, int *gpuBoundary)
01215 {
01216 int i, j, k;
01217 float temp;
01218 int thisEntry;
01219
01220
01221 for (k=0; k<20; k++)
01222 {
01223 thisEntry = blockIdx.x*BLOCK_SIZE + threadIdx.x;
01224 i = thisEntry / (N+2);
01225 j = thisEntry % (N+2);
01226
01227
01228
01229
01230
01231 if (i == 0 || i == N+1 || j == 0 || j == N+1 || gpuBoundary[thisEntry])
01232 gpuP[thisEntry] = 0.0f;
01233 else
01234 {
01235
01236 temp = (gpuDiv[thisEntry] + gpuP[IX(i-1,j)] + gpuP[IX(i+1,j)] + gpuP[IX(i,j-1)] + gpuP[IX(i,j+1)]) / 4;
01237
01238
01239
01240 if(temp < 0 && gpuBoundary[IX(i,j-1)])
01241
01242 gpuP[thisEntry] = -temp;
01243
01244
01245
01246 else if(temp > 0 && gpuBoundary[IX(i,j+1)])
01247
01248 gpuP[thisEntry] = -temp;
01249
01250
01251 else
01252 gpuP[thisEntry] = temp;
01253 }
01254 __syncthreads();
01255
01256
01257 if (i == 0)
01258 {
01259 gpuP[thisEntry] = gpuP[IX(1,j)];
01260 }
01261 if (i == (N+1))
01262 {
01263 gpuP[thisEntry] = gpuP[IX(N,i)];
01264 }
01265 if (j == 0)
01266 {
01267 gpuP[thisEntry] = gpuP[IX(i,1)];
01268 }
01269 if (j == (N+1))
01270 {
01271 gpuP[thisEntry] = gpuP[IX(i,N)];
01272 }
01273
01274 if ((i == 0) && (j == 0))
01275 {
01276 gpuP[thisEntry] = 0.5f*(gpuP[IX(1,0 )] + gpuP[IX(0 ,1)]);
01277 }
01278 if ((i == 0) && (j == (N+1)))
01279 {
01280 gpuP[thisEntry] = 0.5f*(gpuP[IX(1,N+1)] + gpuP[IX(0 ,N)]);
01281 }
01282 if ((i == (N+1)) && (j == 0))
01283 {
01284 gpuP[thisEntry] = 0.5f*(gpuP[IX(N,0 )] + gpuP[IX(N+1,1)]);
01285 }
01286 if ((i == (N+1)) && (j == (N+1)))
01287 {
01288 gpuP[thisEntry] = 0.5f*(gpuP[IX(N,N+1)] + gpuP[IX(N+1,N)]);
01289 }
01290 }
01291 }
01292
01293 __global__ void gpuKernelProjectStep3(int N, float *gpuU, float *gpuV, float *gpuP, int *gpuBoundary)
01294 {
01295 int i, j;
01296 float h, temp;
01297 int thisEntry;
01298
01299
01300 h = 1.0f/N;
01301
01302 thisEntry = blockIdx.x*BLOCK_SIZE + threadIdx.x;
01303 i = thisEntry / (N+2);
01304 j = thisEntry % (N+2);
01305
01306
01307
01308
01309
01310 if (i == 0 || i == N+1 || j == 0 || j == N+1 || gpuBoundary[thisEntry])
01311 {
01312 gpuU[thisEntry] = 0.0f;
01313 gpuV[thisEntry] = 0.0f;
01314 }
01315
01316
01317 else
01318 {
01319
01320 temp = 0.5f * (gpuP[IX(i+1,j)] - gpuP[IX(i-1,j)]) / h;
01321
01322
01323
01324 if(temp < 0 && gpuBoundary[IX(i,j-1)])
01325
01326 gpuU[thisEntry] -= -temp;
01327
01328
01329
01330 else if(temp > 0 && gpuBoundary[IX(i,j+1)])
01331
01332 gpuU[thisEntry] -= -temp;
01333
01334
01335 else
01336 gpuU[thisEntry] -= temp;
01337
01338
01339 temp = 0.5f * (gpuP[IX(i,j+1)] - gpuP[IX(i,j-1)]) / h;
01340
01341
01342
01343 if(temp < 0 && gpuBoundary[IX(i-1,j)])
01344
01345 gpuV[thisEntry] -= -temp;
01346
01347
01348
01349 else if(temp > 0 && gpuBoundary[IX(i+1,j)])
01350
01351 gpuV[thisEntry] -= -temp;
01352
01353
01354 else
01355 gpuV[thisEntry] -= temp;
01356 }
01357
01358 __syncthreads();
01359
01360
01361
01362 if (i == 0)
01363 {
01364 gpuU[thisEntry] = gpuU[IX(1,j)];
01365 }
01366 if (i == (N+1))
01367 {
01368 gpuU[thisEntry] = gpuU[IX(N,i)];
01369 }
01370 if (j == 0)
01371 {
01372 gpuU[thisEntry] = gpuU[IX(i,1)];
01373 }
01374 if (j == (N+1))
01375 {
01376 gpuU[thisEntry] = gpuU[IX(i,N)];
01377 }
01378
01379 if ((i == 0) && (j == 0))
01380 {
01381 gpuU[thisEntry] = 0.5f*(gpuU[IX(1,0 )] + gpuU[IX(0 ,1)]);
01382 }
01383 if ((i == 0) && (j == (N+1)))
01384 {
01385 gpuU[thisEntry] = 0.5f*(gpuU[IX(1,N+1)] + gpuU[IX(0 ,N)]);
01386 }
01387 if ((i == (N+1)) && (j == 0))
01388 {
01389 gpuU[thisEntry] = 0.5f*(gpuU[IX(N,0 )] + gpuU[IX(N+1,1)]);
01390 }
01391 if ((i == (N+1)) && (j == (N+1)))
01392 {
01393 gpuU[thisEntry] = 0.5f*(gpuU[IX(N,N+1)] + gpuU[IX(N+1,N)]);
01394 }
01395
01396
01397
01398 if (i == 0)
01399 {
01400 gpuV[thisEntry] = gpuV[IX(1,j)];
01401 }
01402 if (i == (N+1))
01403 {
01404 gpuV[thisEntry] = gpuV[IX(N,i)];
01405 }
01406 if (j == 0)
01407 {
01408 gpuV[thisEntry] = gpuV[IX(i,1)];
01409 }
01410 if (j == (N+1))
01411 {
01412 gpuV[thisEntry] = gpuV[IX(i,N)];
01413 }
01414
01415 if ((i == 0) && (j == 0))
01416 {
01417 gpuV[thisEntry] = 0.5f*(gpuV[IX(1,0 )] + gpuV[IX(0 ,1)]);
01418 }
01419 if ((i == 0) && (j == (N+1)))
01420 {
01421 gpuV[thisEntry] = 0.5f*(gpuV[IX(1,N+1)] + gpuV[IX(0 ,N)]);
01422 }
01423 if ((i == (N+1)) && (j == 0))
01424 {
01425 gpuV[thisEntry] = 0.5f*(gpuV[IX(N,0 )] + gpuV[IX(N+1,1)]);
01426 }
01427 if ((i == (N+1)) && (j == (N+1)))
01428 {
01429 gpuV[thisEntry] = 0.5f*(gpuV[IX(N,N+1)] + gpuV[IX(N+1,N)]);
01430 }
01431 }
01432
01433 void gpuCalcFluid()
01434 {
01435 float *gpuU;
01436 float *gpuV;
01437 float *gpuUPrev;
01438 float *gpuVPrev;
01439 float *gpuDens;
01440 float *gpuDensPrev;
01441 int *gpuBoundary;
01442 int result;
01443
01444
01445 result = cudaMalloc((void**)&gpuU, sizeof(float)*SIZE);
01446 if (result != cudaSuccess) {
01447 printf("In gpuCalcFluid: cudaMalloc for gpuU failed.\n");
01448 exit(1);
01449 }
01450 result = cudaMalloc((void**)&gpuV, sizeof(float)*SIZE);
01451 if (result != cudaSuccess) {
01452 printf("In gpuCalcFluid: cudaMalloc for gpuV failed.\n");
01453 exit(1);
01454 }
01455 result = cudaMalloc((void**)&gpuUPrev, sizeof(float)*SIZE);
01456 if (result != cudaSuccess) {
01457 printf("In gpuCalcFluid: cudaMalloc for gpuUPrev failed.\n");
01458 exit(1);
01459 }
01460 result = cudaMalloc((void**)&gpuVPrev, sizeof(float)*SIZE);
01461 if (result != cudaSuccess) {
01462 printf("In gpuCalcFluid: cudaMalloc for gpuVPrev failed.\n");
01463 exit(1);
01464 }
01465 result = cudaMalloc((void**)&gpuDens, sizeof(float)*SIZE);
01466 if (result != cudaSuccess) {
01467 printf("In gpuCalcFluid: cudaMalloc for gpuDens failed.\n");
01468 exit(1);
01469 }
01470 result = cudaMalloc((void**)&gpuDensPrev, sizeof(float)*SIZE);
01471 if (result != cudaSuccess) {
01472 printf("In gpuCalcFluid: cudaMalloc for gpuDensPrev failed.\n");
01473 exit(1);
01474 }
01475 result = cudaMalloc((void**)&gpuBoundary, sizeof(int)*SIZE);
01476 if (result != cudaSuccess) {
01477 printf("In gpuCalcFluid: cudaMalloc for gpuBoundary failed.\n");
01478 exit(1);
01479 }
01480
01481
01482 result = cudaMemcpy(gpuU, u, sizeof(float)*SIZE, cudaMemcpyHostToDevice);
01483 if (result != cudaSuccess) {
01484 printf("In gpuCalcFluid: cudaMemcpy for gpuU failed.\n");
01485 exit(1);
01486 }
01487 result = cudaMemcpy(gpuV, v, sizeof(float)*SIZE, cudaMemcpyHostToDevice);
01488 if (result != cudaSuccess) {
01489 printf("In gpuCalcFluid: cudaMemcpy for gpuV failed.\n");
01490 exit(1);
01491 }
01492 result = cudaMemcpy(gpuUPrev, u_prev, sizeof(float)*SIZE, cudaMemcpyHostToDevice);
01493 if (result != cudaSuccess) {
01494 printf("In gpuCalcFluid: cudaMemcpy for gpuUPrev failed.\n");
01495 exit(1);
01496 }
01497 result = cudaMemcpy(gpuVPrev, v_prev, sizeof(float)*SIZE, cudaMemcpyHostToDevice);
01498 if (result != cudaSuccess) {
01499 printf("In gpuCalcFluid: cudaMemcpy for gpuVPrev failed.\n");
01500 exit(1);
01501 }
01502 result = cudaMemcpy(gpuDens, dens, sizeof(float)*SIZE, cudaMemcpyHostToDevice);
01503 if (result != cudaSuccess) {
01504 printf("In gpuCalcFluid: cudaMemcpy for gpuDens failed.\n");
01505 exit(1);
01506 }
01507 result = cudaMemcpy(gpuDensPrev, dens_prev, sizeof(float)*SIZE, cudaMemcpyHostToDevice);
01508 if (result != cudaSuccess) {
01509 printf("In gpuCalcFluid: cudaMemcpy for gpuDensPrev failed.\n");
01510 exit(1);
01511 }
01512 result = cudaMemcpy(gpuBoundary, boundary, sizeof(int)*SIZE, cudaMemcpyHostToDevice);
01513 if (result != cudaSuccess) {
01514 printf("In gpuProject: cudaMemcpy for gpuBoundary failed.\n");
01515 exit(1);
01516 }
01517
01518
01519 dim3 dimblock(BLOCK_SIZE);
01520
01521 dim3 dimgrid(SIZE/BLOCK_SIZE);
01522
01523
01524 gpuKernelAdd_Source<<<dimgrid,dimblock>>>(N,gpuDens,gpuDensPrev,dt);
01525 SWAP (gpuDensPrev, gpuDens);
01526 gpuKernelDiffuse<<<dimgrid,dimblock>>>(N,1,gpuDens,gpuDensPrev,diff,dt,gpuBoundary);
01527 SWAP (gpuDensPrev, gpuDens);
01528 gpuKernelAdvect<<<dimgrid,dimblock>>>(N,1,gpuDens,gpuDensPrev,gpuU,gpuV,dt,gpuBoundary);
01529
01530
01531 gpuKernelAdd_Source<<<dimgrid,dimblock>>>(N,gpuU,gpuUPrev,dt);
01532 gpuKernelAdd_Source<<<dimgrid,dimblock>>>(N,gpuV,gpuVPrev,dt);
01533 SWAP (gpuUPrev,gpuU);
01534 gpuKernelDiffuse<<<dimgrid,dimblock>>>(N,0,gpuU,gpuUPrev,visc,dt,gpuBoundary);
01535 SWAP (gpuVPrev,gpuV);
01536 gpuKernelDiffuse<<<dimgrid,dimblock>>>(N,0,gpuV,gpuVPrev,visc,dt,gpuBoundary);
01537 gpuKernelProjectStep1<<<dimgrid,dimblock>>>(N, gpuU, gpuV, gpuUPrev, gpuVPrev, gpuBoundary);
01538 gpuKernelProjectStep2<<<dimgrid,dimblock>>>(N, gpuUPrev, gpuVPrev, gpuBoundary);
01539 gpuKernelProjectStep3<<<dimgrid,dimblock>>>(N, gpuU, gpuV, gpuUPrev, gpuBoundary);
01540 SWAP (gpuUPrev,gpuU);
01541 SWAP (gpuVPrev,gpuV);
01542 gpuKernelAdvect<<<dimgrid,dimblock>>>(N,0,gpuU,gpuUPrev,gpuUPrev,gpuVPrev,dt,gpuBoundary);
01543 gpuKernelAdvect<<<dimgrid,dimblock>>>(N,0,gpuV,gpuVPrev,gpuUPrev,gpuVPrev,dt,gpuBoundary);
01544 gpuKernelProjectStep1<<<dimgrid,dimblock>>>(N, gpuU, gpuV, gpuUPrev, gpuVPrev, gpuBoundary);
01545 gpuKernelProjectStep2<<<dimgrid,dimblock>>>(N, gpuUPrev, gpuVPrev, gpuBoundary);
01546 gpuKernelProjectStep3<<<dimgrid,dimblock>>>(N, gpuU, gpuV, gpuUPrev, gpuBoundary);
01547
01548
01549 result = cudaMemcpy(u, gpuU, sizeof(float)*SIZE, cudaMemcpyDeviceToHost);
01550 if (result != cudaSuccess) {
01551 printf("In gpuCalcFluid: cudaMemcpy back to host for u failed.\n");
01552 exit(1);
01553 }
01554 result = cudaMemcpy(v, gpuV, sizeof(float)*SIZE, cudaMemcpyDeviceToHost);
01555 if (result != cudaSuccess) {
01556 printf("In gpuCalcFluid: cudaMemcpy back to host for v failed.\n");
01557 exit(1);
01558 }
01559 result = cudaMemcpy(u_prev, gpuUPrev, sizeof(float)*SIZE, cudaMemcpyDeviceToHost);
01560 if (result != cudaSuccess) {
01561 printf("In gpuCalcFluid: cudaMemcpy back to host for u_prev failed.\n");
01562 exit(1);
01563 }
01564 result = cudaMemcpy(v_prev, gpuVPrev, sizeof(float)*SIZE, cudaMemcpyDeviceToHost);
01565 if (result != cudaSuccess) {
01566 printf("In gpuCalcFluid: cudaMemcpy back to host for v_prev failed.\n");
01567 exit(1);
01568 }
01569 result = cudaMemcpy(dens, gpuDens, sizeof(float)*SIZE, cudaMemcpyDeviceToHost);
01570 if (result != cudaSuccess) {
01571 printf("In gpuCalcFluid: cudaMemcpy back to host for dens failed.\n");
01572 exit(1);
01573 }
01574 result = cudaMemcpy(dens_prev, gpuDensPrev, sizeof(float)*SIZE, cudaMemcpyDeviceToHost);
01575 if (result != cudaSuccess) {
01576 printf("In gpuCalcFluid: cudaMemcpy back to host for dens_prev failed.\n");
01577 exit(1);
01578 }
01579
01580
01581 result = cudaFree(gpuU);
01582 if (result != cudaSuccess) {
01583 printf("In gpuCalcFluid: cudaFree gpuU failed.\n");
01584 exit(1);
01585 }
01586 result = cudaFree(gpuV);
01587 if (result != cudaSuccess) {
01588 printf("In gpuCalcFluid: cudaFree gpuV failed.\n");
01589 exit(1);
01590 }
01591 result = cudaFree(gpuUPrev);
01592 if (result != cudaSuccess) {
01593 printf("In gpuCalcFluid: cudaFree gpuUPrev failed.\n");
01594 exit(1);
01595 }
01596 result = cudaFree(gpuVPrev);
01597 if (result != cudaSuccess) {
01598 printf("In gpuCalcFluid: cudaFree gpuVPrev failed.\n");
01599 exit(1);
01600 }
01601 result = cudaFree(gpuDens);
01602 if (result != cudaSuccess) {
01603 printf("In gpuCalcFluid: cudaFree gpuDens failed.\n");
01604 exit(1);
01605 }
01606 result = cudaFree(gpuDensPrev);
01607 if (result != cudaSuccess) {
01608 printf("In gpuCalcFluid: cudaFree gpuDensPrev failed.\n");
01609 exit(1);
01610 }
01611 result = cudaFree(gpuBoundary);
01612 if (result != cudaSuccess) {
01613 printf("In gpuProject: cudaFree gpuBoundary failed.\n");
01614 exit(1);
01615 }
01616 }