2 * Copyright 2011, Blender Foundation.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version 2
7 * of the License, or (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software Foundation,
16 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
28 #include "COM_ExecutionGroup.h"
29 #include "COM_InputSocket.h"
30 #include "COM_SocketConnection.h"
31 #include "COM_defines.h"
32 #include "COM_ExecutionSystem.h"
33 #include "COM_ReadBufferOperation.h"
34 #include "COM_WriteBufferOperation.h"
35 #include "COM_ReadBufferOperation.h"
36 #include "COM_WorkScheduler.h"
37 #include "COM_ViewerOperation.h"
38 #include "COM_ChunkOrder.h"
39 #include "COM_ExecutionSystemHelper.h"
41 #include "MEM_guardedalloc.h"
/**
 * Default constructor: puts the group into its initial state.
 * All flags are cleared, the chunk counters are zeroed and no chunk-state array exists yet.
 */
47 ExecutionGroup::ExecutionGroup()
49 this->m_isOutput = false;
50 this->m_complex = false;
// no chunk-state array yet; initExecution() allocates it
51 this->m_chunkExecutionStates = NULL;
55 this->m_cachedMaxReadBufferOffset = 0;
56 this->m_numberOfXChunks = 0;
57 this->m_numberOfYChunks = 0;
58 this->m_numberOfChunks = 0;
// switched to true by addOperation() when a non-buffer operation is added
59 this->m_initialized = false;
60 this->m_openCL = false;
61 this->m_singleThreaded = false;
62 this->m_chunksFinished = 0;
/**
 * Render priority of this group.
 * @return the render priority reported by the group's output operation (see getOutputNodeOperation()).
 */
65 CompositorPriority ExecutionGroup::getRenderPriotrity()
67 return this->getOutputNodeOperation()->getRenderPriority();
/**
 * Check whether an operation is already a member of this group.
 * Walks m_operations and compares each stored pointer with the given one (pointer identity).
 * @param operation the operation to look for.
 */
70 bool ExecutionGroup::containsOperation(NodeOperation *operation)
72 for (vector<NodeOperation *>::const_iterator iterator = this->m_operations.begin(); iterator != this->m_operations.end(); ++iterator) {
73 NodeOperation *inListOperation = *iterator;
74 if (inListOperation == operation) {
/**
 * @return the m_complex flag of this group; addOperation() copies it from a non-buffer operation.
 */
81 const bool ExecutionGroup::isComplex() const
83 return this->m_complex;
/**
 * Decide whether an operation may be added to this group.
 * The checks are evaluated in order; the first one that matches decides.
 * @param operation the candidate operation.
 */
86 bool ExecutionGroup::canContainOperation(NodeOperation *operation)
// a group that is not initialized yet accepts any operation
88 if (!this->m_initialized) { return true; }
// read-buffer operations are always accepted
89 if (operation->isReadBufferOperation()) { return true; }
// write-buffer operations are rejected once the group is initialized
90 if (operation->isWriteBufferOperation()) { return false; }
// set operations are always accepted
91 if (operation->isSetOperation()) { return true; }
// a non-complex group only accepts non-complex operations
93 if (!this->isComplex()) {
94 return (!operation->isComplex());
/**
 * Add an operation to this group and recursively pull in the operations it depends on.
 *
 * An operation that is already in the group is ignored.
 * @param system the execution system; execution groups created here are registered in its group list.
 * @param operation the operation to add.
 */
101 void ExecutionGroup::addOperation(ExecutionSystem *system, NodeOperation *operation)
103 /* should never happen but in rare cases it can - it causes confusing crashes */
104 BLI_assert(operation->isOperation() == true);
106 if (containsOperation(operation)) return;
107 if (canContainOperation(operation)) {
// a non-buffer operation sets the group's complex / OpenCL / single-threaded flags and marks it initialized
108 if (!operation->isBufferOperation()) {
109 this->m_complex = operation->isComplex();
110 this->m_openCL = operation->isOpenCL();
111 this->m_singleThreaded = operation->isSingleThreaded();
112 this->m_initialized = true;
114 this->m_operations.push_back(operation);
// a read-buffer operation also brings in the write-buffer operation of its memory proxy
115 if (operation->isReadBufferOperation()) {
116 ReadBufferOperation *readOperation = (ReadBufferOperation *)operation;
117 WriteBufferOperation *writeOperation = readOperation->getMemoryProxy()->getWriteBufferOperation();
118 this->addOperation(system, writeOperation);
// recurse into the operations connected to the input sockets
122 for (index = 0; index < operation->getNumberOfInputSockets(); index++) {
123 InputSocket *inputSocket = operation->getInputSocket(index);
124 if (inputSocket->isConnected()) {
125 NodeOperation *node = (NodeOperation *)inputSocket->getConnection()->getFromNode();
126 this->addOperation(system, node);
// a write-buffer operation whose memory proxy has no executor yet gets a new group of its own,
// which becomes the proxy's executor and is registered with the execution system
132 if (operation->isWriteBufferOperation()) {
133 WriteBufferOperation *writeoperation = (WriteBufferOperation *)operation;
134 if (writeoperation->getMemoryProxy()->getExecutor() == NULL) {
135 ExecutionGroup *newGroup = new ExecutionGroup();
136 writeoperation->getMemoryProxy()->setExecutor(newGroup);
137 newGroup->addOperation(system, operation);
138 ExecutionSystemHelper::addExecutionGroup(system->getExecutionGroups(), newGroup);
/**
 * @return the output operation of this group, i.e. the first entry of m_operations.
 * NOTE(review): m_operations[0] is read without an emptiness check - confirm callers only use this on groups that hold at least one operation.
 */
144 NodeOperation *ExecutionGroup::getOutputNodeOperation() const
146 return this->m_operations[0]; // the first operation of the group is always the output operation.
/**
 * Prepare this group for execution: rebuild the per-chunk state array and
 * cache the read-buffer operations that belong to the group.
 */
149 void ExecutionGroup::initExecution()
// release a state array left over from an earlier initialization
151 if (this->m_chunkExecutionStates != NULL) {
152 MEM_freeN(this->m_chunkExecutionStates);
155 determineNumberOfChunks();
157 this->m_chunkExecutionStates = NULL;
// one state entry per chunk, every chunk starting as COM_ES_NOT_SCHEDULED
158 if (this->m_numberOfChunks != 0) {
159 this->m_chunkExecutionStates = (ChunkExecutionState *)MEM_mallocN(sizeof(ChunkExecutionState) * this->m_numberOfChunks, __func__);
160 for (index = 0; index < this->m_numberOfChunks; index++) {
161 this->m_chunkExecutionStates[index] = COM_ES_NOT_SCHEDULED;
// collect the read-buffer operations of the group and track the largest buffer offset among them
166 unsigned int maxNumber = 0;
168 for (index = 0; index < this->m_operations.size(); index++) {
169 NodeOperation *operation = this->m_operations[index];
170 if (operation->isReadBufferOperation()) {
171 ReadBufferOperation *readOperation = (ReadBufferOperation *)operation;
172 this->m_cachedReadOperations.push_back(readOperation);
173 maxNumber = max(maxNumber, readOperation->getOffset());
177 this->m_cachedMaxReadBufferOffset = maxNumber;
/**
 * Release the per-execution state of this group: frees the chunk-state array,
 * resets the chunk counts, clears the cached read operations and drops the node-tree pointer.
 */
181 void ExecutionGroup::deinitExecution()
183 if (this->m_chunkExecutionStates != NULL) {
184 MEM_freeN(this->m_chunkExecutionStates);
185 this->m_chunkExecutionStates = NULL;
187 this->m_numberOfChunks = 0;
188 this->m_numberOfXChunks = 0;
189 this->m_numberOfYChunks = 0;
190 this->m_cachedReadOperations.clear();
191 this->m_bTree = NULL;
/**
 * Take the resolution of this group from its output operation.
 * @param resolution [out] receives the output operation's width in [0] and height in [1];
 *                   the same array is then passed to setResolution().
 */
193 void ExecutionGroup::determineResolution(unsigned int resolution[2])
195 NodeOperation *operation = this->getOutputNodeOperation();
196 resolution[0] = operation->getWidth();
197 resolution[1] = operation->getHeight();
198 this->setResolution(resolution);
/**
 * Compute the chunk grid of this group (m_numberOfXChunks, m_numberOfYChunks, m_numberOfChunks).
 */
201 void ExecutionGroup::determineNumberOfChunks()
// a single-threaded group is processed as one chunk
203 if (this->m_singleThreaded) {
204 this->m_numberOfXChunks = 1;
205 this->m_numberOfYChunks = 1;
206 this->m_numberOfChunks = 1;
// float copy of the chunk size so the divisions below are floating point and ceil() counts partial chunks
209 const float chunkSizef = this->m_chunkSize;
210 this->m_numberOfXChunks = ceil(this->m_width / chunkSizef);
211 this->m_numberOfYChunks = ceil(this->m_height / chunkSizef);
212 this->m_numberOfChunks = this->m_numberOfXChunks * this->m_numberOfYChunks;
217 * this method is called for the top execution groups (those containing the compositor node, the preview node or the viewer node)
/**
 * Execute this group chunk by chunk.
 *
 * First an order for the chunks is built (identity order, optionally reshuffled or sorted by
 * distance to one or more hotspots when the output operation is a viewer). Then chunks are
 * scheduled in that order, a limited number per pass, until the loop is finished or the
 * node tree's test_break callback requests a stop.
 *
 * @param graph the execution system; supplies the compositor context (node tree) and is
 *              passed on when scheduling chunks.
 */
219 void ExecutionGroup::execute(ExecutionSystem *graph)
221 CompositorContext &context = graph->getContext();
222 const bNodeTree *bTree = context.getbNodeTree();
223 if (this->m_width == 0 || this->m_height == 0) {return; } /// @note: break out... no pixels to calculate.
224 if (bTree->test_break && bTree->test_break(bTree->tbh)) {return; } /// @note: early break out for blur and preview nodes
225 if (this->m_numberOfChunks == 0) {return; } /// @note: early break out
226 unsigned int chunkNumber;
228 this->m_chunksFinished = 0;
229 this->m_bTree = bTree;
// chunkOrder[i] is the chunk number handled at position i; it starts as the identity order
231 unsigned int *chunkOrder = (unsigned int *)MEM_mallocN(sizeof(unsigned int) * this->m_numberOfChunks, __func__);
233 for (chunkNumber = 0; chunkNumber < this->m_numberOfChunks; chunkNumber++) {
234 chunkOrder[chunkNumber] = chunkNumber;
236 NodeOperation *operation = this->getOutputNodeOperation();
239 OrderOfChunks chunkorder = COM_ORDER_OF_CHUNKS_DEFAULT;
// a viewer operation supplies the hotspot center and the requested chunk order
241 if (operation->isViewerOperation()) {
242 ViewerBaseOperation *viewer = (ViewerBaseOperation *)operation;
243 centerX = viewer->getCenterX();
244 centerY = viewer->getCenterY();
245 chunkorder = viewer->getChunkOrder();
248 switch (chunkorder) {
// shuffle: swap two randomly chosen entries, 2 * m_numberOfChunks times
250 for (index = 0; index < 2 * this->m_numberOfChunks; index++) {
251 int index1 = rand() % this->m_numberOfChunks;
252 int index2 = rand() % this->m_numberOfChunks;
253 int s = chunkOrder[index1];
254 chunkOrder[index1] = chunkOrder[index2];
255 chunkOrder[index2] = s;
// order the chunks by their distance to a single hotspot at (centerX, centerY) in image space
258 case COM_TO_CENTER_OUT:
260 ChunkOrderHotspot *hotspots[1];
261 hotspots[0] = new ChunkOrderHotspot(this->m_width * centerX, this->m_height * centerY, 0.0f);
263 ChunkOrder *chunkOrders = (ChunkOrder *)MEM_mallocN(sizeof(ChunkOrder) * this->m_numberOfChunks, __func__);
264 for (index = 0; index < this->m_numberOfChunks; index++) {
265 determineChunkRect(&rect, index);
266 chunkOrders[index].setChunkNumber(index);
267 chunkOrders[index].setX(rect.xmin);
268 chunkOrders[index].setY(rect.ymin);
269 chunkOrders[index].determineDistance(hotspots, 1);
// sort() takes a half-open range: the end must point one past the last element,
// otherwise the last chunk is never moved to its sorted position
272 sort(&chunkOrders[0], &chunkOrders[this->m_numberOfChunks]);
273 for (index = 0; index < this->m_numberOfChunks; index++) {
274 chunkOrder[index] = chunkOrders[index].getChunkNumber();
278 MEM_freeN(chunkOrders);
// order the chunks by their distance to nine hotspots placed at the center and at width/6, height/6 offsets
281 case COM_TO_RULE_OF_THIRDS:
283 ChunkOrderHotspot *hotspots[9];
284 unsigned int tx = this->m_width / 6;
285 unsigned int ty = this->m_height / 6;
286 unsigned int mx = this->m_width / 2;
287 unsigned int my = this->m_height / 2;
288 unsigned int bx = mx + 2 * tx;
289 unsigned int by = my + 2 * ty;
// each following hotspot gets a larger "addition" value as its third constructor argument
291 float addition = this->m_numberOfChunks / COM_RULE_OF_THIRDS_DIVIDER;
292 hotspots[0] = new ChunkOrderHotspot(mx, my, addition * 0);
293 hotspots[1] = new ChunkOrderHotspot(tx, my, addition * 1);
294 hotspots[2] = new ChunkOrderHotspot(bx, my, addition * 2);
295 hotspots[3] = new ChunkOrderHotspot(bx, by, addition * 3);
296 hotspots[4] = new ChunkOrderHotspot(tx, ty, addition * 4);
297 hotspots[5] = new ChunkOrderHotspot(bx, ty, addition * 5);
298 hotspots[6] = new ChunkOrderHotspot(tx, by, addition * 6);
299 hotspots[7] = new ChunkOrderHotspot(mx, ty, addition * 7);
300 hotspots[8] = new ChunkOrderHotspot(mx, by, addition * 8);
302 ChunkOrder *chunkOrders = (ChunkOrder *)MEM_mallocN(sizeof(ChunkOrder) * this->m_numberOfChunks, __func__);
303 for (index = 0; index < this->m_numberOfChunks; index++) {
304 determineChunkRect(&rect, index);
305 chunkOrders[index].setChunkNumber(index);
306 chunkOrders[index].setX(rect.xmin);
307 chunkOrders[index].setY(rect.ymin);
308 chunkOrders[index].determineDistance(hotspots, 9);
311 sort(&chunkOrders[0], &chunkOrders[this->m_numberOfChunks]);
313 for (index = 0; index < this->m_numberOfChunks; index++) {
314 chunkOrder[index] = chunkOrders[index].getChunkNumber();
326 MEM_freeN(chunkOrders);
329 case COM_TO_TOP_DOWN:
334 bool breaked = false;
335 bool finished = false;
336 unsigned int startIndex = 0;
// upper bound on the number of chunks evaluated in one pass of the loop below
337 const int maxNumberEvaluated = BLI_system_thread_count() * 2;
339 while (!finished && !breaked) {
340 bool startEvaluated = false;
342 int numberEvaluated = 0;
// walk the chunk order from startIndex, looking at each chunk's current state
344 for (index = startIndex; index < this->m_numberOfChunks && numberEvaluated < maxNumberEvaluated; index++) {
345 chunkNumber = chunkOrder[index];
346 int yChunk = chunkNumber / this->m_numberOfXChunks;
347 int xChunk = chunkNumber - (yChunk * this->m_numberOfXChunks);
348 const ChunkExecutionState state = this->m_chunkExecutionStates[chunkNumber];
349 if (state == COM_ES_NOT_SCHEDULED) {
350 scheduleChunkWhenPossible(graph, xChunk, yChunk);
352 startEvaluated = true;
355 if (bTree->update_draw)
356 bTree->update_draw(bTree->udh);
358 else if (state == COM_ES_SCHEDULED) {
360 startEvaluated = true;
// leading chunks that are already executed are skipped in later passes
363 else if (state == COM_ES_EXECUTED && !startEvaluated) {
364 startIndex = index + 1;
368 WorkScheduler::finish();
370 if (bTree->test_break && bTree->test_break(bTree->tbh)) {
375 MEM_freeN(chunkOrder);
/**
 * Build the array of input buffers for one chunk.
 *
 * For every cached read operation the area of interest for the chunk's rect is determined,
 * the executor of the operation's memory proxy constructs a consolidated buffer for that
 * area, and the buffer is stored at the read operation's offset.
 *
 * @param chunkNumber number of the chunk whose inputs are gathered.
 * @return array of MemoryBuffer pointers allocated with MEM_callocN, indexed by read-operation offset.
 */
378 MemoryBuffer **ExecutionGroup::getInputBuffersOpenCL(int chunkNumber)
381 vector<MemoryProxy *> memoryproxies;
383 determineChunkRect(&rect, chunkNumber);
// NOTE(review): memoryproxies is filled here but not read again in the code shown - confirm it is still needed
385 this->determineDependingMemoryProxies(&memoryproxies);
// zero-initialized, so offsets without a read operation stay NULL
386 MemoryBuffer **memoryBuffers = (MemoryBuffer **)MEM_callocN(sizeof(MemoryBuffer *) * this->m_cachedMaxReadBufferOffset, __func__);
388 for (index = 0; index < this->m_cachedReadOperations.size(); index++) {
389 ReadBufferOperation *readOperation = (ReadBufferOperation *)this->m_cachedReadOperations[index];
390 MemoryProxy *memoryProxy = readOperation->getMemoryProxy();
391 this->determineDependingAreaOfInterest(&rect, readOperation, &output);
392 MemoryBuffer *memoryBuffer = memoryProxy->getExecutor()->constructConsolidatedMemoryBuffer(memoryProxy, &output);
393 memoryBuffers[readOperation->getOffset()] = memoryBuffer;
395 return memoryBuffers;
/**
 * Create a new MemoryBuffer covering the given rect and copy the content of the
 * memory proxy's current buffer into it.
 * @param memoryProxy proxy whose buffer is the copy source.
 * @param rect area the new buffer is created for.
 * NOTE(review): the new buffer is allocated with `new`; presumably the caller takes ownership - confirm.
 */
398 MemoryBuffer *ExecutionGroup::constructConsolidatedMemoryBuffer(MemoryProxy *memoryProxy, rcti *rect)
400 MemoryBuffer *imageBuffer = memoryProxy->getBuffer();
401 MemoryBuffer *result = new MemoryBuffer(memoryProxy, rect);
402 result->copyContentFrom(imageBuffer);
/**
 * Finish the bookkeeping for a chunk after it has been calculated: update its state,
 * count it as finished, release the input-buffer array and report progress.
 * @param chunkNumber number of the chunk that was executed.
 * @param memoryBuffers input-buffer array of the chunk; walked over m_cachedMaxReadBufferOffset
 *                      entries and released with MEM_freeN here.
 */
406 void ExecutionGroup::finalizeChunkExecution(int chunkNumber, MemoryBuffer **memoryBuffers)
// only a chunk that is currently scheduled moves on to the executed state
408 if (this->m_chunkExecutionStates[chunkNumber] == COM_ES_SCHEDULED)
409 this->m_chunkExecutionStates[chunkNumber] = COM_ES_EXECUTED;
411 this->m_chunksFinished++;
413 for (unsigned int index = 0; index < this->m_cachedMaxReadBufferOffset; index++) {
414 MemoryBuffer *buffer = memoryBuffers[index];
// entries holding temporary buffers are cleared from the array
416 if (buffer->isTemporarily()) {
417 memoryBuffers[index] = NULL;
422 MEM_freeN(memoryBuffers);
425 // status report is only performed for top level Execution Groups.
// progress is the fraction of finished chunks, reported through the node tree's progress callback
426 float progress = this->m_chunksFinished;
427 progress /= this->m_numberOfChunks;
428 this->m_bTree->progress(this->m_bTree->prh, progress);
/**
 * Compute the pixel rect of the chunk at grid position (xChunk, yChunk).
 * @param rect [out] receives the rect via BLI_rcti_init(rect, xmin, xmax, ymin, ymax).
 * @param xChunk column of the chunk in the chunk grid.
 * @param yChunk row of the chunk in the chunk grid.
 */
432 inline void ExecutionGroup::determineChunkRect(rcti *rect, const unsigned int xChunk, const unsigned int yChunk) const
// a single-threaded group has one chunk that spans the whole image
434 if (this->m_singleThreaded) {
435 BLI_rcti_init(rect, 0, this->m_width, 0, this->m_height);
// chunk origin in pixels; the far edges are clamped to the image size so border chunks may be smaller
438 const unsigned int minx = xChunk * this->m_chunkSize;
439 const unsigned int miny = yChunk * this->m_chunkSize;
440 BLI_rcti_init(rect, minx, min(minx + this->m_chunkSize, this->m_width), miny, min(miny + this->m_chunkSize, this->m_height));
/**
 * Compute the pixel rect of a chunk given by its linear chunk number.
 * The number is split into a row (chunkNumber / m_numberOfXChunks) and a column (the remainder),
 * then the (xChunk, yChunk) overload is used.
 * @param rect [out] receives the rect.
 * @param chunkNumber linear chunk number.
 */
444 void ExecutionGroup::determineChunkRect(rcti *rect, const unsigned int chunkNumber) const
446 const unsigned int yChunk = chunkNumber / this->m_numberOfXChunks;
447 const unsigned int xChunk = chunkNumber - (yChunk * this->m_numberOfXChunks);
448 determineChunkRect(rect, xChunk, yChunk);
/**
 * Allocate an output buffer for the given rect when the group's output operation is a
 * write-buffer operation; the buffer is created for that operation's memory proxy.
 * @param chunkNumber chunk the buffer is meant for (not used in the code shown here).
 * @param rect area the new buffer covers.
 */
451 MemoryBuffer *ExecutionGroup::allocateOutputBuffer(int chunkNumber, rcti *rect)
453 // we assume that this method is only called from complex execution groups.
454 NodeOperation *operation = this->getOutputNodeOperation();
455 if (operation->isWriteBufferOperation()) {
456 WriteBufferOperation *writeOperation = (WriteBufferOperation *)operation;
457 MemoryBuffer *buffer = new MemoryBuffer(writeOperation->getMemoryProxy(), rect);
/**
 * Schedule every chunk of this group that overlaps the given area.
 *
 * A single-threaded group has only one chunk, so the call is forwarded for chunk (0, 0).
 * Otherwise the area is converted to a range of chunk columns and rows, clamped to the
 * chunk grid, and scheduleChunkWhenPossible() is called for each chunk in that range.
 *
 * @param graph the execution system, passed on to scheduleChunkWhenPossible().
 * @param area pixel area; xmax / ymax are treated as exclusive, like the rects produced by
 *             determineChunkRect().
 */
464 bool ExecutionGroup::scheduleAreaWhenPossible(ExecutionSystem *graph, rcti *area)
466 if (this->m_singleThreaded) {
467 return scheduleChunkWhenPossible(graph, 0, 0);
469 // find all chunks inside the rect
470 // determine minxchunk, minychunk, maxxchunk, maxychunk where x and y are chunknumbers
472 float chunkSizef = this->m_chunkSize;
// max chunk indices are exclusive loop bounds. Rounding up xmax / chunkSize (not (xmax - 1) / chunkSize)
// keeps the chunk that holds the last pixel when xmax - 1 is an exact multiple of the chunk size.
475 int minxchunk = floor(area->xmin / chunkSizef);
476 int maxxchunk = ceil(area->xmax / chunkSizef);
477 int minychunk = floor(area->ymin / chunkSizef);
478 int maxychunk = ceil(area->ymax / chunkSizef);
// clamp the range to the chunk grid of this group
479 minxchunk = max(minxchunk, 0);
480 minychunk = max(minychunk, 0);
481 maxxchunk = min(maxxchunk, (int)this->m_numberOfXChunks);
482 maxychunk = min(maxychunk, (int)this->m_numberOfYChunks);
485 for (indexx = minxchunk; indexx < maxxchunk; indexx++) {
486 for (indexy = minychunk; indexy < maxychunk; indexy++) {
487 if (!scheduleChunkWhenPossible(graph, indexx, indexy)) {
/**
 * Hand a chunk to the work scheduler if it has not been scheduled yet.
 * A chunk in state COM_ES_NOT_SCHEDULED is marked COM_ES_SCHEDULED and passed to WorkScheduler::schedule().
 * @param chunkNumber linear number of the chunk.
 */
496 bool ExecutionGroup::scheduleChunk(unsigned int chunkNumber)
498 if (this->m_chunkExecutionStates[chunkNumber] == COM_ES_NOT_SCHEDULED) {
499 this->m_chunkExecutionStates[chunkNumber] = COM_ES_SCHEDULED;
500 WorkScheduler::schedule(this, chunkNumber);
/**
 * Try to schedule the chunk at grid position (xChunk, yChunk).
 *
 * For a chunk that is neither executed nor scheduled, the areas it depends on are requested
 * from the groups that produce its input buffers (scheduleAreaWhenPossible() on each executor).
 *
 * @param graph the execution system, passed on to the groups this chunk depends on.
 * @param xChunk column of the chunk in the chunk grid.
 * @param yChunk row of the chunk in the chunk grid.
 */
506 bool ExecutionGroup::scheduleChunkWhenPossible(ExecutionSystem *graph, int xChunk, int yChunk)
// positions outside the chunk grid are checked first
508 if (xChunk < 0 || xChunk >= (int)this->m_numberOfXChunks) {
511 if (yChunk < 0 || yChunk >= (int)this->m_numberOfYChunks) {
514 int chunkNumber = yChunk * this->m_numberOfXChunks + xChunk;
515 // chunk is already executed
516 if (this->m_chunkExecutionStates[chunkNumber] == COM_ES_EXECUTED) {
520 // chunk is scheduled, but not executed
521 if (this->m_chunkExecutionStates[chunkNumber] == COM_ES_SCHEDULED) {
525 // chunk is neither executed nor scheduled.
// memoryProxies[i] is the proxy of m_cachedReadOperations[i] (both are filled in the same order)
526 vector<MemoryProxy *> memoryProxies;
527 this->determineDependingMemoryProxies(&memoryProxies);
530 determineChunkRect(&rect, xChunk, yChunk);
532 bool canBeExecuted = true;
// ask every producing group to schedule the area this chunk reads from it
535 for (index = 0; index < this->m_cachedReadOperations.size(); index++) {
536 ReadBufferOperation *readOperation = (ReadBufferOperation *)this->m_cachedReadOperations[index];
537 BLI_rcti_init(&area, 0, 0, 0, 0);
538 MemoryProxy *memoryProxy = memoryProxies[index];
539 determineDependingAreaOfInterest(&rect, readOperation, &area);
540 ExecutionGroup *group = memoryProxy->getExecutor();
// an input area that is not available yet blocks this chunk
543 if (!group->scheduleAreaWhenPossible(graph, &area)) {
544 canBeExecuted = false;
553 scheduleChunk(chunkNumber);
/**
 * Determine which area of a read operation's buffer is needed to calculate the given area
 * of this group; the question is forwarded to the group's output operation.
 * @param input area of this group that is to be calculated.
 * @param readOperation the read operation whose buffer is asked about.
 * @param output [out] receives the needed area.
 */
559 void ExecutionGroup::determineDependingAreaOfInterest(rcti *input, ReadBufferOperation *readOperation, rcti *output)
561 this->getOutputNodeOperation()->determineDependingAreaOfInterest(input, readOperation, output);
/**
 * Append the memory proxy of every cached read operation to the given list,
 * in the order of m_cachedReadOperations.
 * @param memoryProxies [out] list the proxies are appended to; existing entries are kept.
 */
564 void ExecutionGroup::determineDependingMemoryProxies(vector<MemoryProxy *> *memoryProxies)
567 for (index = 0; index < this->m_cachedReadOperations.size(); index++) {
568 ReadBufferOperation *readOperation = (ReadBufferOperation *) this->m_cachedReadOperations[index];
569 memoryProxies->push_back(readOperation->getMemoryProxy());
/**
 * @return the m_openCL flag of this group; addOperation() copies it from a non-buffer operation.
 */
573 bool ExecutionGroup::isOpenCL()
575 return this->m_openCL;