from tensorflow.keras.layers import Lambda, concatenate
from tensorflow.keras.models import Model
import tensorflow as tf


def multi_gpu_model(model, gpus):
    """Replicate `model` across several GPUs for data-parallel inference/training.

    Each replica is placed on one GPU and receives a slice of every input
    batch; the replica outputs are concatenated back together on the CPU,
    so the returned `Model` behaves like the original single-device model.
    """
    if isinstance(gpus, (list, tuple)):
        num_gpus = len(gpus)
        target_gpu_ids = gpus
    else:
        num_gpus = gpus
        target_gpu_ids = range(num_gpus)

    def get_slice(data, i, parts):
        """Return the i-th of `parts` slices of `data` along the batch axis."""
        shape = tf.shape(data)
        batch_size = shape[:1]
        input_shape = shape[1:]
        step = batch_size // parts
        if i == num_gpus - 1:
            # The last replica absorbs the remainder when the batch
            # size is not evenly divisible by the number of parts.
            size = batch_size - step * i
        else:
            size = step
        size = tf.concat([size, input_shape], axis=0)
        stride = tf.concat([step, input_shape * 0], axis=0)
        start = stride * i
        return tf.slice(data, start, size)

    # One list per model output, collecting that output from every replica.
    all_outputs = []
    for i in range(len(model.outputs)):
        all_outputs.append([])

    # Place a copy of the model on each GPU,
    # each getting a slice of the inputs.
    for i, gpu_id in enumerate(target_gpu_ids):
        with tf.device('/gpu:%d' % gpu_id):
            with tf.name_scope('replica_%d' % gpu_id):
                inputs = []
                # Retrieve a slice of the input.
                for x in model.inputs:
                    input_shape = tuple(x.get_shape().as_list())[1:]
                    slice_i = Lambda(get_slice,
                                     output_shape=input_shape,
                                     arguments={'i': i,
                                                'parts': num_gpus})(x)
                    inputs.append(slice_i)

                # Apply model on slice
                # (creating a model replica on the target device).
                outputs = model(inputs)
                if not isinstance(outputs, list):
                    outputs = [outputs]

                # Save the outputs for merging back together later.
                for o in range(len(outputs)):
                    all_outputs[o].append(outputs[o])

    # Merge outputs on CPU.
    with tf.device('/cpu:0'):
        merged = []
        for name, outputs in zip(model.output_names, all_outputs):
            merged.append(concatenate(outputs,
                                      axis=0, name=name))
        return Model(model.inputs, merged)
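

# Minimal usage sketch, not part of the original file: wraps a hypothetical
# toy single-output model across 2 GPUs and fits it on random data. Assumes
# at least two visible GPU devices; the model, data, and hyperparameters
# below are illustrative only.
if __name__ == '__main__':
    import numpy as np
    from tensorflow.keras.layers import Dense, Input

    inp = Input(shape=(32,))
    base_model = Model(inp, Dense(1)(inp))

    # Replicate across 2 GPUs. The weights stay shared because every
    # replica calls the same underlying `base_model`.
    parallel_model = multi_gpu_model(base_model, gpus=2)
    parallel_model.compile(optimizer='adam', loss='mse')

    x = np.random.rand(128, 32).astype('float32')
    y = np.random.rand(128, 1).astype('float32')

    # Each batch of 64 is split into two slices of 32, one per replica;
    # the concatenated outputs feed a single loss on the CPU.
    parallel_model.fit(x, y, batch_size=64, epochs=1)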