Tests & examples with pyvkfft

[1]:
# Use the following to install pyvkfft on google colab
if False:
    # Install pyvkfft & dependencies only once using pip
    import os
    if not os.path.exists('dev/pyvkfft'):
      !mkdir dev
      !cd dev && git clone --recursive https://github.com/vincefn/pyvkfft.git
      !pip install pycuda
      # !pip install cupy
      !cd dev/pyvkfft && pip install .
      # scipy, matplotlib not required for pyvkfft, but for tests
      !pip install scipy matplotlib

Select PyCUDA or CuPy

[2]:
try:
    # raise ImportError()  # Uncomment to force using cupy if you have both
    import pycuda.autoinit
    import pycuda.driver as cu_drv
    import pycuda.gpuarray as cua
    synchronize = cu_drv.Context.synchronize
    gpu_empty = cua.empty
    gpu_empty_like = cua.empty_like
    to_gpu = cua.to_gpu
    stream = cu_drv.Stream()
    print("Using PyCUDA")

except ImportError:
    try:
        import cupy
        synchronize = cupy.cuda.stream.get_current_stream().synchronize
        gpu_empty = cupy.empty
        gpu_empty_like = cupy.empty_like
        to_gpu = cupy.asarray
        stream = cupy.cuda.Stream()
        print("Using CuPy")
    except ImportError:
        raise ImportError("You need either PyCUDA or Cupy")

import pyvkfft.cuda
from pyvkfft.cuda import VkFFTApp
from pyvkfft.fft import fftn as vkfftn, ifftn as vkifftn, rfftn as vkrfftn, irfftn as vkirfftn
from pyvkfft.accuracy import l2, li
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm
try:
    from scipy.datasets import ascent
except:
    from scipy.misc import ascent
import numpy as np
from scipy.fft import fftn, ifftn, fftshift, rfftn, irfftn, dctn, idctn
import timeit

Using PyCUDA
[3]:


def speed(shape, ndim, nb=10, stream=None, inplace=True, norm=0): """ Perform a speed test using VkFFT ( :param shape: array shape to use :param ndim: number of dimensions for the FFT (e.g. can be 1, 2 or 3 for a 3D array, etc..) :param nb: number of repeats for timing :param stream: the pycuda.driver.Stream to be sued for calculations. If None, the default stream for the active context will be used. :param inplace: if True (default), do an in-place FFT :param norm: norm=0 will have the L2 norm multiplied by the FT size for each transform, whereas norm=1 will divide the L2 norm by the FT size for a backwards transform, similarly to numpy's fft norm='backwards'. :return: a tuple with the time per couple of FFT and iFFT, and the idealised memory throughput assuming one read and one write of the array per transform axis, in Gbytes/s. """ d = to_gpu(np.random.uniform(0, 1, shape).astype(np.complex64)) if not inplace: d1 = gpu_empty_like(d) # print(d.shape) app = VkFFTApp(d.shape, d.dtype, ndim=ndim, stream=stream, inplace=inplace, norm=norm) synchronize() t0 = timeit.default_timer() for i in range(nb): if inplace: d = app.ifft(d) d = app.fft(d) else: d1 = app.ifft(d, d1) d = app.fft(d1, d) synchronize() dt = timeit.default_timer() - t0 shape = list(shape) if len(shape) < 3: shape += [1] * (3 - len(shape)) gbps = d.nbytes * nb * ndim * 2 * 2 / dt / 1024 ** 3 s = "" if not inplace: s= "[out-of-place]" print("(%4d %4d %4d)[%dD] dt=%6.2f ms %7.2f Gbytes/s %s [norm=%d]" % (shape[2], shape[1], shape[0], ndim, dt / nb * 1000, gbps, s, norm)) return dt, gbps

Speed tests

[4]:
if False:
    for inplace in [True, False]:
        speed((256, 256, 256), 3, inplace=inplace)
        speed((400, 400, 400), 3, inplace=inplace)
        speed((512, 512, 512), 3, inplace=inplace)
        speed((16, 504, 504), 2, inplace=inplace)
        speed((16, 512, 512), 2, inplace=inplace)
        speed((16, 1024, 1024), 2, inplace=inplace)
        speed((16, 2048, 2048), 2, inplace=inplace)
        speed((1, 512, 512), 2, inplace=inplace)
        speed((1, 1024, 1024), 2, inplace=inplace)
        speed((1, 2600, 2048), 2, inplace=inplace)
        speed((1, 2048, 2600), 2, inplace=inplace)
        speed((8, 2600, 2048), 2, inplace=inplace)
        speed((8, 2048, 2600), 2, inplace=inplace)
        speed((16, 2200, 2400), 2, inplace=inplace)
        speed((16, 2310, 2730), 2, inplace=inplace)
        speed((16, 2730, 2310), 2, inplace=inplace)  # 2310=2*3*5*7*11, 2730=2*3*5*7*13
        speed((16, 2400, 2200), 2, inplace=inplace)
        speed((1, 3080, 3080), 2, inplace=inplace)
        speed((8, 3072, 3072), 2, inplace=inplace)

        # Also test with a supplied stream
        speed((16, 1000, 1000), 2, stream=stream, inplace=inplace)

Complex<->Complex, inplace

[5]:
# single precision
d = to_gpu(ascent().astype(np.complex64))
app = VkFFTApp(d.shape, d.dtype, ndim=2)

print((abs(d.get()) ** 2).sum())
d = app.fft(d)
print((abs(d.get()) ** 2).sum())

print()

d = app.ifft(d)
print((abs(d.get()) ** 2).sum())

# Double precision
d = to_gpu(ascent().astype(np.complex128))
app = VkFFTApp(d.shape, d.dtype, ndim=2)

plt.figure(figsize=(15, 5))
plt.subplot(121)
plt.imshow(np.fft.fftshift(abs(np.fft.fftn(ascent().astype(np.complex128)))), norm=LogNorm())
plt.colorbar()

print((abs(d.get()) ** 2).sum())
d = app.fft(d)

plt.subplot(122)
plt.imshow(np.fft.fftshift(abs(d.get())), norm=LogNorm())
plt.colorbar()

print((abs(d.get()) ** 2).sum())
d = app.ifft(d)
print((abs(d.get()) ** 2).sum())

2629743400.0
689371240000000.0

2629743000.0
2629743734.0
689371541405696.1
2629743734.0
../_images/examples_pyvkfft-tests-CUDA_8_1.png

Complex<->Complex, inplace, y-axis transform

Use axes=(-2,) to perform a transform along specific axes, skipping the first dimension

[6]:
# single precision
d = to_gpu(ascent().astype(np.complex64))
axes = (-2,)
app = VkFFTApp(d.shape, d.dtype, axes=axes)

print((abs(d.get()) ** 2).sum())
d = app.fft(d)
print((abs(d.get()) ** 2).sum())

print()

d = app.ifft(d)
print((abs(d.get()) ** 2).sum())

# Double precision
d = to_gpu(ascent().astype(np.complex128))
app = VkFFTApp(d.shape, d.dtype, axes=axes)

plt.figure(figsize=(15, 5))
plt.subplot(121)
plt.imshow(np.fft.fftshift(abs(np.fft.fftn(ascent().astype(np.complex128), axes=axes))), norm=LogNorm())
plt.colorbar()

print((abs(d.get()) ** 2).sum())
d = app.fft(d)

plt.subplot(122)
plt.imshow(np.fft.fftshift(abs(d.get())), norm=LogNorm())
plt.colorbar()

print((abs(d.get()) ** 2).sum())
d = app.ifft(d)
print((abs(d.get()) ** 2).sum())

2629743400.0
1346428200000.0

2629743400.0
2629743734.0
1346428791808.0
2629743734.0
../_images/examples_pyvkfft-tests-CUDA_10_1.png

complex<->complex, out-of-place

[7]:
# single precision, out-of-place
d = to_gpu(ascent().astype(np.complex64))
d1 = gpu_empty_like(d)
app = VkFFTApp(d.shape, d.dtype, inplace=False, ndim=2)

print((abs(d.get()) ** 2).sum())
plt.figure(figsize=(15, 4), dpi=100)
plt.subplot(141)
plt.imshow(ascent())
plt.title('Original array')

d1 = app.fft(d, d1)
print((abs(d1.get()) ** 2).sum())
dn = fftn(ascent().astype(np.complex64))
print(np.allclose(dn, d1.get(), rtol=1e-6,atol=abs(dn).max()*1e-6))

plt.subplot(142)
plt.imshow(abs(d.get()))
plt.title('Source array after FFT')

plt.subplot(143)
plt.imshow(fftshift(abs(d1.get())), norm=LogNorm(vmin=100))
plt.colorbar()
plt.title('FFT')

plt.subplot(144)
plt.imshow(fftshift(abs(fftn(ascent().astype(np.complex64)))), norm=LogNorm(vmin=100))
plt.colorbar()
plt.title('FFT (numpy)')


d = app.ifft(d1, d)
print((abs(d.get()) ** 2).sum())
print(np.allclose(ascent(), d.get(), rtol=1e-6,atol=ascent().max()*1e-6))

# double precision, out-of-place
d = to_gpu(ascent().astype(np.complex128))
d1 = gpu_empty_like(d)
app = VkFFTApp(d.shape, d.dtype, inplace=False, ndim=2)

print((abs(d.get()) ** 2).sum())

d1 = app.fft(d, d1)
print((abs(d1.get()) ** 2).sum())
dn = fftn(ascent().astype(np.complex128))
print(np.allclose(dn, d1.get(), rtol=1e-12,atol=abs(dn).max()*1e-12))

d = app.ifft(d1, d)
print((abs(d.get()) ** 2).sum())

2629743400.0
689371240000000.0
True
2629743000.0
True
2629743734.0
689371541405696.1
True
2629743734.0
../_images/examples_pyvkfft-tests-CUDA_12_1.png

Real<->Complex (half-Hermitian) inplace transform

[8]:
# single precision, R2C inplace
d0 = np.zeros((512, 514), dtype=np.float32)
d0[:, :512] = ascent()

d = to_gpu(d0)
app = VkFFTApp(d.shape, d.dtype, ndim=2, r2c=True)

print((abs(d.get()) ** 2).sum())
print(d.shape, d.dtype)
d = app.fft(d)
print(d.shape, d.dtype)
print((abs(d.get()) ** 2).sum())

dn = np.fft.rfftn(d0[:, :-2].astype(np.float32))
print(dn.shape)
print(np.allclose(d.get(), dn, rtol=1e-6, atol=dn.max() * 1e-6))

plt.figure(figsize=(15,5), dpi=100)
plt.subplot(121)
plt.imshow(fftshift(abs(d.get()), axes=0), norm=LogNorm(vmin=100))
plt.colorbar()
plt.title("R->C")
plt.subplot(122)
plt.imshow(fftshift(abs(dn), axes=0), norm=LogNorm(vmin=100))
plt.colorbar()
plt.title("R->C (numpy)")

d = app.ifft(d)
print(d.shape, d.dtype)
print((abs(d.get()) ** 2).sum())

print(np.allclose(d.get()[:, :-2], d0[:, :-2], rtol=1e-6, atol = 255e-6))

2629743600.0
(512, 514) float32
(512, 257) complex64
610018400000000.0
(512, 257)
True
(512, 514) float32
2636300000.0
True
../_images/examples_pyvkfft-tests-CUDA_14_1.png

Real<->Complex (half-Hermitian) out-of-place transform

[9]:
# single precision, R2C inplace
d0 = to_gpu(ascent().astype(np.float32))
d1 = gpu_empty((512, 257), dtype=np.complex64)

app = VkFFTApp(d0.shape, d0.dtype, ndim=2, r2c=True, inplace=False)

print((abs(d0.get()) ** 2).sum())
print(d0.shape, d0.dtype)
d1 = app.fft(d0, d1)
print(d1.shape, d1.dtype)
print((abs(d1.get()) ** 2).sum())

dn = np.fft.rfftn(ascent().astype(np.float32))
print(dn.shape)
print(np.allclose(d1.get(), dn, rtol=1e-6, atol=dn.max() * 1e-6))

plt.figure(figsize=(15,5), dpi=100)
plt.subplot(121)
plt.imshow(fftshift(abs(d1.get()), axes=0), norm=LogNorm(vmin=100))
plt.colorbar()
plt.title("R->C out-of-place")
plt.subplot(122)
plt.imshow(fftshift(abs(dn), axes=0), norm=LogNorm(vmin=100))
plt.colorbar()
plt.title("R->C (numpy)")

d0 = app.ifft(d1, d0)
print(d0.shape, d0.dtype)
print((abs(d0.get()) ** 2).sum())

print(np.allclose(d0.get(), ascent(), rtol=1e-6, atol = 255e-6))

plt.figure(figsize=(15,5), dpi=100)
plt.subplot(121)
plt.imshow(d0.get())
plt.colorbar()
plt.title("R->C & C->R out-of-place")
plt.subplot(122)
plt.imshow(ascent())
plt.colorbar()
plt.title("Original array")

2629743400.0
(512, 512) float32
(512, 257) complex64
610018400000000.0
(512, 257)
True
(512, 512) float32
2629743000.0
True
[9]:
Text(0.5, 1.0, 'Original array')
../_images/examples_pyvkfft-tests-CUDA_16_2.png
../_images/examples_pyvkfft-tests-CUDA_16_3.png

Real<->Real transform = Direct Cosine Transform (DCT)

[10]:
# single precision, DCT inplace
for dct_type in (1,2,3,4):
    print("Testing DCT type %d" % dct_type)
    d0 = to_gpu(ascent().astype(np.float32))

    # dct type  (default if dct=True)
    app = VkFFTApp(d0.shape, d0.dtype, ndim=2, dct=dct_type, inplace=True)

    print((abs(d0.get()) ** 2).sum())
    d0 = app.fft(d0, d0)
    print((abs(d0.get()) ** 2).sum())

    dn = dctn(ascent().astype(np.float32), type=dct_type)
    print(np.allclose(d0.get(), dn, rtol=1e-6, atol=dn.max() * 1e-6))

    fig = plt.figure(figsize=(15,5), dpi=100)
    plt.subplot(121)
    plt.imshow(abs(d0.get()), norm=LogNorm(vmin=100))
    plt.colorbar()
    plt.title("DCT-%d in-place" % dct_type)
    plt.subplot(122)
    plt.imshow(abs(dn), norm=LogNorm(vmin=100))
    plt.colorbar()
    plt.title("DCT-%d (scipy)" % dct_type)

    # inverse transform:
    d0 = app.ifft(d0, d0)
    idn = idctn(dn, type=dct_type)
    print((abs(d0.get()) ** 2).sum(), (abs(idn) ** 2).sum())

    print(np.allclose(d0.get(), idn, rtol=1e-4, atol = 1e-3),
          np.allclose(d0.get(), ascent(), rtol=1e-4, atol = 1e-3))

    fig = plt.figure(figsize=(15,5), dpi=100)
    plt.subplot(121)
    plt.imshow(d0.get())
    plt.colorbar()
    plt.title("DCT + iDCT type %d in-place" % dct_type)
    plt.subplot(122)
    plt.imshow(ascent())
    plt.colorbar()
    plt.title("Original array")
    print()
Testing DCT type 1
2629743400.0
9056434000000000.0
True
2629742000.0 2629743000.0
True True

Testing DCT type 2
2629743400.0
9135847000000000.0
True
2629743000.0 2629743400.0
True True

Testing DCT type 3
2629743400.0
2751634700000000.0
True
2629741800.0 2629743400.0
True True

Testing DCT type 4
2629743400.0
2757484400000000.0
True
2629741600.0 2629743000.0
True True

../_images/examples_pyvkfft-tests-CUDA_18_1.png
../_images/examples_pyvkfft-tests-CUDA_18_2.png
../_images/examples_pyvkfft-tests-CUDA_18_3.png
../_images/examples_pyvkfft-tests-CUDA_18_4.png
../_images/examples_pyvkfft-tests-CUDA_18_5.png
../_images/examples_pyvkfft-tests-CUDA_18_6.png
../_images/examples_pyvkfft-tests-CUDA_18_7.png
../_images/examples_pyvkfft-tests-CUDA_18_8.png

Complex<->Complex, inplace, F-ordering

[11]:
d0 = np.asarray(ascent().astype(np.complex64), order='F')
for axes in [None, (-1,), (-2,)]:
    d = to_gpu(d0)
    app = VkFFTApp(d.shape, d.dtype, strides=d.strides, axes=axes)

    plt.figure(figsize=(15, 5))
    plt.subplot(121)
    plt.imshow(np.fft.fftshift(abs(np.fft.fftn(d0, axes=axes))), norm=LogNorm())
    plt.colorbar()

    d = app.fft(d)

    plt.subplot(122)
    plt.imshow(np.fft.fftshift(abs(d.get())), norm=LogNorm())
    plt.colorbar()

    d = app.ifft(d)

../_images/examples_pyvkfft-tests-CUDA_20_0.png
../_images/examples_pyvkfft-tests-CUDA_20_1.png
../_images/examples_pyvkfft-tests-CUDA_20_2.png

Complex<->Complex, out-of-place, F-ordering

[12]:
d0 = np.asarray(ascent().astype(np.complex64), order='F')
axes = (-1,)
d1 = to_gpu(d0)
d2 = gpu_empty_like(d1)
app = VkFFTApp(d1.shape, d1.dtype, strides=d1.strides, axes=axes, inplace=False)

plt.figure(figsize=(15, 5))
plt.subplot(121)
plt.imshow(np.fft.fftshift(abs(np.fft.fftn(d0, axes=axes))), norm=LogNorm())
plt.colorbar()

print((abs(d.get()) ** 2).sum())
d2 = app.fft(d1, d2)

plt.subplot(122)
plt.imshow(np.fft.fftshift(abs(d2.get())), norm=LogNorm())
plt.colorbar()

d1 = app.ifft(d2, d1)

2629743600.0
../_images/examples_pyvkfft-tests-CUDA_22_1.png

Complex<->Complex, inplace, F-ordering, simple FFT interface

[13]:
order = 'F'
for axes in [None, (-1,), (-2,)]:
    d0 = np.asarray(ascent().astype(np.complex64), order=order)
    d = to_gpu(d0)

    plt.figure(figsize=(15, 5))
    plt.subplot(121)
    dn = np.fft.fftshift(abs(np.fft.fftn(d0, axes=axes)))
    plt.imshow(dn, norm=LogNorm())
    plt.colorbar()

    d = vkfftn(d, d, axes=axes)

    plt.subplot(122)
    plt.imshow(np.fft.fftshift(abs(d.get())), norm=LogNorm())
    plt.colorbar()

    d = vkifftn(d, d, axes=axes)

    # Test accuracy on random array
    shape, dtype = d0.shape, d0.dtype
    d0 = (np.random.uniform(-0.5, 0.5, shape) + 1j * np.random.uniform(-0.5, 0.5, shape)).astype(dtype)
    d0 = np.asarray(d0, order=order)
    d = to_gpu(d0)
    d = vkfftn(d, d, axes=axes)
    dn = np.fft.fftn(d0, axes=axes)
    print(l2(dn, d.get()), li(dn, d.get()))
    d = to_gpu(d0)
    d = vkifftn(d, d, axes=axes)
    dn = np.fft.ifftn(d0, axes=axes)
    print(l2(dn, d.get()), li(dn, d.get()))
    print()


4.444979922273623e-07 5.908248388740862e-07
4.4354686916232555e-07 6.16391201187473e-07

2.6663396324512363e-07 3.872400644200587e-07
2.664766305948513e-07 4.079651360917785e-07

2.659319511021375e-07 3.5171515628801596e-07
2.6613409491012233e-07 3.6890486999237857e-07

../_images/examples_pyvkfft-tests-CUDA_24_1.png
../_images/examples_pyvkfft-tests-CUDA_24_2.png
../_images/examples_pyvkfft-tests-CUDA_24_3.png

Complex<->Complex, out-of-place, F-ordering, simple FFT interface

The destination array is automatically allocated

[ ]:
order = 'F'
for axes in [None, (-1,), (-2,)]:
    d0 = np.asarray(ascent().astype(np.complex64), order=order)
    d = to_gpu(d0)

    plt.figure(figsize=(15, 5))
    plt.subplot(121)
    dn = np.fft.fftshift(abs(np.fft.fftn(d0, axes=axes)))
    plt.imshow(dn, norm=LogNorm())
    plt.colorbar()

    d = vkfftn(d, axes=axes)

    plt.subplot(122)
    plt.imshow(np.fft.fftshift(abs(d.get())), norm=LogNorm())
    plt.colorbar()

    d = vkifftn(d, axes=axes)

    # Test accuracy on random array
    shape, dtype = d0.shape, d0.dtype
    d0 = (np.random.uniform(-0.5, 0.5, shape) + 1j * np.random.uniform(-0.5, 0.5, shape)).astype(dtype)
    d0 = np.asarray(d0, order=order)
    d = to_gpu(d0)
    d = vkfftn(d, axes=axes)
    dn = np.fft.fftn(d0, axes=axes)
    print(l2(dn, d.get()), li(dn, d.get()))
    d = to_gpu(d0)
    d = vkifftn(d, axes=axes)
    dn = np.fft.ifftn(d0, axes=axes)
    print(l2(dn, d.get()), li(dn, d.get()))
    print()


Complex<->Complex, out-of-place pre-allocated, F-ordering, simple FFT interface

The destination array is pre-allocated here

[ ]:
order = 'F'
for axes in [None, (-1,), (-2,)]:
    d0 = np.asarray(ascent().astype(np.complex64), order=order)
    d = to_gpu(d0)
    dest = gpu_empty_like(d)

    plt.figure(figsize=(15, 5))
    plt.subplot(121)
    dn = np.fft.fftshift(abs(np.fft.fftn(d0, axes=axes)))
    plt.imshow(dn, norm=LogNorm())
    plt.colorbar()

    dest = vkfftn(d, dest, axes=axes)

    plt.subplot(122)
    plt.imshow(np.fft.fftshift(abs(dest.get())), norm=LogNorm())
    plt.colorbar()

    dest = vkifftn(d, axes=axes)

    # Test accuracy on random array
    shape, dtype = d0.shape, d0.dtype
    d0 = (np.random.uniform(-0.5, 0.5, shape) + 1j * np.random.uniform(-0.5, 0.5, shape)).astype(dtype)
    d0 = np.asarray(d0, order=order)

    d = to_gpu(d0)
    dest = vkfftn(d, dest, axes=axes)
    dn = np.fft.fftn(d0, axes=axes)
    print(l2(dn, dest.get()), li(dn, dest.get()))

    d = to_gpu(d0)
    dest = vkifftn(d, dest, axes=axes)
    dn = np.fft.ifftn(d0, axes=axes)
    print(l2(dn, dest.get()), li(dn, dest.get()))
    print()


Real<->Complex (half-Hermitian) inplace transform, F-ordered, simple fft interface

[ ]:
# single precision, R2C inplace, # F order
order = 'F'
d0 = np.zeros((514, 512), dtype=np.float32)
d0[:512] = ascent()
d0 = np.asarray(d0, order=order)

d = to_gpu(d0)

d = vkrfftn(d, d)

dn = rfftn(d0[:-2], axes=(-1,-2))  # Need to force the Hermitian axis as the fast one
print(dn.shape)
print(np.allclose(d.get(), dn, rtol=1e-6, atol=dn.max() * 1e-6))

plt.figure(figsize=(15,5), dpi=100)
plt.subplot(121)
plt.imshow(fftshift(abs(d.get()), axes=0), norm=LogNorm(vmin=100))
plt.colorbar()
plt.title("R->C")
plt.subplot(122)
plt.imshow(fftshift(abs(dn), axes=0), norm=LogNorm(vmin=100))
plt.colorbar()
plt.title("R->C (numpy)")

d = vkirfftn(d, d)
print(np.allclose(d.get()[:-2], d0[:-2], rtol=1e-6, atol = 255e-6))

[ ]:
d.strides , dn.strides

Real<->Complex (half-Hermitian) out-of-place, F-ordered, simple fft interface

[ ]:
# single precision, R2C inplace, # F order
order = 'F'
d0 = ascent().astype(np.float32)
d0 = np.asarray(d0, order=order)

d = to_gpu(d0)

d = vkrfftn(d)

dn = rfftn(d0, axes=(-1,-2))  # Need to force the Hermitian axis as the fast one
print(dn.shape, d.shape)
print(np.allclose(d.get(), dn, rtol=1e-6, atol=dn.max() * 1e-6))

plt.figure(figsize=(15,5), dpi=100)
plt.subplot(121)
plt.imshow(fftshift(abs(d.get()), axes=0), norm=LogNorm(vmin=100))
plt.colorbar()
plt.title("R->C")
plt.subplot(122)
plt.imshow(fftshift(abs(dn), axes=0), norm=LogNorm(vmin=100))
plt.colorbar()
plt.title("R->C (numpy)")

d = vkirfftn(d)
print(np.allclose(d.get(), d0, rtol=1e-6, atol = 255e-6))

[ ]: