Profiling
> This page is very out-of-date and has some inconsistencies with current versions
The following block of code is profiled by cProfile:
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
import cProfile
import io
import pstats
import ErwinJr2.save_load as save_load
from ErwinJr2.qclayers import auto_gain
if __name__ == "__main__":
    # Load the example structure from its JSON description and override
    # its x_step before the measurement starts.
    with open("../example/PQLiu.json") as json_file:
        qcl = save_load.qcl_load(json_file)
    qcl.x_step = 0.1

    # Only the auto_gain call sits between enable() and disable(), so the
    # file loading above is excluded from the collected statistics.
    profiler = cProfile.Profile()
    profiler.enable()
    auto_gain(qcl)
    profiler.disable()

    # Report to stdout first, ordered by cumulative time.
    profiler.print_stats(sort="cumulative")

    # Render the same cumulative-time report into an in-memory buffer and
    # save it to a text file.
    report = io.StringIO()
    pstats.Stats(profiler, stream=report).sort_stats("cumulative").print_stats()
    with open("profiling_0.txt", "w+") as out_file:
        out_file.write(report.getvalue())
where PQLiu.json is the following file:
{
"FileType": "ErwinJr2 Data File",
"Version": "210330",
"Description": "dx.doi.org/10.1038/nphoton.2009.262",
"QCLayers": {
"Wavelength": 4.7,
"Substrate": "InP",
"EField": 102.0,
"x resolution": 1.0,
"E resolution": 0.5,
"No of states": 20,
"Solver": "ODE",
"Temperature": 300.0,
"Repeats": 4,
"MaterialDefs": {
"Compostion": ["InGaAs", "AlInAs"],
"Mole Fraction": [0.66, 0.31]
},
"Material": [1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0],
"Width": [23.0, 28.0, 26.0, 22.0, 21.0, 18.0, 18.0, 15.0, 13.0, 12.0, 10.0, 42.0, 12.0, 39.0, 14.0, 33.0],
"Doping": [0.0, 0.0, 1.5, 1.5, 1.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
"Active Region": [false, false, false, false, false, false, false, true, true, true, true, true, true, true, true, true],
"IFR": false
},
"Waveguide": {
"wavelength": 4.7,
"materials": ["Air", "Au", "SiNx", "InxGa1-xAs", "InxGa1-xAs", "Al1-xInxAs", "Al1-xInxAs", "InxGa1-xAs", "Active Core", "InxGa1-xAs", "InP"],
"moleFracs": [0.0, 0.0, 0.35, 0.53, 0.53, 0.52, 0.52, 0.53, 0.53, 0.53, 0.0],
"dopings": [0.0, 0.0, 0.0, 1200.0, 90.0, 2.0, 3.0, 0.5, 0.5, 0.5, 0.0],
"width": [1.0, 0.45, 0.4, 0.01, 0.5, 0.5, 0.6, 0.2, 1.384, 0.2, 2.0],
"mobilities": [null, null, null, null, null, null, null, null, null, null, null],
"custom": {"Active Core": {"index": "(3.286+0j)", "period": [346.0, 40], "gain": 53.4}}
}
}
Current version with population and gain
ncalls |
tottime |
percall |
cumtime |
percall |
filename:lineno(function) |
|---|---|---|---|---|---|
1 |
0.001 |
0.001 |
5.955 |
5.955 |
./ErwinJr2/QCLayers.py:1493(auto_gain) |
1 |
0.000 |
0.000 |
4.753 |
4.753 |
./ErwinJr2/QCLayers.py:1386(full_population) |
1 |
0.024 |
0.024 |
4.753 |
4.753 |
./ErwinJr2/QCLayers.py:921(full_population) |
3640 |
1.358 |
0.000 |
3.361 |
0.001 |
./ErwinJr2/QCLayers.py:701(_ifr_transition) |
14428 |
0.014 |
0.000 |
1.545 |
0.000 |
./ErwinJr2/QCLayers.py:433(psi_overlap) |
14436 |
0.316 |
0.000 |
1.531 |
0.000 |
{built-in method builtins.sum} |
3640 |
0.038 |
0.000 |
1.367 |
0.000 |
./ErwinJr2/QCLayers.py:621(_lo_transition) |
57712 |
0.292 |
0.000 |
1.216 |
0.000 |
./ErwinJr2/QCLayers.py:437(<genexpr>) |
45069 |
0.090 |
0.000 |
1.024 |
0.000 |
./ErwinJr2/QCLayers.py:273(_shift_psi) |
12164 |
0.012 |
0.000 |
0.935 |
0.000 |
<__array_function__ internals>:2(interp) |
12164 |
0.015 |
0.000 |
0.914 |
0.000 |
[.python-packages]/numpy/lib/function_base.py:1289(interp) |
12164 |
0.871 |
0.000 |
0.871 |
0.000 |
{built-in method numpy.core._multiarray_umath.interp} |
1754 |
0.783 |
0.000 |
0.800 |
0.000 |
./ErwinJr2/OneDQuantum/OneDSchrodinger.py:144(cLOphononScatter) |
1 |
0.017 |
0.017 |
0.678 |
0.678 |
./ErwinJr2/QCLayers.py:1394(full_gain_spectrum) |
1785 |
0.078 |
0.000 |
0.656 |
0.000 |
./ErwinJr2/QCLayers.py:587(_dipole) |
1 |
0.002 |
0.002 |
0.513 |
0.513 |
./ErwinJr2/QCLayers.py:322(solve_whole) |
1 |
0.036 |
0.036 |
0.510 |
0.510 |
./ErwinJr2/QCLayers.py:344(_solve_whole_ode) |
1 |
0.469 |
0.469 |
0.469 |
0.469 |
./ErwinJr2/OneDQuantum/OneDSchrodinger.py:101(cBandSolve1D) |
343980 |
0.372 |
0.000 |
0.372 |
0.000 |
./ErwinJr2/QCLayers.py:732(interpZ) |
116482 |
0.062 |
0.000 |
0.355 |
0.000 |
<__array_function__ internals>:2(argmax) |
3570 |
0.281 |
0.000 |
0.281 |
0.000 |
./ErwinJr2/QCLayers.py:993(_xBandMassInv) |
7179 |
0.005 |
0.000 |
0.248 |
0.000 |
<__array_function__ internals>:2(trapz) |
7179 |
0.169 |
0.000 |
0.237 |
0.000 |
[.python-packages]/numpy/lib/function_base.py:4006(trapz) |
116482 |
0.053 |
0.000 |
0.234 |
0.000 |
[.python-packages]/numpy/core/fromnumeric.py:1114(argmax) |
116483 |
0.059 |
0.000 |
0.182 |
0.000 |
[.python-packages]/numpy/core/fromnumeric.py:52(_wrapfunc) |
3570 |
0.002 |
0.000 |
0.138 |
0.000 |
<__array_function__ internals>:2(gradient) |
3570 |
0.101 |
0.000 |
0.129 |
0.000 |
[.python-packages]/numpy/lib/function_base.py:802(gradient) |
116482 |
0.103 |
0.000 |
0.103 |
0.000 |
{method 'argmax' of 'numpy.ndarray' objects} |
7181 |
0.005 |
0.000 |
0.063 |
0.000 |
{method 'sum' of 'numpy.ndarray' objects} |
232976 |
0.058 |
0.000 |
0.058 |
0.000 |
./ErwinJr2/QCLayers.py:1247(layerVc) |
7181 |
0.003 |
0.000 |
0.058 |
0.000 |
[.python-packages]/numpy/core/_methods.py:45(_sum) |
7210 |
0.056 |
0.000 |
0.056 |
0.000 |
{method 'reduce' of 'numpy.ufunc' objects} |
12164 |
0.006 |
0.000 |
0.021 |
0.000 |
<__array_function__ internals>:2(iscomplexobj) |
1.0 and earlier versions
Before optimizing code performance, there were 814603 function calls taking 10.835
seconds in total. The output of cProfile, sorted by cumulative time, is
listed in the following table.
ncalls |
tottime |
percall |
cumtime |
percall |
filename:lineno(function) |
|---|---|---|---|---|---|
1 |
0.000 |
0.000 |
10.803 |
10.803 |
QCLayers.py:425(calc_FoM) |
35 |
9.742 |
0.278 |
10.803 |
0.309 |
QCLayers.py:383(loTransition) |
2 |
0.000 |
0.000 |
10.449 |
5.224 |
QCLayers.py:419(loLifeTime) |
2 |
0.000 |
0.000 |
10.449 |
5.224 |
QCLayers.py:422(<listcomp>) |
135301 |
0.132 |
0.000 |
0.797 |
0.000 |
fromnumeric.py:1821(sum) |
135303 |
0.155 |
0.000 |
0.640 |
0.000 |
fromnumeric.py:64(_wrapreduction) |
135331 |
0.467 |
0.000 |
0.467 |
0.000 |
{method 'reduce' of 'numpy.ufunc' objects} |
135180 |
0.262 |
0.000 |
0.262 |
0.000 |
{built-in method builtins.abs} |
1 |
0.000 |
0.000 |
0.037 |
0.037 |
QCLayers.py:260(solve_whole) |
1 |
0.036 |
0.036 |
0.036 |
0.036 |
OneDSchrodinger.py:61(cSimpleSolve1D) |
135313 |
0.025 |
0.000 |
0.025 |
0.000 |
{built-in method builtins.isinstance} |
135303 |
0.019 |
0.000 |
0.019 |
0.000 |
{method 'items' of 'dict' objects} |
1 |
0.001 |
0.001 |
0.002 |
0.002 |
QCLayers.py:178(populate_x) |
65 |
0.000 |
0.000 |
0.001 |
0.000 |
QCLayers.py:169(avghwLO) |
1 |
0.001 |
0.001 |
0.001 |
0.001 |
OneDSchrodinger.py:74(cSimpleFillPsi) |
227 |
0.000 |
0.000 |
0.001 |
0.000 |
{built-in method builtins.sum} |
28 |
0.001 |
0.000 |
0.001 |
0.000 |
QCLayers.py:227(<listcomp>) |
1885 |
0.001 |
0.000 |
0.001 |
0.000 |
QCLayers.py:173(<genexpr>) |
After optimizing code performance by replacing the most time-consuming function call
(QCLayers.loLifeTime()) with C code and adding OpenMP support,
there are 3613 function calls taking 1.255 seconds in total.
The output of cProfile, sorted by cumulative time, is listed in the
following table.
ncalls |
tottime |
percall |
cumtime |
percall |
filename:lineno(function) |
|---|---|---|---|---|---|
1 |
0.000 |
0.000 |
1.205 |
1.205 |
QCLayers.py:427(calc_FoM) |
35 |
0.003 |
0.000 |
1.205 |
0.034 |
QCLayers.py:383(loTransition) |
30 |
1.199 |
0.040 |
1.199 |
0.040 |
OneDSchrodinger.py:113(cLOphononScatter) |
2 |
0.000 |
0.000 |
1.166 |
0.583 |
QCLayers.py:421(loLifeTime) |
2 |
0.000 |
0.000 |
1.165 |
0.583 |
QCLayers.py:424(<listcomp>) |
1 |
0.000 |
0.000 |
0.048 |
0.048 |
QCLayers.py:260(solve_whole) |
1 |
0.046 |
0.046 |
0.046 |
0.046 |
OneDSchrodinger.py:65(cSimpleSolve1D) |
1 |
0.001 |
0.001 |
0.002 |
0.002 |
QCLayers.py:178(populate_x) |
91 |
0.000 |
0.000 |
0.001 |
0.000 |
fromnumeric.py:1821(sum) |
65 |
0.000 |
0.000 |
0.001 |
0.000 |
QCLayers.py:169(avghwLO) |
227 |
0.000 |
0.000 |
0.001 |
0.000 |
{built-in method builtins.sum} |
1 |
0.001 |
0.001 |
0.001 |
0.001 |
OneDSchrodinger.py:78(cSimpleFillPsi) |
93 |
0.000 |
0.000 |
0.001 |
0.000 |
fromnumeric.py:64(_wrapreduction) |
121 |
0.001 |
0.000 |
0.001 |
0.000 |
{method 'reduce' of 'numpy.ufunc' objects} |
1885 |
0.001 |
0.000 |
0.001 |
0.000 |
QCLayers.py:173(<genexpr>) |
68 |
0.000 |
0.000 |
0.001 |
0.000 |
ctypeslib.py:196(from_param) |
28 |
0.000 |
0.000 |
0.000 |
0.000 |
QCLayers.py:227(<listcomp>) |
65 |
0.000 |
0.000 |
0.000 |
0.000 |
{built-in method numpy.core.multiarray.array} |