Profiling¶
> This page is very out-of-date and has some inconsistencies with current versions
The following block of code is profiled by cProfile
:
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
from context import * # type: ignore # noqa: F401, F403
from QCLayers import auto_gain
import SaveLoad
import cProfile
import pstats
import io
if __name__ == "__main__":
cp = cProfile.Profile()
with open("../example/PQLiu.json") as f:
qcl = SaveLoad.qclLoad(f)
qcl.xres = 0.1
cp.enable()
auto_gain(qcl)
cp.disable()
cp.print_stats(sort="cumulative")
s = io.StringIO()
ps = pstats.Stats(cp, stream=s).sort_stats('cumulative')
ps.print_stats()
with open('profiling_0.txt', 'w+') as f:
f.write(s.getvalue())
where PQLiu.json is the following file:
{
"FileType": "ErwinJr2 Data File",
"Version": "210330",
"Description": "dx.doi.org/10.1038/nphoton.2009.262",
"QCLayers": {
"Wavelength": 4.7,
"Substrate": "InP",
"EField": 102.0,
"x resolution": 1.0,
"E resolution": 0.5,
"No of states": 20,
"Solver": "ODE",
"Temperature": 300.0,
"Repeats": 4,
"MaterialDefs": {
"Compostion": ["InGaAs", "AlInAs"],
"Mole Fraction": [0.66, 0.31]
},
"Material": [1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0],
"Width": [23.0, 28.0, 26.0, 22.0, 21.0, 18.0, 18.0, 15.0, 13.0, 12.0, 10.0, 42.0, 12.0, 39.0, 14.0, 33.0],
"Doping": [0.0, 0.0, 1.5, 1.5, 1.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
"Active Region": [false, false, false, false, false, false, false, true, true, true, true, true, true, true, true, true],
"IFR": false
},
"Waveguide": {
"wavelength": 4.7,
"materials": ["Air", "Au", "SiNx", "InxGa1-xAs", "InxGa1-xAs", "Al1-xInxAs", "Al1-xInxAs", "InxGa1-xAs", "Active Core", "InxGa1-xAs", "InP"],
"moleFracs": [0.0, 0.0, 0.35, 0.53, 0.53, 0.52, 0.52, 0.53, 0.53, 0.53, 0.0],
"dopings": [0.0, 0.0, 0.0, 1200.0, 90.0, 2.0, 3.0, 0.5, 0.5, 0.5, 0.0],
"width": [1.0, 0.45, 0.4, 0.01, 0.5, 0.5, 0.6, 0.2, 1.384, 0.2, 2.0],
"mobilities": [null, null, null, null, null, null, null, null, null, null, null],
"custom": {"Active Core": {"index": "(3.286+0j)", "period": [346.0, 40], "gain": 53.4}}
}
}
Current version with population and gain¶
ncalls |
tottime |
percall |
cumtime |
percall |
filename:lineno(function) |
---|---|---|---|---|---|
1 |
0.001 |
0.001 |
5.955 |
5.955 |
./ErwinJr2/QCLayers.py:1493(auto_gain) |
1 |
0.000 |
0.000 |
4.753 |
4.753 |
./ErwinJr2/QCLayers.py:1386(full_population) |
1 |
0.024 |
0.024 |
4.753 |
4.753 |
./ErwinJr2/QCLayers.py:921(full_population) |
3640 |
1.358 |
0.000 |
3.361 |
0.001 |
./ErwinJr2/QCLayers.py:701(_ifr_transition) |
14428 |
0.014 |
0.000 |
1.545 |
0.000 |
./ErwinJr2/QCLayers.py:433(psi_overlap) |
14436 |
0.316 |
0.000 |
1.531 |
0.000 |
{built-in method builtins.sum} |
3640 |
0.038 |
0.000 |
1.367 |
0.000 |
./ErwinJr2/QCLayers.py:621(_lo_transition) |
57712 |
0.292 |
0.000 |
1.216 |
0.000 |
./ErwinJr2/QCLayers.py:437(<genexpr>) |
45069 |
0.090 |
0.000 |
1.024 |
0.000 |
./ErwinJr2/QCLayers.py:273(_shift_psi) |
12164 |
0.012 |
0.000 |
0.935 |
0.000 |
<__array_function__ internals>:2(interp) |
12164 |
0.015 |
0.000 |
0.914 |
0.000 |
[.python-packages]/numpy/lib/function_base.py:1289(interp) |
12164 |
0.871 |
0.000 |
0.871 |
0.000 |
{built-in method numpy.core._multiarray_umath.interp} |
1754 |
0.783 |
0.000 |
0.800 |
0.000 |
./ErwinJr2/OneDQuantum/OneDSchrodinger.py:144(cLOphononScatter) |
1 |
0.017 |
0.017 |
0.678 |
0.678 |
./ErwinJr2/QCLayers.py:1394(full_gain_spectrum) |
1785 |
0.078 |
0.000 |
0.656 |
0.000 |
./ErwinJr2/QCLayers.py:587(_dipole) |
1 |
0.002 |
0.002 |
0.513 |
0.513 |
./ErwinJr2/QCLayers.py:322(solve_whole) |
1 |
0.036 |
0.036 |
0.510 |
0.510 |
./ErwinJr2/QCLayers.py:344(_solve_whole_ode) |
1 |
0.469 |
0.469 |
0.469 |
0.469 |
./ErwinJr2/OneDQuantum/OneDSchrodinger.py:101(cBandSolve1D) |
343980 |
0.372 |
0.000 |
0.372 |
0.000 |
./ErwinJr2/QCLayers.py:732(interpZ) |
116482 |
0.062 |
0.000 |
0.355 |
0.000 |
<__array_function__ internals>:2(argmax) |
3570 |
0.281 |
0.000 |
0.281 |
0.000 |
./ErwinJr2/QCLayers.py:993(_xBandMassInv) |
7179 |
0.005 |
0.000 |
0.248 |
0.000 |
<__array_function__ internals>:2(trapz) |
7179 |
0.169 |
0.000 |
0.237 |
0.000 |
[.python-packages]/numpy/lib/function_base.py:4006(trapz) |
116482 |
0.053 |
0.000 |
0.234 |
0.000 |
[.python-packages]/numpy/core/fromnumeric.py:1114(argmax) |
116483 |
0.059 |
0.000 |
0.182 |
0.000 |
[.python-packages]/numpy/core/fromnumeric.py:52(_wrapfunc) |
3570 |
0.002 |
0.000 |
0.138 |
0.000 |
<__array_function__ internals>:2(gradient) |
3570 |
0.101 |
0.000 |
0.129 |
0.000 |
[.python-packages]/numpy/lib/function_base.py:802(gradient) |
116482 |
0.103 |
0.000 |
0.103 |
0.000 |
{method ‘argmax’ of ‘numpy.ndarray’ objects} |
7181 |
0.005 |
0.000 |
0.063 |
0.000 |
{method ‘sum’ of ‘numpy.ndarray’ objects} |
232976 |
0.058 |
0.000 |
0.058 |
0.000 |
./ErwinJr2/QCLayers.py:1247(layerVc) |
7181 |
0.003 |
0.000 |
0.058 |
0.000 |
[.python-packages]/numpy/core/_methods.py:45(_sum) |
7210 |
0.056 |
0.000 |
0.056 |
0.000 |
{method ‘reduce’ of ‘numpy.ufunc’ objects} |
12164 |
0.006 |
0.000 |
0.021 |
0.000 |
<__array_function__ internals>:2(iscomplexobj) |
1.0 and earlier versions¶
Before optimizing code performamce, there are 814603 function calls taking 10.835
seconds in total. The output of cProfile
sorted by cumulative time is
listed in the following table.
ncalls |
tottime |
percall |
cumtime |
percall |
filename:lineno(function) |
---|---|---|---|---|---|
1 |
0.000 |
0.000 |
10.803 |
10.803 |
QCLayers.py:425(calc_FoM) |
35 |
9.742 |
0.278 |
10.803 |
0.309 |
QCLayers.py:383(loTransition) |
2 |
0.000 |
0.000 |
10.449 |
5.224 |
QCLayers.py:419(loLifeTime) |
2 |
0.000 |
0.000 |
10.449 |
5.224 |
QCLayers.py:422(<listcomp>) |
135301 |
0.132 |
0.000 |
0.797 |
0.000 |
fromnumeric.py:1821(sum) |
135303 |
0.155 |
0.000 |
0.640 |
0.000 |
fromnumeric.py:64(_wrapreduction) |
135331 |
0.467 |
0.000 |
0.467 |
0.000 |
{method ‘reduce’ of ‘numpy.ufunc’ objects} |
135180 |
0.262 |
0.000 |
0.262 |
0.000 |
{built-in method builtins.abs} |
1 |
0.000 |
0.000 |
0.037 |
0.037 |
QCLayers.py:260(solve_whole) |
1 |
0.036 |
0.036 |
0.036 |
0.036 |
OneDSchrodinger.py:61(cSimpleSolve1D) |
135313 |
0.025 |
0.000 |
0.025 |
0.000 |
{built-in method builtins.isinstance} |
135303 |
0.019 |
0.000 |
0.019 |
0.000 |
{method ‘items’ of ‘dict’ objects} |
1 |
0.001 |
0.001 |
0.002 |
0.002 |
QCLayers.py:178(populate_x) |
65 |
0.000 |
0.000 |
0.001 |
0.000 |
QCLayers.py:169(avghwLO) |
1 |
0.001 |
0.001 |
0.001 |
0.001 |
OneDSchrodinger.py:74(cSimpleFillPsi) |
227 |
0.000 |
0.000 |
0.001 |
0.000 |
{built-in method builtins.sum} |
28 |
0.001 |
0.000 |
0.001 |
0.000 |
QCLayers.py:227(<listcomp>) |
1885 |
0.001 |
0.000 |
0.001 |
0.000 |
QCLayers.py:173(<genexpr>) |
After optimizing code performance by replace most time consuming function call
(QCLayers.loLifeTime()
), by C code and add openMP support,
there are 3613 function calls taking 1.255 seconds in total.
The output of cProfile
sorted by cumulative time is listed in the
following table.
ncalls |
tottime |
percall |
cumtime |
percall |
filename:lineno(function) |
---|---|---|---|---|---|
1 |
0.000 |
0.000 |
1.205 |
1.205 |
QCLayers.py:427(calc_FoM) |
35 |
0.003 |
0.000 |
1.205 |
0.034 |
QCLayers.py:383(loTransition) |
30 |
1.199 |
0.040 |
1.199 |
0.040 |
OneDSchrodinger.py:113(cLOphononScatter) |
2 |
0.000 |
0.000 |
1.166 |
0.583 |
QCLayers.py:421(loLifeTime) |
2 |
0.000 |
0.000 |
1.165 |
0.583 |
QCLayers.py:424(<listcomp>) |
1 |
0.000 |
0.000 |
0.048 |
0.048 |
QCLayers.py:260(solve_whole) |
1 |
0.046 |
0.046 |
0.046 |
0.046 |
OneDSchrodinger.py:65(cSimpleSolve1D) |
1 |
0.001 |
0.001 |
0.002 |
0.002 |
QCLayers.py:178(populate_x) |
91 |
0.000 |
0.000 |
0.001 |
0.000 |
fromnumeric.py:1821(sum) |
65 |
0.000 |
0.000 |
0.001 |
0.000 |
QCLayers.py:169(avghwLO) |
227 |
0.000 |
0.000 |
0.001 |
0.000 |
{built-in method builtins.sum} |
1 |
0.001 |
0.001 |
0.001 |
0.001 |
OneDSchrodinger.py:78(cSimpleFillPsi) |
93 |
0.000 |
0.000 |
0.001 |
0.000 |
fromnumeric.py:64(_wrapreduction) |
121 |
0.001 |
0.000 |
0.001 |
0.000 |
{method ‘reduce’ of ‘numpy.ufunc’ objects} |
1885 |
0.001 |
0.000 |
0.001 |
0.000 |
QCLayers.py:173(<genexpr>) |
68 |
0.000 |
0.000 |
0.001 |
0.000 |
ctypeslib.py:196(from_param) |
28 |
0.000 |
0.000 |
0.000 |
0.000 |
QCLayers.py:227(<listcomp>) |
65 |
0.000 |
0.000 |
0.000 |
0.000 |
{built-in method numpy.core.multiarray.array} |