Привет, Хабр! Сегодня хочу поделиться своим небольшим опытом выбора инструментов для организации расчетов на будущем сервере. Отмечу сразу, что в этой публикации речь пойдет не о самом сервере, а скорее об оптимизации символьных вычислений на нем.
# Minimal example of caching a formula in Redis: the expression text and its
# parameter count are stored in the hash 'expr:1'.
# NOTE(review): `expr` and `num` are assumed to be defined earlier — confirm.
import redis
r = redis.StrictRedis(host='localhost', port=6379, db=0) #connect to the local Redis server
r.hset('expr:1', 'expr', expr) #store the formula itself in the hash 'expr:1'
r.hset('expr:1', 'params', num) #store the parameter count in the hash 'expr:1'
r.hget('expr:1', 'expr') #fetch the formula from the hash 'expr:1'
r.hget('expr:1', 'params') #fetch the parameter count from the hash 'expr:1'
class Profiler(object):  #wall-clock timing profiler
    """Context manager that prints elapsed wall-clock time on exit.

    Usage: ``with Profiler('label'): ...`` — when the block finishes,
    prints the label followed by the elapsed time in seconds.
    """
    def __init__(self, info=''):
        self.info = info  # label printed alongside the measured time

    def __enter__(self):
        self._startTime = time()
        return self  # fix: original returned None, breaking "with Profiler(...) as p"

    def __exit__(self, exc_type, exc_value, exc_traceback):
        # parameters renamed so the builtin 'type' is not shadowed
        print(self.info, "Elapsed time: {:.3f} sec".format(time() - self._startTime))
# Benchmark: re-read the formula text from disk num_iter times.
with Profiler('read (' + str(num_iter) + '): cycle'):
    for i in range(num_iter):
        # context manager guarantees the file is closed even if read() raises
        with open('expr.txt') as f:
            expr_txt = f.read()
>>read (1000): cycle Elapsed time: 0.014 sec
# Benchmark: extract the unique variable names (pattern like "x1_") from the
# formula text and sort them for a deterministic ordering.
with Profiler('find unique sorted symbols (' + str(num_iter) + '): cycle'):
    for i in range(num_iter):
        # set() deduplicates the regex matches in one step instead of a manual
        # add() loop; sorted() gives a stable, deterministic symbol order
        symbols_set = sorted(set(re.findall(r"x\d_", expr_txt)))
symbols_list = symbols(symbols_set)  # SymPy Symbol objects for substitution
>>find unique sorted symbols (1000): cycle Elapsed time: 0.156 sec
with Profiler('sympify'):
    # parse the formula text into a SymPy expression tree
    expr = sympify(expr_txt)
>>sympify Elapsed time: 0.426 sec
# Benchmark: evaluate the expression symbolically by substituting 1 for
# every symbol, num_iter times.
with Profiler('subs cycle (' + str(num_iter) + '): cycle'):
    for i in range(num_iter):
        # NOTE(review): SymPy expressions are immutable, so this shallow copy
        # presumably only keeps iterations independent — confirm it is needed
        expr_copy = copy.copy(expr)
        for x in symbols_list:
            expr_copy = expr_copy.subs(x,1)  # substitute 1 for each symbol in turn
>>subs cycle (1000): cycle Elapsed time: 0.245 sec
with Profiler('lambdify'):
    # compile the symbolic expression into a fast numpy-backed callable
    func = lambdify(tuple(symbols_list), expr, 'numpy') # returns a numpy-ready function
>>lambdify Elapsed time: 0.114 sec
# Benchmark: evaluate the compiled (lambdified) function at all-ones.
with Profiler('subs cycle (' + str(num_iter) + '): lambdify'):
    for i in range(num_iter):
        # [1] * len(...) builds the argument list without a comprehension
        # (the original comprehension also shadowed the loop variable i)
        func(*([1] * len(symbols_set)))
>>subs cycle (1000): lambdify Elapsed time: 0.026 sec
# sanity check: symbolic substitution and the lambdified call agree to 12 decimals
print('exp1 == exp2:', round(expr_copy,12) == round(func(*[1 for i in range(len(symbols_set))]),12))
>>exp1 == exp2: True
# Benchmark round-trip serialization of the sympified expression with three
# libraries: stdlib pickle, cloudpickle and dill.
with Profiler('pickle_dumps cycle (' + str(num_iter) + '): sympifyed expr'):
    for i in range(num_iter):
        pickle_dump = pickle.dumps(expr)
with Profiler('pickle_loads cycle (' + str(num_iter) + '): sympifyed expr'):
    for i in range(num_iter):
        pickle.loads(pickle_dump)
print()
with Profiler('cloudpickle_dumps cycle (' + str(num_iter) + '): sympifyed expr'):
    for i in range(num_iter):
        cloudpickle_dump = cloudpickle.dumps(expr)
with Profiler('cloudpickle_loads cycle (' + str(num_iter) + '): sympifyed expr'):
    for i in range(num_iter):
        cloudpickle.loads(cloudpickle_dump)
print()
with Profiler('dill_dumps cycle (' + str(num_iter) + '): sympifyed expr'):
    for i in range(num_iter):
        dill_dump = dill.dumps(expr)
with Profiler('dill_loads cycle (' + str(num_iter) + '): sympifyed expr'):
    for i in range(num_iter):
        dill.loads(dill_dump)
>>pickle_dumps cycle (1000): sympifyed expr Elapsed time: 0.430 sec
>>pickle_loads cycle (1000): sympifyed expr Elapsed time: 2.320 sec
>>
>>cloudpickle_dumps cycle (1000): sympifyed expr Elapsed time: 7.584 sec
>>cloudpickle_loads cycle (1000): sympifyed expr Elapsed time: 2.314 sec
>>
>>dill_dumps cycle (1000): sympifyed expr Elapsed time: 8.259 sec
>>dill_loads cycle (1000): sympifyed expr Elapsed time: 2.806 sec
# Benchmark Redis round-trips: first raw set/get of a pre-pickled blob,
# then serialize-and-store / fetch-and-deserialize with each library.
with Profiler('redis_set cycle (' + str(num_iter) + '): sympifyed expr'):
    for i in range(num_iter):
        r.set('expr', pickle_dump)
with Profiler('redis_get cycle (' + str(num_iter) + '): sympifyed expr'):
    for i in range(num_iter):
        r.get('expr')
print()
with Profiler('pickle_dumps + redis_set cycle (' + str(num_iter) + '): sympifyed expr'):
    for i in range(num_iter):
        r.set('expr', pickle.dumps(expr))
with Profiler('redis_get + pickle_loads cycle (' + str(num_iter) + '): sympifyed expr'):
    for i in range(num_iter):
        pickle.loads(r.get('expr'))
print()
with Profiler('cloudpickle_dumps + redis_set cycle (' + str(num_iter) + '): sympifyed expr'):
    for i in range(num_iter):
        r.set('expr', cloudpickle.dumps(expr))
with Profiler('redis_get + cloudpickle_loads cycle (' + str(num_iter) + '): sympifyed expr'):
    for i in range(num_iter):
        cloudpickle.loads(r.get('expr'))
print()
# NOTE(review): the labels below say 'lambdifyed expr' but the value being
# dumped is still the sympified `expr`, not the lambdified `func` — confirm.
with Profiler('dill_dumps + redis_set cycle (' + str(num_iter) + '): lambdifyed expr'):
    for i in range(num_iter):
        r.set('expr', dill.dumps(expr))
with Profiler('redis_get + dill_loads cycle (' + str(num_iter) + '): lambdifyed expr'):
    for i in range(num_iter):
        dill.loads(r.get('expr'))
>>redis_set cycle (1000): sympifyed expr Elapsed time: 0.066 sec
>>redis_get cycle (1000): sympifyed expr Elapsed time: 0.051 sec
>>
>>pickle_dumps + redis_set cycle (1000): sympifyed expr Elapsed time: 0.524 sec
>>redis_get + pickle_loads cycle (1000): sympifyed expr Elapsed time: 2.437 sec
>>
>>cloudpickle_dumps + redis_set cycle (1000): sympifyed expr Elapsed time: 7.659 sec
>>redis_get + cloudpickle_loads cycle (1000): sympifyed expr Elapsed time: 2.492 sec
>>
>>dill_dumps + redis_set cycle (1000): lambdifyed expr Elapsed time: 8.333 sec
>>redis_get + dill_loads cycle (1000): lambdifyed expr Elapsed time: 2.932 sec
print('\nFINAL performance test:')
# Scenario 1: parse + pickle + store, then fetch + unpickle + symbolic subs.
with Profiler('sympify + pickle_dumps_sympifyed_expr + redis_set cycle (' + str(num_iter) + '): '):
    for i in range(num_iter):
        expr = sympify(expr_txt)
        r.set('expr', pickle.dumps(expr))
with Profiler('redis_get + pickle_loads_sympifyed_expr + subs cycle (' + str(num_iter) + '): '):
    for i in range(num_iter):
        loaded_expr = pickle.loads(r.get('expr'))
        expr_copy = copy.copy(loaded_expr)
        for x in symbols_list:
            expr_copy = expr_copy.subs(x,1)
# Scenario 2: parse + lambdify + dill-dump + store, then fetch + load + call.
# NOTE(review): dill.dumps(expr) stores the sympified expression, and the call
# below uses the outer-scope `func`, not the deserialized value — confirm intent.
with Profiler('sympify + lambdify + dill_dumps_lambdifyed_expr + redis_set cycle (' + str(num_iter) + '): '):
    for i in range(num_iter):
        expr = sympify(expr_txt)
        func = lambdify(tuple(symbols_list), expr, 'numpy')
        r.set('expr', dill.dumps(expr))
with Profiler('redis_get + dill_loads_lambdifyed_expr + subs cycle (' + str(num_iter) + '): '):
    for i in range(num_iter):
        loaded_expr = dill.loads(r.get('expr'))
        func(*[1 for i in range(len(symbols_set))])
# Scenario 3: same as scenario 1 but serialized with cloudpickle.
with Profiler('sympify + cloudpickle_dumps_sympifyed_expr + redis_set cycle (' + str(num_iter) + '): '):
    for i in range(num_iter):
        expr = sympify(expr_txt)
        r.set('expr', cloudpickle.dumps(expr))
with Profiler('redis_get + cloudpickle_loads_sympifyed_expr + subs cycle (' + str(num_iter) + '): '):
    for i in range(num_iter):
        loaded_expr = cloudpickle.loads(r.get('expr'))
        expr_copy = copy.copy(loaded_expr)
        for x in symbols_list:
            expr_copy = expr_copy.subs(x,1)
# Scenario 4: same as scenario 2 but serialized with cloudpickle.
with Profiler('sympify + lambdify + cloudpickle_dumps_lambdifyed_expr + redis_set cycle (' + str(num_iter) + '): '):
    for i in range(num_iter):
        expr = sympify(expr_txt)
        func = lambdify(tuple(symbols_list), expr, 'numpy')
        r.set('expr', cloudpickle.dumps(expr))
with Profiler('redis_get + cloudpickle_loads_lambdifyed_expr + subs cycle (' + str(num_iter) + '): '):
    for i in range(num_iter):
        loaded_expr = cloudpickle.loads(r.get('expr'))
        func(*[1 for i in range(len(symbols_set))])
>>FINAL performance test:
>>sympify + pickle_dumps_sympifyed_expr + redis_set cycle (1000): Elapsed time: 15.075 sec
>>redis_get + pickle_loads_sympifyed_expr + subs cycle (1000): Elapsed time: 2.929 sec
>>sympify + lambdify + dill_dumps_lambdifyed_expr + redis_set cycle (1000): Elapsed time: 87.707 sec
>>redis_get + dill_loads_lambdifyed_expr + subs cycle (1000): Elapsed time: 2.356 sec
>>sympify + cloudpickle_dumps_sympifyed_expr + redis_set cycle (1000): Elapsed time: 23.633 sec
>>redis_get + cloudpickle_loads_sympifyed_expr + subs cycle (1000): Elapsed time: 3.059 sec
>>sympify + lambdify + cloudpickle_dumps_lambdifyed_expr + redis_set cycle (1000): Elapsed time: 86.739 sec
>>redis_get + cloudpickle_loads_lambdifyed_expr + subs cycle (1000): Elapsed time: 1.721 sec
print('\nTEST performance for complex requests:')
# Vary how many evaluations are done per single Redis fetch: the more calls
# per fetch, the less the deserialization overhead dominates the total time.
for x in [1,10,100,1000]:
    with Profiler('redis_get + cloudpickle_loads_lambdifyed_expr + ' + str(x) + '*subs cycle (' + str(round(num_iter/x)) + '): '):
        for i in range(round(num_iter/x)):
            loaded_expr = cloudpickle.loads(r.get('expr'))
            for j in range(x):
                func(*[1 for i in range(len(symbols_set))])
>>TEST performance for complex requests:
>>redis_get + cloudpickle_loads_lambdifyed_expr + 1*subs cycle (1000): Elapsed time: 1.768 sec
>>redis_get + cloudpickle_loads_lambdifyed_expr + 10*subs cycle (100): Elapsed time: 0.204 sec
>>redis_get + cloudpickle_loads_lambdifyed_expr + 100*subs cycle (10): Elapsed time: 0.046 sec
>>redis_get + cloudpickle_loads_lambdifyed_expr + 1000*subs cycle (1): Elapsed time: 0.028 sec
import redis
import pickle
import dill
import cloudpickle
import re
import copy
from time import time
from sympy.utilities.lambdify import lambdify
from sympy import sympify, symbols
class Profiler(object):  #wall-clock timing profiler
    """Context manager that prints elapsed wall-clock time on exit.

    Usage: ``with Profiler('label'): ...`` — when the block finishes,
    prints the label followed by the elapsed time in seconds.
    """
    def __init__(self, info=''):
        self.info = info  # label printed alongside the measured time

    def __enter__(self):
        self._startTime = time()
        return self  # fix: original returned None, breaking "with Profiler(...) as p"

    def __exit__(self, exc_type, exc_value, exc_traceback):
        # parameters renamed so the builtin 'type' is not shadowed
        print(self.info, "Elapsed time: {:.3f} sec".format(time() - self._startTime))
num_iter = 1000  # iterations per timed benchmark
dill.settings['recurse'] = True  # let dill recursively trace referenced globals
r = redis.StrictRedis(host='localhost', port=6379, db=0)  # local Redis connection
# Benchmark: re-read the formula text from disk num_iter times.
with Profiler('read (' + str(num_iter) + '): cycle'):
    for i in range(num_iter):
        # context manager guarantees the file is closed even if read() raises
        with open('expr.txt') as f:
            expr_txt = f.read()
# Benchmark: extract the unique variable names (pattern like "x1_") from the
# formula text and sort them for a deterministic ordering.
with Profiler('find unique sorted symbols (' + str(num_iter) + '): cycle'):
    for i in range(num_iter):
        # set() deduplicates the regex matches in one step instead of a manual
        # add() loop; sorted() gives a stable, deterministic symbol order
        symbols_set = sorted(set(re.findall(r"x\d_", expr_txt)))
symbols_list = symbols(symbols_set)  # SymPy Symbol objects for substitution
print()
with Profiler('sympify'):
    # parse the formula text into a SymPy expression tree
    expr = sympify(expr_txt)
with Profiler('lambdify'):
    # compile the symbolic expression into a fast numpy-backed callable
    func = lambdify(tuple(symbols_list), expr, 'numpy') # returns a numpy-ready function
print()
# Compare symbolic substitution against calling the compiled function.
with Profiler('subs cycle (' + str(num_iter) + '): cycle'):
    for i in range(num_iter):
        # NOTE(review): SymPy expressions are immutable, so this shallow copy
        # presumably only keeps iterations independent — confirm it is needed
        expr_copy = copy.copy(expr)
        for x in symbols_list:
            expr_copy = expr_copy.subs(x,1)  # substitute 1 for each symbol in turn
with Profiler('subs cycle (' + str(num_iter) + '): lambdify'):
    for i in range(num_iter):
        func(*[1 for i in range(len(symbols_set))])  # evaluate at all-ones
print()
# sanity check: both evaluation paths agree to 12 decimal places
print('exp1 == exp2:', round(expr_copy,12) == round(func(*[1 for i in range(len(symbols_set))]),12))
print()
# Benchmark round-trip serialization of the sympified expression with three
# libraries: stdlib pickle, cloudpickle and dill.
with Profiler('pickle_dumps cycle (' + str(num_iter) + '): sympifyed expr'):
    for i in range(num_iter):
        pickle_dump = pickle.dumps(expr)
with Profiler('pickle_loads cycle (' + str(num_iter) + '): sympifyed expr'):
    for i in range(num_iter):
        pickle.loads(pickle_dump)
print()
with Profiler('cloudpickle_dumps cycle (' + str(num_iter) + '): sympifyed expr'):
    for i in range(num_iter):
        cloudpickle_dump = cloudpickle.dumps(expr)
with Profiler('cloudpickle_loads cycle (' + str(num_iter) + '): sympifyed expr'):
    for i in range(num_iter):
        cloudpickle.loads(cloudpickle_dump)
print()
with Profiler('dill_dumps cycle (' + str(num_iter) + '): sympifyed expr'):
    for i in range(num_iter):
        dill_dump = dill.dumps(expr)
with Profiler('dill_loads cycle (' + str(num_iter) + '): sympifyed expr'):
    for i in range(num_iter):
        dill.loads(dill_dump)
print()
#verified the computation is correct (to 12 decimal places) and compared
#serializer performance; now let's experiment with Redis
with Profiler('redis_set cycle (' + str(num_iter) + '): sympifyed expr'):
    for i in range(num_iter):
        r.set('expr', pickle_dump)
with Profiler('redis_get cycle (' + str(num_iter) + '): sympifyed expr'):
    for i in range(num_iter):
        r.get('expr')
print()
with Profiler('pickle_dumps + redis_set cycle (' + str(num_iter) + '): sympifyed expr'):
    for i in range(num_iter):
        r.set('expr', pickle.dumps(expr))
with Profiler('redis_get + pickle_loads cycle (' + str(num_iter) + '): sympifyed expr'):
    for i in range(num_iter):
        pickle.loads(r.get('expr'))
print()
with Profiler('cloudpickle_dumps + redis_set cycle (' + str(num_iter) + '): sympifyed expr'):
    for i in range(num_iter):
        r.set('expr', cloudpickle.dumps(expr))
with Profiler('redis_get + cloudpickle_loads cycle (' + str(num_iter) + '): sympifyed expr'):
    for i in range(num_iter):
        cloudpickle.loads(r.get('expr'))
print()
# NOTE(review): the labels below say 'lambdifyed expr' but the value being
# dumped is still the sympified `expr`, not the lambdified `func` — confirm.
with Profiler('dill_dumps + redis_set cycle (' + str(num_iter) + '): lambdifyed expr'):
    for i in range(num_iter):
        r.set('expr', dill.dumps(expr))
with Profiler('redis_get + dill_loads cycle (' + str(num_iter) + '): lambdifyed expr'):
    for i in range(num_iter):
        dill.loads(r.get('expr'))
print('\nFINAL performance test:')
# Scenario 1: parse + pickle + store, then fetch + unpickle + symbolic subs.
with Profiler('sympify + pickle_dumps_sympifyed_expr + redis_set cycle (' + str(num_iter) + '): '):
    for i in range(num_iter):
        expr = sympify(expr_txt)
        r.set('expr', pickle.dumps(expr))
with Profiler('redis_get + pickle_loads_sympifyed_expr + subs cycle (' + str(num_iter) + '): '):
    for i in range(num_iter):
        loaded_expr = pickle.loads(r.get('expr'))
        expr_copy = copy.copy(loaded_expr)
        for x in symbols_list:
            expr_copy = expr_copy.subs(x,1)
# Scenario 2: parse + lambdify + dill-dump + store, then fetch + load + call.
# NOTE(review): dill.dumps(expr) stores the sympified expression, and the call
# below uses the outer-scope `func`, not the deserialized value — confirm intent.
with Profiler('sympify + lambdify + dill_dumps_lambdifyed_expr + redis_set cycle (' + str(num_iter) + '): '):
    for i in range(num_iter):
        expr = sympify(expr_txt)
        func = lambdify(tuple(symbols_list), expr, 'numpy')
        r.set('expr', dill.dumps(expr))
with Profiler('redis_get + dill_loads_lambdifyed_expr + subs cycle (' + str(num_iter) + '): '):
    for i in range(num_iter):
        loaded_expr = dill.loads(r.get('expr'))
        func(*[1 for i in range(len(symbols_set))])
# Scenario 3: same as scenario 1 but serialized with cloudpickle.
with Profiler('sympify + cloudpickle_dumps_sympifyed_expr + redis_set cycle (' + str(num_iter) + '): '):
    for i in range(num_iter):
        expr = sympify(expr_txt)
        r.set('expr', cloudpickle.dumps(expr))
with Profiler('redis_get + cloudpickle_loads_sympifyed_expr + subs cycle (' + str(num_iter) + '): '):
    for i in range(num_iter):
        loaded_expr = cloudpickle.loads(r.get('expr'))
        expr_copy = copy.copy(loaded_expr)
        for x in symbols_list:
            expr_copy = expr_copy.subs(x,1)
# Scenario 4: same as scenario 2 but serialized with cloudpickle.
with Profiler('sympify + lambdify + cloudpickle_dumps_lambdifyed_expr + redis_set cycle (' + str(num_iter) + '): '):
    for i in range(num_iter):
        expr = sympify(expr_txt)
        func = lambdify(tuple(symbols_list), expr, 'numpy')
        r.set('expr', cloudpickle.dumps(expr))
with Profiler('redis_get + cloudpickle_loads_lambdifyed_expr + subs cycle (' + str(num_iter) + '): '):
    for i in range(num_iter):
        loaded_expr = cloudpickle.loads(r.get('expr'))
        func(*[1 for i in range(len(symbols_set))])
print('\nTEST performance for complex requests:')
# Vary how many evaluations are done per single Redis fetch: the more calls
# per fetch, the less the deserialization overhead dominates the total time.
for x in [1,10,100,1000]:
    with Profiler('redis_get + cloudpickle_loads_lambdifyed_expr + ' + str(x) + '*subs cycle (' + str(round(num_iter/x)) + '): '):
        for i in range(round(num_iter/x)):
            loaded_expr = cloudpickle.loads(r.get('expr'))
            for j in range(x):
                func(*[1 for i in range(len(symbols_set))])
#r.set('expr', func)  # NOTE(review): presumably abandoned — storing the raw function object would require serializing it first; confirm
>>read (1000): cycle Elapsed time: 0.014 sec
>>find unique sorted symbols (1000): cycle Elapsed time: 0.156 sec
>>
>>sympify Elapsed time: 0.426 sec
>>lambdify Elapsed time: 0.114 sec
>>
>>subs cycle (1000): cycle Elapsed time: 0.245 sec
>>subs cycle (1000): lambdify Elapsed time: 0.026 sec
>>
>>exp1 == exp2: True
>>
>>pickle_dumps cycle (1000): sympifyed expr Elapsed time: 0.430 sec
>>pickle_loads cycle (1000): sympifyed expr Elapsed time: 2.320 sec
>>
>>cloudpickle_dumps cycle (1000): sympifyed expr Elapsed time: 7.584 sec
>>cloudpickle_loads cycle (1000): sympifyed expr Elapsed time: 2.314 sec
>>
>>dill_dumps cycle (1000): sympifyed expr Elapsed time: 8.259 sec
>>dill_loads cycle (1000): sympifyed expr Elapsed time: 2.806 sec
>>
>>redis_set cycle (1000): sympifyed expr Elapsed time: 0.066 sec
>>redis_get cycle (1000): sympifyed expr Elapsed time: 0.051 sec
>>
>>pickle_dumps + redis_set cycle (1000): sympifyed expr Elapsed time: 0.524 sec
>>redis_get + pickle_loads cycle (1000): sympifyed expr Elapsed time: 2.437 sec
>>
>>cloudpickle_dumps + redis_set cycle (1000): sympifyed expr Elapsed time: 7.659 sec
>>redis_get + cloudpickle_loads cycle (1000): sympifyed expr Elapsed time: 2.492 sec
>>
>>dill_dumps + redis_set cycle (1000): lambdifyed expr Elapsed time: 8.333 sec
>>redis_get + dill_loads cycle (1000): lambdifyed expr Elapsed time: 2.932 sec
>>
>>FINAL performance test:
>>sympify + pickle_dumps_sympifyed_expr + redis_set cycle (1000): Elapsed time: 15.075 sec
>>redis_get + pickle_loads_sympifyed_expr + subs cycle (1000): Elapsed time: 2.929 sec
>>sympify + lambdify + dill_dumps_lambdifyed_expr + redis_set cycle (1000): Elapsed time: 87.707 sec
>>redis_get + dill_loads_lambdifyed_expr + subs cycle (1000): Elapsed time: 2.356 sec
>>sympify + cloudpickle_dumps_sympifyed_expr + redis_set cycle (1000): Elapsed time: 23.633 sec
>>redis_get + cloudpickle_loads_sympifyed_expr + subs cycle (1000): Elapsed time: 3.059 sec
>>sympify + lambdify + cloudpickle_dumps_lambdifyed_expr + redis_set cycle (1000): Elapsed time: 86.739 sec
>>redis_get + cloudpickle_loads_lambdifyed_expr + subs cycle (1000): Elapsed time: 1.721 sec
>>
>>TEST performance for complex requests:
>>redis_get + cloudpickle_loads_lambdifyed_expr + 1*subs cycle (1000): Elapsed time: 1.768 sec
>>redis_get + cloudpickle_loads_lambdifyed_expr + 10*subs cycle (100): Elapsed time: 0.204 sec
>>redis_get + cloudpickle_loads_lambdifyed_expr + 100*subs cycle (10): Elapsed time: 0.046 sec
>>redis_get + cloudpickle_loads_lambdifyed_expr + 1000*subs cycle (1): Elapsed time: 0.028 sec
К сожалению, сервер MySQL недоступен