Commit 2a9cd4f7 authored by Eric Dagobert's avatar Eric Dagobert

added MCMC simulations

parent 967b3513
from config_manager import ConfigReader
from distribs import Distributions
import pandas
import numpy as np
from objects import ObjectContainer
import operator
import datetime
from series_c import *
from concurrent.futures import ThreadPoolExecutor,ProcessPoolExecutor
import queue
from time import time
import dill
class RandomGenerator:
    """Pre-samples regime transitions and per-regime normal vectors.

    `generate(n)` draws, for every regime, n successor states from the
    transition matrix and n multivariate-normal vectors from the fitted
    (mu, sigma); `get_next` / `get_vector` then walk those pools.
    `_lastx` is class-level so successive calls keep advancing the cursor.
    """
    _lastx = 0

    def __init__(self, transitions, covariances):
        self._transitions = transitions  # row-stochastic (R, R) matrix
        self._covariances = covariances  # {regime: (mu, sigma)}
        self._tr = {}    # regime -> pre-drawn successor states (len n)
        self._vect = {}  # regime -> pre-drawn N(mu, sigma) vectors (n, k)

    def generate(self, n):
        """Fill the transition and vector pools with n draws per regime."""
        self._n = n
        for x in range(self._transitions.shape[0]):
            probs = self._transitions[x, :]
            self._tr[x] = np.random.choice(len(probs), p=probs, size=n)
            if x not in self._covariances:
                # BUGFIX: original printed the warning and then unpacked the
                # missing key anyway, raising KeyError — skip the regime.
                print(x, 'not in covariances')
                continue
            mu, sigma = self._covariances[x]
            # an int mu is the "not fitted" placeholder (see compute_covars_)
            if type(mu) is not int:
                self._vect[x] = np.random.multivariate_normal(mu, sigma, size=n)

    def get_next(self, tran):
        """Next pre-drawn successor of regime `tran` that has vectors.

        NOTE(review): loops forever if no successor of `tran` ever has a
        fitted covariance — assumed unreachable with calibrated inputs.
        """
        while True:
            RandomGenerator._lastx += 1
            i = RandomGenerator._lastx % self._n
            new = self._tr[tran][i]
            if new in self._vect:
                return new

    def get_vector(self, rid):
        """Next pre-drawn vector for regime `rid`."""
        RandomGenerator._lastx += 1
        i = RandomGenerator._lastx % self._n
        return self._vect[rid][i]
class MCMC:
    """Monte-Carlo simulator of regime paths and log-return paths.

    Paths are drawn from pools pre-sampled by RandomGenerator and stored in
    self._regimes (npaths, nsteps) and self._logret (npaths, nsteps, k).
    `run` simulates sequentially; the queue/worker machinery is an optional
    threaded variant driven by `start_threads`.
    """

    def __init__(self, conf, serie):
        self._serie = serie
        self._paths = None
        self._conf = conf
        # regime transition matrix published by the calibration step
        self._transitions = ObjectContainer.getInstance().get('transitions')
        self._q = queue.Queue()
        import threading  # local import: threading is not imported at file level
        self._run = threading.Event()       # workers block until start_threads()
        self._complete = threading.Event()  # set once every path is processed
        self._threads = []
        self._Nthreads = 12
        self._processed = None
        self._generator = RandomGenerator(self._transitions,
                                          self._serie._covars._covariances)
        self._generator.generate(40000)

    def _thread_run(self):
        """Worker loop: consume (path index, datefrom, dateto) jobs."""
        self._run.wait()
        while not self._complete.is_set():
            try:
                # BUGFIX: with block=False a timeout is ignored, so drop it
                x, dt1, dt2 = self._q.get(block=False)
            except queue.Empty:
                if self._processed.qsize() == self._npaths:
                    self._complete.set()
                return
            # BUGFIX: original called run_path(dt1, dt2) (missing the path
            # index) and unpacked its None return; run_path stores results
            # directly into self._regimes / self._logret.
            self.run_path(x, dt1, dt2)
            self._processed.put(x)
            if self._processed.qsize() == self._npaths:
                self._complete.set()

    def start_threads(self):
        """Spawn the worker pool and release it via the run event."""
        import threading  # local import: threading is not imported at file level
        for _ in range(self._Nthreads):
            t = threading.Thread(target=self._thread_run)
            t.start()
            self._threads.append(t)
        self._run.set()

    def random_r(self, probs):
        """Draw a regime from `probs`, retrying until one with a fitted
        covariance (non-int sigma placeholder) comes up."""
        tr = True
        r = -1
        while tr:
            r = np.random.choice(len(probs), p=probs)
            mu, sigma = self._serie._covars._covariances[r]
            tr = type(sigma) is int
        return r, mu, sigma

    def run_path(self, x, datefrom, dateto):
        """Simulate one path `x` over [datefrom, dateto] and store it."""
        regimes = np.zeros(self._Taxis.shape).astype(int)
        regimes[0] = self._serie.getR(datefrom)
        slogrets = self._serie.values(datefrom, dateto)
        logret = np.zeros(slogrets.shape)
        # seed step 0 with the observed return at datefrom
        logret[0] = slogrets[0, :]
        for ip in range(1, self._Taxis.shape[0]):
            r_ = regimes[ip - 1]
            rnext = self._generator.get_next(r_)
            vect = self._generator.get_vector(rnext)
            regimes[ip] = rnext
            logret[ip] = vect
        self._regimes[x, :] = regimes
        self._logret[x, :] = logret

    def run(self, datefrom, dateto):
        """Simulate all configured paths sequentially."""
        self._npaths = self._conf.mcmc_simul_npaths()
        self._Taxis = self._serie.get_dates(datefrom, dateto)
        nsteps = self._Taxis.shape[0]
        self._regimes = np.zeros((self._npaths, nsteps)).astype(int)
        self._logret = np.zeros((self._npaths, nsteps,
                                 self._serie._values.shape[1]))
        for x in range(self._npaths):
            self.run_path(x, datefrom, dateto)
if __name__ == "__main__":
    # Each phase reports (now - previous checkpoint) in seconds.
    t0 = time()
    conf = ConfigReader('regimes.ini')
    ts = Series(conf)
    t1 = time()
    print('build ts', t1 - t0)

    ts.init()
    t0, t1 = t1, time()
    print('init ts', t1 - t0)

    # snap the requested window to dates that carry a regime label
    datefrom = ts.closest_date(np.datetime64('2007-09-01'))
    dateto = ts.closest_date(np.datetime64('2008-03-01'))
    ts.calculate(None, dateto)
    t0, t1 = t1, time()
    print('calculate ts', t1 - t0)

    mc = MCMC(conf, ts)
    t0, t1 = t1, time()
    print('build MC', t1 - t0)

    mc.run(datefrom, dateto)
    t0, t1 = t1, time()
    print('run MC', t1 - t0)
    print(mc._regimes)
from config_manager import ConfigReader
from distribs import Distributions
import pandas
import numpy as np
from objects import ObjectContainer
import operator
import datetime
from series import *
from concurrent.futures import ThreadPoolExecutor,ProcessPoolExecutor
import queue
from time import time
import dill
class RandomGenerator:
    """Pre-samples regime transitions and per-regime normal vectors.

    `generate(n)` draws, for every regime, n successor states from the
    transition matrix and n multivariate-normal vectors from the fitted
    (mu, sigma); `get_next` / `get_vector` then walk those pools.
    `_lastx` is class-level so successive calls keep advancing the cursor.
    """
    _lastx = 0

    def __init__(self, transitions, covariances):
        self._transitions = transitions  # row-stochastic (R, R) matrix
        self._covariances = covariances  # {regime: (mu, sigma)}
        self._tr = {}    # regime -> pre-drawn successor states (len n)
        self._vect = {}  # regime -> pre-drawn N(mu, sigma) vectors (n, k)

    def generate(self, n):
        """Fill the transition and vector pools with n draws per regime."""
        self._n = n
        for x in range(self._transitions.shape[0]):
            probs = self._transitions[x, :]
            self._tr[x] = np.random.choice(len(probs), p=probs, size=n)
            if x not in self._covariances:
                # BUGFIX: original printed the warning and then unpacked the
                # missing key anyway, raising KeyError — skip the regime.
                print(x, 'not in covariances')
                continue
            mu, sigma = self._covariances[x]
            # an int mu is the "not fitted" placeholder (see compute_covars_)
            if type(mu) is not int:
                self._vect[x] = np.random.multivariate_normal(mu, sigma, size=n)

    def get_next(self, tran):
        """Next pre-drawn successor of regime `tran` that has vectors.

        NOTE(review): loops forever if no successor of `tran` ever has a
        fitted covariance — assumed unreachable with calibrated inputs.
        """
        while True:
            RandomGenerator._lastx += 1
            i = RandomGenerator._lastx % self._n
            new = self._tr[tran][i]
            if new in self._vect:
                return new

    def get_vector(self, rid):
        """Next pre-drawn vector for regime `rid`."""
        RandomGenerator._lastx += 1
        i = RandomGenerator._lastx % self._n
        return self._vect[rid][i]
class MCMC:
    """Monte-Carlo simulator of regime paths and log-return paths.

    Paths are drawn from pools pre-sampled by RandomGenerator and stored in
    self._regimes (npaths, nsteps) and self._logret (npaths, nsteps, k).
    `run` simulates sequentially; the queue/worker machinery is an optional
    threaded variant driven by `start_threads`.
    """

    def __init__(self, conf, serie):
        self._serie = serie
        self._paths = None
        self._conf = conf
        # regime transition matrix published by the calibration step
        self._transitions = ObjectContainer.getInstance().get('transitions')
        self._q = queue.Queue()
        import threading  # local import: threading is not imported at file level
        self._run = threading.Event()       # workers block until start_threads()
        self._complete = threading.Event()  # set once every path is processed
        self._threads = []
        self._Nthreads = 12
        self._processed = None
        self._generator = RandomGenerator(self._transitions,
                                          self._serie._covars._covariances)
        self._generator.generate(10000)

    def _thread_run(self):
        """Worker loop: consume (path index, datefrom, dateto) jobs."""
        self._run.wait()
        while not self._complete.is_set():
            try:
                # BUGFIX: with block=False a timeout is ignored, so drop it
                x, dt1, dt2 = self._q.get(block=False)
            except queue.Empty:
                if self._processed.qsize() == self._npaths:
                    self._complete.set()
                return
            # BUGFIX: original called run_path(dt1, dt2) (missing the path
            # index) and unpacked its None return; run_path stores results
            # directly into self._regimes / self._logret.
            self.run_path(x, dt1, dt2)
            self._processed.put(x)
            if self._processed.qsize() == self._npaths:
                self._complete.set()

    def start_threads(self):
        """Spawn the worker pool and release it via the run event."""
        import threading  # local import: threading is not imported at file level
        for _ in range(self._Nthreads):
            t = threading.Thread(target=self._thread_run)
            t.start()
            self._threads.append(t)
        self._run.set()

    def random_r(self, probs):
        """Draw a regime from `probs`, retrying until one with a fitted
        covariance (non-int sigma placeholder) comes up."""
        tr = True
        r = -1
        while tr:
            r = np.random.choice(len(probs), p=probs)
            mu, sigma = self._serie._covars._covariances[r]
            tr = type(sigma) is int
        return r, mu, sigma

    def run_path(self, x, datefrom, dateto):
        """Simulate one path `x` over [datefrom, dateto] and store it."""
        regimes = np.zeros(self._Taxis.shape).astype(int)
        regimes[0] = self._serie.getR(datefrom)
        slogrets = self._serie.values(datefrom, dateto)
        logret = np.zeros(slogrets.shape)
        # seed step 0 with the observed return at datefrom
        logret[0] = slogrets[0, :]
        for ip in range(1, self._Taxis.shape[0]):
            r_ = regimes[ip - 1]
            rnext = self._generator.get_next(r_)
            vect = self._generator.get_vector(rnext)
            regimes[ip] = rnext
            logret[ip] = vect
        self._regimes[x, :] = regimes
        self._logret[x, :] = logret

    def run(self, datefrom, dateto):
        """Simulate all configured paths sequentially."""
        self._npaths = self._conf.mcmc_simul_npaths()
        self._Taxis = self._serie.get_dates(datefrom, dateto)
        nsteps = self._Taxis.shape[0]
        self._regimes = np.zeros((self._npaths, nsteps)).astype(int)
        self._logret = np.zeros((self._npaths, nsteps,
                                 self._serie._values.shape[1]))
        for x in range(self._npaths):
            self.run_path(x, datefrom, dateto)
if __name__ == "__main__":
    # Each phase reports (now - previous checkpoint) in seconds.
    t0 = time()
    conf = ConfigReader('regimes.ini')
    ts = Series(conf)
    t1 = time()
    print('build ts', t1 - t0)

    ts.init()
    t0, t1 = t1, time()
    print('init ts', t1 - t0)

    # snap the requested window to dates that carry a regime label
    datefrom = ts.closest_date(np.datetime64('2007-09-01'))
    dateto = ts.closest_date(np.datetime64('2008-03-01'))
    ts.calculate(None, dateto)
    t0, t1 = t1, time()
    print('calculate ts', t1 - t0)

    mc = MCMC(conf, ts)
    t0, t1 = t1, time()
    print('build MC', t1 - t0)

    mc.run(datefrom, dateto)
    t0, t1 = t1, time()
    print('run MC', t1 - t0)
    print(mc._regimes)
from config_manager import ConfigReader
from distribs import Distributions
import pandas
import numpy as np
from objects import ObjectContainer
import operator
import datetime
from sklearn.covariance import GraphLassoCV, LedoitWolf
#multivariate time series
from operator import neg
from math import sqrt
from joblib import Parallel,delayed
def compute_covars_(r, values, datefrom, dateto):
    """Fit (mu, sigma) for regime `r` from its sample matrix.

    Returns (r, 0, 0) when there are fewer than 3 observations (integer
    zeros act as the "not fitted" placeholder), otherwise (r, mean vector,
    sparse covariance estimated by GraphLassoCV). Module-level so it can be
    dispatched through joblib.
    """
    if values.shape[0] < 3:
        print('covariance', r, 'nothing')
        return r, 0, 0
    mu = np.mean(values, axis=0)
    sigma = GraphLassoCV().fit(values).covariance_
    print('covariance', r, 'computed')
    return r, mu, sigma
class CovarHelper:
    """Groups merged (master, simul) log-return rows by regime and computes
    per-regime (mu, sigma) estimates over a date window."""

    def __init__(self, mset, mindex, rset, rindex, regime):
        self._mset = mset      # master rows, aligned with mindex
        self._rset = rset      # simul rows, aligned with rindex
        self._mindex = mindex  # datetime64[D] index of mset
        self._rindex = rindex  # datetime64[D] index of rset
        self._r = regime       # regime object exposing _regimes and _rdates
        self._data = {}
        self.initialize()
        self._covariances = {}
        self._pr = None

    def initialize(self):
        """Build {regime: (common dates, hstacked master+simul rows)}."""
        reg = self._r
        for R in np.unique(reg._regimes):
            # dates labelled with regime R
            indx = np.fromiter((k for k, v in reg._rdates.items() if v == R),
                               'datetime64[D]')
            # keep only labelled dates present in BOTH indices
            iids = np.intersect1d(self._rindex[np.isin(self._rindex, indx)],
                                  self._mindex[np.isin(self._mindex, indx)])
            # BUGFIX: use boolean arrays directly — masked_where(...).mask
            # collapses to scalar nomask when nothing matches, which breaks
            # row indexing.
            rdata = np.hstack((self._mset[np.isin(self._mindex, iids)],
                               self._rset[np.isin(self._rindex, iids)]))
            self._data[R] = (iids, rdata)

    def get(self, R, datefrom, dateto):
        """Rows of regime R with datefrom <= date <= dateto
        (either bound may be None)."""
        iids, rows = self._data[R]
        mask = np.ones(iids.shape, dtype=bool)
        if datefrom is not None:
            mask &= iids >= datefrom
        if dateto is not None:
            mask &= iids <= dateto
        return rows[mask, :]

    def compute_covars(self, datefrom, dateto):
        """Fit (mu, sigma) per regime over the window, in parallel."""
        values = {r: self.get(r, datefrom, dateto) for r in self._data}
        print('keys:', values.keys())
        z = Parallel()(delayed(compute_covars_)(r, values[r], datefrom, dateto)
                       for r in values)
        for r, mu, sigma in z:
            self._covariances[r] = (mu, sigma)
class Series:
    """Multivariate log-return series merged from a master source and a
    simulation source, with regime labels attached."""

    def __init__(self, conf):
        self._conf = conf
        self._tickers = conf.series_simul_tickers()
        self._filesource = conf.series_simul_data()

    def attach_regime(self, r):
        """Load the master data, align it with the simul log-returns and
        build the per-regime covariance helper."""
        mfile = self._conf.distrmastersource()
        mfields = self._conf.distrfields()
        w = self._conf.distrws()
        ov = self._conf.distroverlap()
        D = Distributions(varfact=1.1, eps=.2, window=w, overlap=ov)
        D.load(mfile, mfields)
        self._regime = r
        mindex = pandas.to_datetime(D._index).values.astype('datetime64[D]')
        # dates present in both master and simul series
        index = np.intersect1d(mindex, self._indlogret)
        self._covars = CovarHelper(D._logret, mindex, self._logret,
                                   self._indlogret, r)
        # BUGFIX: original indexed self._logret with the mask built over
        # mindex and D._logret with the mask built over _indlogret (crossed
        # indices); select each dataset with its OWN index instead.
        # NOTE(review): column order here is (simul, master) while
        # CovarHelper stacks (master, simul) — confirm which is intended.
        self._values = np.hstack((self._logret[np.isin(self._indlogret, index)],
                                  D._logret[np.isin(mindex, index)]))
        self._index = index

    def calculate(self, datefrom, dateto):
        """Compute per-regime covariances over [datefrom, dateto]."""
        self._covars.compute_covars(datefrom, dateto)
        print('calculate ok', self._covars._covariances.keys())

    def getR(self, dateat):
        """Regime label at `dateat`, or None if the date is unlabelled."""
        return self._regime._rdates.get(dateat)

    def closest_date(self, dateat):
        """Nearest labelled date to `dateat`, searching offsets
        0, +1, -1, +2, -2, +3, -3, +4 days; None if none is labelled."""
        rdates = self._regime._rdates
        for off in (0, 1, -1, 2, -2, 3, -3, 4):
            d = dateat + np.timedelta64(off, 'D')
            if d in rdates:
                return d
        return None

    def values(self, datefrom, dateto):
        """Merged rows with datefrom <= date < dateto (bounds optional)."""
        mask = np.ones(self._index.shape, dtype=bool)
        if datefrom is not None:
            mask &= self._index >= datefrom
        if dateto is not None:
            mask &= self._index < dateto
        return self._values[mask, :]

    def calc_logret(self):
        """Log returns of consecutive closes: row i is log(close[i+1]/close[i]).

        Vectorized equivalent of the original per-row loop; assumes
        _index and _close have the same length (as produced by _load).
        """
        self._indlogret = self._index[1:, ]
        self._logret = np.log(self._close[1:] / self._close[:-1])

    def get_dates(self, datefrom, dateto):
        """Dates of the merged index with datefrom <= date < dateto."""
        mask = np.ones(self._index.shape, dtype=bool)
        if datefrom is not None:
            mask &= self._index >= datefrom
        if dateto is not None:
            mask &= self._index < dateto
        return self._index[mask]

    def _load(self):
        """Load the simul source and derive its log returns."""
        w = self._conf.distrws()
        ov = self._conf.distroverlap()
        d = Distributions(varfact=1., eps=.2, window=w, overlap=ov)
        d.load(self._filesource, self._tickers)
        index = d._index
        self._index = np.array(pandas.to_datetime(index)).astype('datetime64[D]')
        # NOTE(review): closes are taken from Distributions._logret — confirm
        # that attribute actually holds price levels for this source.
        self._close = d._logret
        self.calc_logret()

    def init(self):
        """Load data and attach the persisted regime labels."""
        self._load()
        ObjectContainer.getInstance().load()
        self.attach_regime(ObjectContainer.getInstance().get('regimes'))
if __name__ == "__main__":
    # quick joblib smoke test before the real run
    z = Parallel(n_jobs=2)(delayed(sqrt)(x ** 2) for x in range(10))
    print(z)

    conf = ConfigReader('regimes.ini')
    ts = Series(conf)
    ts.init()

    dateto = np.datetime64('1987-04-30')
    datefrom = np.datetime64('1987-04-01')
    # covariances over everything up to dateto
    ts.calculate(None, dateto)
from config_manager import ConfigReader
from distribs import Distributions
import pandas
import numpy as np
from objects import ObjectContainer
import operator
import datetime
from sklearn.covariance import GraphLassoCV, LedoitWolf
#multivariate time series
from operator import neg
from math import sqrt
from joblib import Parallel,delayed
def compute_covars_(r, values, datefrom, dateto):
    """Fit (mu, sigma) for regime `r` from its sample matrix.

    Returns (r, 0, 0) when there are fewer than 3 observations (integer
    zeros act as the "not fitted" placeholder), otherwise (r, mean vector,
    sparse covariance estimated by GraphLassoCV). Module-level so it can be
    dispatched through joblib.
    """
    if values.shape[0] < 3:
        print('covariance', r, 'nothing')
        return r, 0, 0
    mu = np.mean(values, axis=0)
    sigma = GraphLassoCV().fit(values).covariance_
    print('covariance', r, 'computed')
    return r, mu, sigma
class CovarHelper:
    """Groups merged (master, simul) log-return rows by regime and computes
    per-regime (mu, sigma) estimates over a date window."""

    def __init__(self, mset, mindex, rset, rindex, regime):
        self._mset = mset      # master rows, aligned with mindex
        self._rset = rset      # simul rows, aligned with rindex
        self._mindex = mindex  # datetime64[D] index of mset
        self._rindex = rindex  # datetime64[D] index of rset
        self._r = regime       # regime object exposing _regimes and _rdates
        self._data = {}
        self.initialize()
        self._covariances = {}
        self._pr = None

    def initialize(self):
        """Build {regime: (common dates, hstacked master+simul rows)}."""
        reg = self._r
        for R in np.unique(reg._regimes):
            # dates labelled with regime R
            indx = np.fromiter((k for k, v in reg._rdates.items() if v == R),
                               'datetime64[D]')
            # keep only labelled dates present in BOTH indices
            iids = np.intersect1d(self._rindex[np.isin(self._rindex, indx)],
                                  self._mindex[np.isin(self._mindex, indx)])
            # BUGFIX: use boolean arrays directly — masked_where(...).mask
            # collapses to scalar nomask when nothing matches, which breaks
            # row indexing.
            rdata = np.hstack((self._mset[np.isin(self._mindex, iids)],
                               self._rset[np.isin(self._rindex, iids)]))
            self._data[R] = (iids, rdata)

    def get(self, R, datefrom, dateto):
        """Rows of regime R with datefrom <= date <= dateto
        (either bound may be None)."""
        iids, rows = self._data[R]
        mask = np.ones(iids.shape, dtype=bool)
        if datefrom is not None:
            mask &= iids >= datefrom
        if dateto is not None:
            mask &= iids <= dateto
        return rows[mask, :]

    def compute_covars(self, datefrom, dateto):
        """Fit (mu, sigma) per regime over the window, in parallel."""
        values = {r: self.get(r, datefrom, dateto) for r in self._data}
        print('keys:', values.keys())
        z = Parallel()(delayed(compute_covars_)(r, values[r], datefrom, dateto)
                       for r in values)
        for r, mu, sigma in z:
            self._covariances[r] = (mu, sigma)
class Series:
    """Multivariate log-return series merged from a master source and a
    simulation source, with regime labels attached."""

    def __init__(self, conf):
        self._conf = conf
        self._tickers = conf.series_simul_tickers()
        self._filesource = conf.series_simul_data()

    def attach_regime(self, r):
        """Load the master data, align it with the simul log-returns and
        build the per-regime covariance helper."""
        mfile = self._conf.distrmastersource()
        mfields = self._conf.distrfields()
        w = self._conf.distrws()
        ov = self._conf.distroverlap()
        D = Distributions(varfact=1.1, eps=.2, window=w, overlap=ov)
        D.load(mfile, mfields)
        self._regime = r
        mindex = pandas.to_datetime(D._index).values.astype('datetime64[D]')
        # dates present in both master and simul series
        index = np.intersect1d(mindex, self._indlogret)
        self._covars = CovarHelper(D._logret, mindex, self._logret,
                                   self._indlogret, r)
        # BUGFIX: original indexed self._logret with the mask built over
        # mindex and D._logret with the mask built over _indlogret (crossed
        # indices); select each dataset with its OWN index instead.
        # NOTE(review): column order here is (simul, master) while
        # CovarHelper stacks (master, simul) — confirm which is intended.
        self._values = np.hstack((self._logret[np.isin(self._indlogret, index)],
                                  D._logret[np.isin(mindex, index)]))
        self._index = index

    def calculate(self, datefrom, dateto):
        """Compute per-regime covariances over [datefrom, dateto]."""
        self._covars.compute_covars(datefrom, dateto)
        print('calculate ok', self._covars._covariances.keys())

    def getR(self, dateat):
        """Regime label at `dateat`, or None if the date is unlabelled."""
        return self._regime._rdates.get(dateat)

    def closest_date(self, dateat):
        """Nearest labelled date to `dateat`, searching offsets
        0, +1, -1, +2, -2, +3, -3, +4 days; None if none is labelled."""
        rdates = self._regime._rdates
        for off in (0, 1, -1, 2, -2, 3, -3, 4):
            d = dateat + np.timedelta64(off, 'D')
            if d in rdates:
                return d
        return None

    def values(self, datefrom, dateto):
        """Merged rows with datefrom <= date < dateto (bounds optional)."""
        mask = np.ones(self._index.shape, dtype=bool)
        if datefrom is not None:
            mask &= self._index >= datefrom
        if dateto is not None:
            mask &= self._index < dateto
        return self._values[mask, :]

    def calc_logret(self):
        """Log returns of consecutive closes: row i is log(close[i+1]/close[i]).

        Vectorized equivalent of the original per-row loop; assumes
        _index and _close have the same length (as produced by _load).
        """
        self._indlogret = self._index[1:, ]
        self._logret = np.log(self._close[1:] / self._close[:-1])

    def get_dates(self, datefrom, dateto):
        """Dates of the merged index with datefrom <= date < dateto."""
        mask = np.ones(self._index.shape, dtype=bool)
        if datefrom is not None:
            mask &= self._index >= datefrom
        if dateto is not None:
            mask &= self._index < dateto
        return self._index[mask]

    def _load(self):
        """Load the simul source and derive its log returns."""
        w = self._conf.distrws()
        ov = self._conf.distroverlap()
        d = Distributions(varfact=1., eps=.2, window=w, overlap=ov)
        d.load(self._filesource, self._tickers)
        index = d._index
        self._index = np.array(pandas.to_datetime(index)).astype('datetime64[D]')
        # NOTE(review): closes are taken from Distributions._logret — confirm
        # that attribute actually holds price levels for this source.
        self._close = d._logret
        self.calc_logret()

    def init(self):
        """Load data and attach the persisted regime labels."""
        self._load()
        ObjectContainer.getInstance().load()
        self.attach_regime(ObjectContainer.getInstance().get('regimes'))
if __name__ == "__main__":
    # quick joblib smoke test before the real run
    z = Parallel(n_jobs=2)(delayed(sqrt)(x ** 2) for x in range(10))
    print(z)

    conf = ConfigReader('regimes.ini')
    ts = Series(conf)
    ts.init()

    dateto = np.datetime64('1987-04-30')
    datefrom = np.datetime64('1987-04-01')
    # covariances over everything up to dateto
    ts.calculate(None, dateto)
# Build script for the Cython extension module `series_c`
# (compiles series_c.pyx, imported by the MCMC driver as `from series_c import *`).
# NOTE(review): distutils is deprecated (PEP 632) and removed in Python 3.12 —
# migrate to `from setuptools import setup` when the toolchain allows.
from distutils.core import setup
from Cython.Build import cythonize
setup(
    ext_modules=cythonize("series_c.pyx")
)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment