Commit 91e076f6 authored by Eric Dagobert's avatar Eric Dagobert

regimes

parent 1974bf19
......@@ -4,6 +4,8 @@ from config_manager import ConfigReader
import pandas
import numpy as np
from sklearn.covariance import GraphicalLassoCV, LedoitWolf
from sklearn import cluster, covariance, manifold
from math import *
import dill
import sys
if not sys.warnoptions:
......@@ -17,6 +19,44 @@ class DistList:
self._covlist = covlist
self._ilist = ilist
self._index = None
def hellinger(self,n,x,y,smooth,**kwargs):
x1 = x.reshape(n,n+1)
x2 = y.reshape(n,n+1)
mu1,sigma1=x1[:,0],x1[:,1:]
mu2,sigma2=x2[:,0],x2[:,1:]
sign1, ld1 = np.linalg.slogdet(sigma1)
sign2, ld2 = np.linalg.slogdet(sigma2)
dd = 0.5*(sigma1+sigma2)
signdd, ldd = np.linalg.slogdet(dd)
signt = (sign1 * sign2)/signdd
if signt <= 0:
print ('negative')
s_1 = np.linalg.inv(dd)
e = -0.125*(mu1-mu2).T.dot(s_1).dot(mu1-mu2)
#d = pow(d1,.25)*pow(d2,.25)/pow(det1,.5)
d = .25*(ld1+ld2-2*ldd)
d = exp(d)
es = 1-d*exp(e)
#if es <=0:
# es = 0
if smooth:
r = atanh(es)
else:
r = sqrt(es)
return r
def vdist(self,x,centroids,sm):
n = self._covlist.shape[1]
V = np.full((len(centroids)),0.)
for i,u in enumerate(centroids):
V[i] = self.hellinger(n,x,self._covlist[u[1]],sm)
return V
class Distributions:
def __init__(self,varfact=1.1,eps=.2, window=90, overlap=60):
......
......@@ -11,6 +11,7 @@ from math import *
import matplotlib.pyplot as plt
from matplotlib.pyplot import cm
from sklearn import cluster, covariance, manifold
from tabulate import tabulate
......@@ -23,23 +24,24 @@ class Clusters:
distfile = conf.mastercovlist()
with open(distfile,'rb') as f:
self._distlist = dill.load(f)
def spectral_centroids(self):
self._centroids=[]
for i in range(self._clabels.max()+1):
idx=self._clabels==i
def spectral_centroids(self, labels):
centroids=[]
for i in range(labels.max()+1):
idx=labels==i
mu = self._embedding[:,idx].mean(axis=1).reshape(2,1)
#find the closest point to mu
dist = np.square(self._embedding-mu).sum(axis=0)
i2 = np.argmin(dist)
covs = self._distlist._covlist
self._centroids.append((self._embedding[:,i2],i2, self._embedding[:,idx].shape[1], covs[i2]))
def graph_clusters(self):
t = 'clusters'
centroids.append((self._embedding[:,i2],i2, self._embedding[:,idx].shape[1], covs[i2]))
return centroids
def graph_(self,centroids, labels,t = 'clusters'):
embedding = self._embedding
centroids = self._centroids
L = self._clabels
L = labels
colormap = [cm.gnuplot2(i) for i in np.linspace(0.,1.,max(L)+2)]
colorms = [colormap[i] for i in L]
......@@ -63,38 +65,11 @@ class Clusters:
plt.scatter(z[0][0],z[0][1],s=50,c=[colormap[i]],alpha=1.0)
plt.title(t)
plt.show()
plt.savefig('clusters')
plt.close(fig)
def hellinger(self,n,x,y,smooth,**kwargs):
x1 = x.reshape(n,n+1)
x2 = y.reshape(n,n+1)
mu1,sigma1=x1[:,0],x1[:,1:]
mu2,sigma2=x2[:,0],x2[:,1:]
sign1, ld1 = np.linalg.slogdet(sigma1)
sign2, ld2 = np.linalg.slogdet(sigma2)
dd = 0.5*(sigma1+sigma2)
signdd, ldd = np.linalg.slogdet(dd)
signt = (sign1 * sign2)/signdd
if signt <= 0:
print ('negative')
s_1 = np.linalg.inv(dd)
e = -0.125*(mu1-mu2).T.dot(s_1).dot(mu1-mu2)
#d = pow(d1,.25)*pow(d2,.25)/pow(det1,.5)
d = .25*(ld1+ld2-2*ldd)
d = exp(d)
es = 1-d*exp(e)
#if es <=0:
# es = 0
if smooth:
r = atanh(es)
else:
r = sqrt(es)
return r
def compute_clusters(self):
......@@ -114,7 +89,7 @@ class Clusters:
dill.dump(self._clabels,f)
with open('clusterengine.plk','wb') as f:
dill.dump(self._agg,f)
self.spectral_centroids()
self._centroids = self.spectral_centroids(self._clabels)
def cal_affinity_matrix(self):
if (os.path.isfile('hellinger.plk')):
......@@ -129,7 +104,7 @@ class Clusters:
for v in cind:
br.iterate()
if hellingerd[v,u] == 0:
hellingerd[u,v] = self.hellinger(n,covs[u],covs[v],self._conf.smooth_hellinger())
hellingerd[u,v] = self._distlist.hellinger(n,covs[u],covs[v],self._conf.smooth_hellinger())
else:
hellingerd[u,v] = hellingerd[v,u]
......@@ -137,6 +112,123 @@ class Clusters:
with open('hellinger.plk','wb') as f:
dill.dump(hellingerd,f)
class Regimes:
def __init__(self, clusters):
self._clusters = clusters
self._reg_threshold = clusters._conf.reg_threshold()
def printout_centroids(self,prefix):
#distances
D = np.full((len(self._rcentroids),len(self._rcentroids)),0.)
rows=[]
sm = self._clusters._conf.smooth_hellinger()
hellinger = self._clusters._distlist.hellinger
n = self._clusters._distlist._covlist.shape[1]
data = self._clusters._distlist._covlist
for i,x in enumerate(self._rcentroids):
print (prefix +'#',i,':',x[2],'elements')
row=[prefix + '#%d'%i]
for j,y in enumerate(self._rcentroids):
h = hellinger(n,data[x[1]],data[y[1]],sm)
row.append(h)
rows.append(row)
headers=[prefix + '#%d'%r for r in range(len(self._rcentroids))]
table = tabulate(rows,headers= headers,tablefmt='orgtbl',floatfmt='.3f')
print (table)
def merge_regimes(self,s,t):
idx = np.where(self._regimes==s)
self._regimes[idx]=t
idx = np.where(self._regimes > s)
self._regimes[idx] = self._regimes[idx]-1
for k,x in self._rdates.items():
if x == s:
self._rdates[k] = t
elif x > s:
self._rdates[k] = x-1
def delete_centroid(self,s):
for k in range(s,len(self._rcentroids)-1):
self._rcentroids[k]=self._rcentroids[k+1]
del self._rcentroids[len(self._rcentroids)-1]
def compute_inter(self):
dist = self._clusters._distlist
K = 0
lk = 0
dlabels = dict()
rlabels = np.array(self._clusters._clabels)
vregimes = []
for x,dt in enumerate(dist._index):
for l, kitem in enumerate(dist._ilist[K:]):
if x < kitem[0]:
break
if x >= kitem[1]:
lk += 1
continue
if not dt in dlabels.keys():
dlabels[dt] = set([rlabels[l+K]])
else:
dlabels[dt].add(rlabels[l+K])
K += lk
lk = 0
self._dlabels = dlabels
def label_regimes(self):
self._rcounts = dict()
self._rdates = dict()
self._reflabel = dict()
x = 0
regimes = []
indlist = list(self._clusters._distlist._ilist)
for k,v in self._dlabels.items():
key = tuple(sorted(v))
if key in self._rcounts.keys():
self._rcounts[key] += 1
else:
self._rcounts[key] = 1
self._reflabel[key]= len(self._reflabel)
self._rdates[k]=self._reflabel[key]
if (indlist and x == indlist[0][0]):
indlist = indlist[1:]
regimes.append(self._reflabel[key])
x += 1
self._regimes = np.array(regimes)
self._rcentroids = self._clusters.spectral_centroids(self._regimes)
def filter_small(self):
cont=True
s = 0
t = 1
data = self._clusters._distlist._covlist
while cont :
if self._rcentroids[s][2] > self._reg_threshold:
s+=1
t=0
if s >= len(self._rcentroids):
cont=False
continue
distarget = data[self._rcentroids[s][1]]
v = self._clusters._distlist.vdist(distarget, self._rcentroids, self._clusters._conf.smooth_hellinger())
v[s]=100.
t = np.argmin(v)
self.merge_regimes(s,t)
if len(self._rcentroids) > 1:
self.delete_centroid(s)
if s >= len(self._rcentroids):
s-=1
else:
cont=False
#re-evaluate centroids
self._rcentroids = self._clusters.spectral_centroids(self._regimes)
#cl._dt2rg()
if __name__ == "__main__":
......@@ -144,4 +236,11 @@ if __name__ == "__main__":
cl = Clusters(conf)
cl.cal_affinity_matrix()
cl.compute_clusters()
cl.graph_clusters()
\ No newline at end of file
r = Regimes(cl)
r.compute_inter()
r.label_regimes()
r.printout_centroids('regimes')
r.filter_small()
r.printout_centroids('regimes')
r._clusters.graph_(r._rcentroids,r._regimes,'regimes')
\ No newline at end of file
......@@ -21,4 +21,7 @@ eigen_solver=arpack
spectraln_neighbors=6
n_neighbors=10
random_states=48
smooth hellinger=True
\ No newline at end of file
smooth hellinger=True
[regimes]
threshold = 4
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment