Commit 91e076f6 authored by Eric Dagobert's avatar Eric Dagobert

regimes

parent 1974bf19
...@@ -4,6 +4,8 @@ from config_manager import ConfigReader ...@@ -4,6 +4,8 @@ from config_manager import ConfigReader
import pandas import pandas
import numpy as np import numpy as np
from sklearn.covariance import GraphicalLassoCV, LedoitWolf from sklearn.covariance import GraphicalLassoCV, LedoitWolf
from sklearn import cluster, covariance, manifold
from math import *
import dill import dill
import sys import sys
if not sys.warnoptions: if not sys.warnoptions:
...@@ -17,6 +19,44 @@ class DistList: ...@@ -17,6 +19,44 @@ class DistList:
self._covlist = covlist self._covlist = covlist
self._ilist = ilist self._ilist = ilist
self._index = None self._index = None
def hellinger(self,n,x,y,smooth,**kwargs):
x1 = x.reshape(n,n+1)
x2 = y.reshape(n,n+1)
mu1,sigma1=x1[:,0],x1[:,1:]
mu2,sigma2=x2[:,0],x2[:,1:]
sign1, ld1 = np.linalg.slogdet(sigma1)
sign2, ld2 = np.linalg.slogdet(sigma2)
dd = 0.5*(sigma1+sigma2)
signdd, ldd = np.linalg.slogdet(dd)
signt = (sign1 * sign2)/signdd
if signt <= 0:
print ('negative')
s_1 = np.linalg.inv(dd)
e = -0.125*(mu1-mu2).T.dot(s_1).dot(mu1-mu2)
#d = pow(d1,.25)*pow(d2,.25)/pow(det1,.5)
d = .25*(ld1+ld2-2*ldd)
d = exp(d)
es = 1-d*exp(e)
#if es <=0:
# es = 0
if smooth:
r = atanh(es)
else:
r = sqrt(es)
return r
def vdist(self,x,centroids,sm):
n = self._covlist.shape[1]
V = np.full((len(centroids)),0.)
for i,u in enumerate(centroids):
V[i] = self.hellinger(n,x,self._covlist[u[1]],sm)
return V
class Distributions: class Distributions:
def __init__(self,varfact=1.1,eps=.2, window=90, overlap=60): def __init__(self,varfact=1.1,eps=.2, window=90, overlap=60):
......
...@@ -11,6 +11,7 @@ from math import * ...@@ -11,6 +11,7 @@ from math import *
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
from matplotlib.pyplot import cm from matplotlib.pyplot import cm
from sklearn import cluster, covariance, manifold from sklearn import cluster, covariance, manifold
from tabulate import tabulate
...@@ -23,23 +24,24 @@ class Clusters: ...@@ -23,23 +24,24 @@ class Clusters:
distfile = conf.mastercovlist() distfile = conf.mastercovlist()
with open(distfile,'rb') as f: with open(distfile,'rb') as f:
self._distlist = dill.load(f) self._distlist = dill.load(f)
def spectral_centroids(self):
self._centroids=[] def spectral_centroids(self, labels):
for i in range(self._clabels.max()+1): centroids=[]
idx=self._clabels==i for i in range(labels.max()+1):
idx=labels==i
mu = self._embedding[:,idx].mean(axis=1).reshape(2,1) mu = self._embedding[:,idx].mean(axis=1).reshape(2,1)
#find the closest point to mu #find the closest point to mu
dist = np.square(self._embedding-mu).sum(axis=0) dist = np.square(self._embedding-mu).sum(axis=0)
i2 = np.argmin(dist) i2 = np.argmin(dist)
covs = self._distlist._covlist covs = self._distlist._covlist
self._centroids.append((self._embedding[:,i2],i2, self._embedding[:,idx].shape[1], covs[i2])) centroids.append((self._embedding[:,i2],i2, self._embedding[:,idx].shape[1], covs[i2]))
return centroids
def graph_clusters(self):
t = 'clusters' def graph_(self,centroids, labels,t = 'clusters'):
embedding = self._embedding embedding = self._embedding
centroids = self._centroids L = labels
L = self._clabels
colormap = [cm.gnuplot2(i) for i in np.linspace(0.,1.,max(L)+2)] colormap = [cm.gnuplot2(i) for i in np.linspace(0.,1.,max(L)+2)]
colorms = [colormap[i] for i in L] colorms = [colormap[i] for i in L]
...@@ -63,38 +65,11 @@ class Clusters: ...@@ -63,38 +65,11 @@ class Clusters:
plt.scatter(z[0][0],z[0][1],s=50,c=[colormap[i]],alpha=1.0) plt.scatter(z[0][0],z[0][1],s=50,c=[colormap[i]],alpha=1.0)
plt.title(t) plt.title(t)
plt.show()
plt.savefig('clusters') plt.savefig('clusters')
plt.close(fig) plt.close(fig)
def hellinger(self,n,x,y,smooth,**kwargs):
x1 = x.reshape(n,n+1)
x2 = y.reshape(n,n+1)
mu1,sigma1=x1[:,0],x1[:,1:]
mu2,sigma2=x2[:,0],x2[:,1:]
sign1, ld1 = np.linalg.slogdet(sigma1)
sign2, ld2 = np.linalg.slogdet(sigma2)
dd = 0.5*(sigma1+sigma2)
signdd, ldd = np.linalg.slogdet(dd)
signt = (sign1 * sign2)/signdd
if signt <= 0:
print ('negative')
s_1 = np.linalg.inv(dd)
e = -0.125*(mu1-mu2).T.dot(s_1).dot(mu1-mu2)
#d = pow(d1,.25)*pow(d2,.25)/pow(det1,.5)
d = .25*(ld1+ld2-2*ldd)
d = exp(d)
es = 1-d*exp(e)
#if es <=0:
# es = 0
if smooth:
r = atanh(es)
else:
r = sqrt(es)
return r
def compute_clusters(self): def compute_clusters(self):
...@@ -114,7 +89,7 @@ class Clusters: ...@@ -114,7 +89,7 @@ class Clusters:
dill.dump(self._clabels,f) dill.dump(self._clabels,f)
with open('clusterengine.plk','wb') as f: with open('clusterengine.plk','wb') as f:
dill.dump(self._agg,f) dill.dump(self._agg,f)
self.spectral_centroids() self._centroids = self.spectral_centroids(self._clabels)
def cal_affinity_matrix(self): def cal_affinity_matrix(self):
if (os.path.isfile('hellinger.plk')): if (os.path.isfile('hellinger.plk')):
...@@ -129,7 +104,7 @@ class Clusters: ...@@ -129,7 +104,7 @@ class Clusters:
for v in cind: for v in cind:
br.iterate() br.iterate()
if hellingerd[v,u] == 0: if hellingerd[v,u] == 0:
hellingerd[u,v] = self.hellinger(n,covs[u],covs[v],self._conf.smooth_hellinger()) hellingerd[u,v] = self._distlist.hellinger(n,covs[u],covs[v],self._conf.smooth_hellinger())
else: else:
hellingerd[u,v] = hellingerd[v,u] hellingerd[u,v] = hellingerd[v,u]
...@@ -137,6 +112,123 @@ class Clusters: ...@@ -137,6 +112,123 @@ class Clusters:
with open('hellinger.plk','wb') as f: with open('hellinger.plk','wb') as f:
dill.dump(hellingerd,f) dill.dump(hellingerd,f)
class Regimes:
def __init__(self, clusters):
self._clusters = clusters
self._reg_threshold = clusters._conf.reg_threshold()
def printout_centroids(self,prefix):
#distances
D = np.full((len(self._rcentroids),len(self._rcentroids)),0.)
rows=[]
sm = self._clusters._conf.smooth_hellinger()
hellinger = self._clusters._distlist.hellinger
n = self._clusters._distlist._covlist.shape[1]
data = self._clusters._distlist._covlist
for i,x in enumerate(self._rcentroids):
print (prefix +'#',i,':',x[2],'elements')
row=[prefix + '#%d'%i]
for j,y in enumerate(self._rcentroids):
h = hellinger(n,data[x[1]],data[y[1]],sm)
row.append(h)
rows.append(row)
headers=[prefix + '#%d'%r for r in range(len(self._rcentroids))]
table = tabulate(rows,headers= headers,tablefmt='orgtbl',floatfmt='.3f')
print (table)
def merge_regimes(self,s,t):
idx = np.where(self._regimes==s)
self._regimes[idx]=t
idx = np.where(self._regimes > s)
self._regimes[idx] = self._regimes[idx]-1
for k,x in self._rdates.items():
if x == s:
self._rdates[k] = t
elif x > s:
self._rdates[k] = x-1
def delete_centroid(self,s):
for k in range(s,len(self._rcentroids)-1):
self._rcentroids[k]=self._rcentroids[k+1]
del self._rcentroids[len(self._rcentroids)-1]
def compute_inter(self):
dist = self._clusters._distlist
K = 0
lk = 0
dlabels = dict()
rlabels = np.array(self._clusters._clabels)
vregimes = []
for x,dt in enumerate(dist._index):
for l, kitem in enumerate(dist._ilist[K:]):
if x < kitem[0]:
break
if x >= kitem[1]:
lk += 1
continue
if not dt in dlabels.keys():
dlabels[dt] = set([rlabels[l+K]])
else:
dlabels[dt].add(rlabels[l+K])
K += lk
lk = 0
self._dlabels = dlabels
def label_regimes(self):
self._rcounts = dict()
self._rdates = dict()
self._reflabel = dict()
x = 0
regimes = []
indlist = list(self._clusters._distlist._ilist)
for k,v in self._dlabels.items():
key = tuple(sorted(v))
if key in self._rcounts.keys():
self._rcounts[key] += 1
else:
self._rcounts[key] = 1
self._reflabel[key]= len(self._reflabel)
self._rdates[k]=self._reflabel[key]
if (indlist and x == indlist[0][0]):
indlist = indlist[1:]
regimes.append(self._reflabel[key])
x += 1
self._regimes = np.array(regimes)
self._rcentroids = self._clusters.spectral_centroids(self._regimes)
def filter_small(self):
cont=True
s = 0
t = 1
data = self._clusters._distlist._covlist
while cont :
if self._rcentroids[s][2] > self._reg_threshold:
s+=1
t=0
if s >= len(self._rcentroids):
cont=False
continue
distarget = data[self._rcentroids[s][1]]
v = self._clusters._distlist.vdist(distarget, self._rcentroids, self._clusters._conf.smooth_hellinger())
v[s]=100.
t = np.argmin(v)
self.merge_regimes(s,t)
if len(self._rcentroids) > 1:
self.delete_centroid(s)
if s >= len(self._rcentroids):
s-=1
else:
cont=False
#re-evaluate centroids
self._rcentroids = self._clusters.spectral_centroids(self._regimes)
#cl._dt2rg()
if __name__ == "__main__": if __name__ == "__main__":
...@@ -144,4 +236,11 @@ if __name__ == "__main__": ...@@ -144,4 +236,11 @@ if __name__ == "__main__":
cl = Clusters(conf) cl = Clusters(conf)
cl.cal_affinity_matrix() cl.cal_affinity_matrix()
cl.compute_clusters() cl.compute_clusters()
cl.graph_clusters() r = Regimes(cl)
\ No newline at end of file r.compute_inter()
r.label_regimes()
r.printout_centroids('regimes')
r.filter_small()
r.printout_centroids('regimes')
r._clusters.graph_(r._rcentroids,r._regimes,'regimes')
\ No newline at end of file
...@@ -21,4 +21,7 @@ eigen_solver=arpack ...@@ -21,4 +21,7 @@ eigen_solver=arpack
spectraln_neighbors=6 spectraln_neighbors=6
n_neighbors=10 n_neighbors=10
random_states=48 random_states=48
smooth hellinger=True smooth hellinger=True
\ No newline at end of file
[regimes]
threshold = 4
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment