class bicluster: def __init__(self,vec,left=None,right=None,distance=0.0,id=None): self.left=left self.right=right self.vec=vec self.id=id self.distance=distance def hcluster(rows,distance=pearson): distances={} currentclustid=-1 # Clusters are initially just the rows clust=[bicluster(rows[i],id=i) for i in range(len(rows))] # range(5) => [0,1,2,3,4] # so, i ranges over 0, 1, .., number of rows - 1 # # Line above is the same as: # # clust = [] # for i in range(len(rows)): # clust.append(bicluster(rows[i],id=i)) # # note Python parameters # clust has one entry per blog; the id is which row/blog it is # # Will continue until have a complete clustering while len(clust)>1: # this is a tuple; created by just using () like this lowestpair=(0,1) # note use of "distance" - functions are first class objects closest=distance(clust[0].vec,clust[1].vec) # loop through every pair looking for the smallest distance for i in range(len(clust)): for j in range(i+1,len(clust)): # distances is the cache of distance calculations if (clust[i].id,clust[j].id) not in distances: distances[(clust[i].id,clust[j].id)]=distance(clust[i].vec,clust[j].vec) d=distances[(clust[i].id,clust[j].id)] if d