## This code is written by Davide Albanese, <albanese@fbk.eu>.
## (C) 2009 Fondazione Bruno Kessler - Via Santa Croce 77, 38100 Trento, ITALY.

## This program is free software: you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation, either version 3 of the License, or
## (at your option) any later version.

## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.

## You should have received a copy of the GNU General Public License
## along with this program.  If not, see <http://www.gnu.org/licenses/>.


from numpy import *
import hccore

# Public API: HCluster is the only name exported by a star-import of this module.
__all__ = ['HCluster']


class HCluster:
    """Hierarchical Cluster.

    The clustering itself is delegated to the ``hccore`` module:
    ``compute`` builds the merge tree from a data matrix and ``cut``
    splits that tree into groups at a given height.
    """

    # Tables translating the user-facing names into the integer codes
    # handed to hccore.  They live at class level so they are built only
    # once; they are still reachable as ``self.METHODS`` / ``self.LINKS``.
    METHODS = {
        'euclidean': 1,
        }

    LINKS = {
        'single': 1,
        'complete': 2,
        'mcquitty': 3,
        'median': 4,
        }

    def __init__(self, method='euclidean', link='complete'):
        """Initialize Hierarchical Cluster.

        :Parameters:

          method : string ('euclidean')
                   the distance measure to be used

          link : string ('single', 'complete', 'mcquitty', 'median')
                 the agglomeration method to be used

        :Raises:
          ValueError
              if `method` or `link` is not one of the names listed above.

        Example:

        >>> import numpy as np
        >>> import mlpy
        >>> x = np.array([[ 1. ,  1.5],
        ...               [ 1.1,  1.8],
        ...               [ 2. ,  2.8],
        ...               [ 3.2,  3.1],
        ...               [ 3.4,  3.2]])
        >>> hc = mlpy.HCluster()
        >>> hc.compute(x)
        >>> hc.ia
        array([-4, -1, -3,  2])
        >>> hc.ib
        array([-5, -2,  1,  3])
        >>> hc.heights
        array([ 0.2236068 ,  0.31622776,  1.4560219 ,  2.94108844])
        >>> hc.cut(0.5)
        array([0, 0, 1, 2, 2])
        """

        # Fail fast on a misspelt option instead of letting it surface
        # later as a bare KeyError inside compute().
        if method not in self.METHODS:
            raise ValueError("method must be one of %s, got %r"
                             % (sorted(self.METHODS), method))
        if link not in self.LINKS:
            raise ValueError("link must be one of %s, got %r"
                             % (sorted(self.LINKS), link))

        self.method = method
        self.link = link

        # Raw arrays exactly as returned by hccore.compute(); cut() hands
        # them back to hccore unchanged.
        self.__ia = None
        self.__ib = None
        self.__heights = None

        # Public views of the raw arrays (all but their last entry).
        self.ia = None
        self.ib = None
        self.heights = None

        self.order = None

        # Becomes True once compute() has completed successfully.
        self.computed = False

    def compute(self, x):
        """Compute Hierarchical Cluster.

        Nothing is returned; the results are stored on the instance.

        :Parameters:
          x : ndarray or array-like
              A 2-dimensional array (samples x features).

        :Attributes set:
          self.ia : ndarray (1-dimensional vector)
                    merge
          self.ib : ndarray (1-dimensional vector)
                    merge

          self.heights : ndarray (1-dimensional vector)
                         a set of n-1 non-decreasing real values.
                         The clustering height: that is, the value of the
                         criterion associated with the clustering method
                         for the particular agglomeration.

          self.order : fourth value returned by hccore.compute, stored
                       unchanged.

        :Raises:
          ValueError
              if `x` is not 2-dimensional.
          KeyError
              if `self.method` or `self.link` was reassigned after
              construction to a name missing from METHODS / LINKS.

        Element i of merge describes the merging of clusters at step i
        of the clustering. If an element j is negative, then observation
        -j was merged at this stage. If j is positive then the merge was
        with the cluster formed at the (earlier) stage j of the algorithm.
        Thus negative entries in merge indicate agglomerations of singletons,
        and positive entries indicate agglomerations of non-singletons.
        """

        # Accept lists and other array-likes; ndarrays (and subclasses)
        # pass through untouched, so existing callers see no change.
        x = asanyarray(x)

        if x.ndim != 2:
            raise ValueError("x must be 2D array")

        # hccore receives the transposed matrix plus the integer codes
        # of the selected distance measure and linkage.
        self.__ia, self.__ib, self.__heights, self.order = \
                 hccore.compute(x.T, self.METHODS[self.method],
                                self.LINKS[self.link])

        # Expose every entry but the last; the full raw arrays are kept
        # privately because cut() passes them back to hccore.
        self.ia = self.__ia[:-1]
        self.ib = self.__ib[:-1]
        self.heights = self.__heights[:-1]

        self.computed = True

    def cut(self, ht):
        """Cuts the tree into several groups by specifying the cut
        height.

        :Parameters:
          ht : float
               height where the tree should be cut

        :Returns:
          cl : ndarray (1-dimensional vector)
               group memberships. Groups are in 0, ..., N-1

        :Raises:
          ValueError
              if compute() has not been called yet.
        """

        if not self.computed:
            raise ValueError("No hierarchical clustering computed")

        # Shift the labels down by one so that groups start at 0 as
        # documented (presumably hccore.cut labels from 1 -- confirm).
        return hccore.cut(self.__ia, self.__ib, self.__heights, ht) - 1
