#!/usr/bin/python3

import datetime

from gratia.common.Gratia import DebugPrint
#import gratia.common.GratiaWrapper as GratiaWrapper
import gratia.common.Gratia as Gratia
from gratia.services.StorageElement import StorageElement
from gratia.services.StorageElementRecord import StorageElementRecord

from gratia.common2.meter import GratiaMeter, DebugPrintLevel
from gratia.common2.pgpinput import PgInput
import gratia.common2.timeutil as timeutil


class _EnstoreStorageInputStub:
    """Canned rows that stand in for the DB query results during tests.

    The class is kept ahead of the regular input class (the original note
    says this ordering avoids a NameError).

    Each row of ``value_matrix`` holds, in order:
    date string, storage group, active bytes, total bytes, active files, total files.
    """
    value_matrix = [
        ['2013-05-01 00:00:00', 'ALEX', 27501971200, 29502252800, 94, 101],
        ['2013-05-01 00:00:00', 'AMN', 0, 0, 0, 0],
        ['2013-05-01 00:00:00', 'ANM', 5324442004458, 5489039007858, 1645, 2087],
        ['2013-05-01 00:00:00', 'none', 0, 0, 0, 0],
        ['2013-06-01 00:00:00', 'ALEX', 32502444800, 34502726400, 111, 118],
        ['2013-06-01 00:00:00', 'AMN', 0, 0, 0, 0],
        ['2013-06-01 00:00:00', 'ANM', 5330140154858, 5494737158258, 1654, 2096],
        ['2013-06-01 00:00:00', 'none', 0, 0, 0, 0],
        ['2013-07-01 00:00:00', 'ALEX', 32702534400, 34702816000, 112, 119],
        ['2013-07-01 00:00:00', 'AMN', 0, 0, 0, 0],
        ['2013-07-01 00:00:00', 'ANM', 5363559205866, 5533639729266, 1745, 2190],
        ['2013-07-01 00:00:00', 'none', 0, 0, 0, 0],
        ['2013-08-01 00:00:00', 'ANM', 9715492242387, 9894136393825, 7715, 8179],
        ['2013-08-01 00:00:00', 'none', 0, 0, 0, 0],
        ['2014-02-01 00:00:00', 'ANM', 9999003890760, 10599915674909, 7964, 8601],
        ['2014-02-01 00:00:00', 'litvinse', 231480377, 231480377, 120, 120],
        ['2014-02-01 00:00:00', 'test', 2100468, 4200936, 3, 6],
        ['2014-03-01 00:00:00', 'ALEX', 10400563200, 10400563200, 62, 62],
        ['2014-03-01 00:00:00', 'ANM', 9343845119048, 10192509713697, 7914, 8621],
        ['2014-03-01 00:00:00', 'litvinse', 231480377, 231480377, 120, 120],
        ['2014-03-01 00:00:00', 'none', 0, 0, 0, 0],
        ['2014-03-01 00:00:00', 'test', 2100468, 4200936, 3, 6],
        ['2014-04-01 00:00:00', 'ALEX', 10400563200, 10400563200, 62, 62],
        ['2014-04-01 00:00:00', 'ANM', 9430388985014, 10280528687817, 8222, 9025],
        ['2014-04-01 00:00:00', 'e906', 0, 0, 0, 0],
        ['2014-04-01 00:00:00', 'litvinse', 231480377, 231480377, 120, 120],
        ['2014-04-01 00:00:00', 'none', 0, 0, 0, 0],
        ['2014-04-01 00:00:00', 'test', 2100468, 4200936, 3, 6],
        ['2014-05-01 00:00:00', 'ALEX', 15002131200, 15002131200, 54, 54],
        ['2014-05-01 00:00:00', 'ANM', 9433767656334, 9654836052415, 8108, 8965],
        ['2014-05-01 00:00:00', 'e906', 0, 0, 0, 0],
        ['2014-05-01 00:00:00', 'litvinse', 231480377, 231480377, 120, 120],
        ['2014-05-01 00:00:00', 'test', 2100468, 4200936, 3, 6],
        ['2014-06-01 00:00:00', 'ALEX', 2900518400, 2900518400, 11, 11],
        ['2014-06-01 00:00:00', 'ANM', 9434587326624, 9655655722705, 8145, 9002],
        ['2014-06-01 00:00:00', 'e906', 0, 0, 0, 0],
        ['2014-06-01 00:00:00', 'litvinse', 231480377, 231480377, 120, 120],
        ['2014-07-01 00:00:00', 'ALEX', 910905676800, 910905676800, 3044, 3044],
        ['2014-07-01 00:00:00', 'ANM', 10344072760622, 10565385542446, 8877, 9846],
        ['2014-07-01 00:00:00', 'e906', 0, 0, 0, 0],
        ['2014-07-01 00:00:00', 'litvinse', 231480377, 1102439795, 120, 377322],
    ]

    @staticmethod
    def get_records():
        """Yield one dictionary per row of ``value_matrix``.

        The date string is parsed with ``timeutil.parse_datetime`` and the
        four counters are converted with ``int``.

        :yield: dict with keys date, storage_group, active_bytes, total_bytes,
            active_files, total_files
        """
        for when, group, act_bytes, tot_bytes, act_files, tot_files in _EnstoreStorageInputStub.value_matrix:
            yield {
                'date': timeutil.parse_datetime(when),
                'storage_group': group,
                'active_bytes': int(act_bytes),
                'total_bytes': int(tot_bytes),
                'active_files': int(act_files),
                'total_files': int(tot_files),
            }


class EnstoreStorageInput(PgInput):
    """Query the records from the Enstore enstoredb DB
    """

    # Name of the configuration attribute carrying the Enstore version
    VERSION_ATTRIBUTE = 'EnstoreVersion'

    def get_init_params(self):
        """Return list of parameters to read from the config file"""
        return PgInput.get_init_params(self) + [EnstoreStorageInput.VERSION_ATTRIBUTE]

    def start(self, static_info):
        """Start the parent input, then record the configured version if present.

        :param static_info: mapping of configuration values; the version is
            read from the ``EnstoreVersion`` key when it is there
        """
        PgInput.start(self, static_info)
        DebugPrint(4, "ESI start, static info: %s" % static_info)
        if EnstoreStorageInput.VERSION_ATTRIBUTE in static_info:
            self._set_version_config(static_info[EnstoreStorageInput.VERSION_ATTRIBUTE])

    def _start_stub(self, static_info):
        """start replacement for testing: database connection errors are trapped

        :param static_info: mapping of configuration values (same as ``start``)
        """
        try:
            DebugPrint(4, "Testing DB connection. The probe will not use it")
            PgInput.start(self, static_info)
            if self.status_ok():
                DebugPrint(4, "Connection successful")
            else:
                DebugPrint(4, "Connection failed")
            DebugPrint(4, "Closing the connection")
            self.stop()
        except Exception:
            # Best effort on purpose: the stubbed test does not need the DB.
            # Only Exception is trapped so that KeyboardInterrupt/SystemExit still propagate.
            DebugPrint(1, "Database connection failed. The test can continue since stubs are used.")
        DebugPrint(4, "ESI start stub, static info: %s" % static_info)
        if EnstoreStorageInput.VERSION_ATTRIBUTE in static_info:
            self._set_version_config(static_info[EnstoreStorageInput.VERSION_ATTRIBUTE])

    def get_version(self):
        """Return the version obtained via ``_get_version`` for the 'enstore' package"""
        # RPM package is 'enstore'
        return self._get_version('enstore')

    @staticmethod
    def get_record_id(srecord):
        """Return an identifier built as "<date>-<storage_group>" from the record

        :param srecord: record dictionary with 'date' and 'storage_group' keys
        :return: string identifier
        """
        # This record should be unique. Only one storage_group per timestamp
        #TODO: verify actual DB duplicate constraint
        return "%s-%s" % (srecord['date'], srecord['storage_group'])

    def get_current_records(self, limit=None):
        r"""List the current usage of enstore storage. There is no checkpoint since this is a status DB,
there are no historical data, the time is always the one of the query.

The query suggested by Dmitry is equivalent to the one generating entries in the historic table
(historic_tape_bytes). The query used is derived from that.
Volume contains the current status of all enstore volumes.

Database table:
::
    enstoredb=> \d volume
                                               Table "public.volume"
          Column       |            Type             |                        Modifiers
    -------------------+-----------------------------+----------------------------------------------------------
     id                | integer                     | not null default nextval(('volume_seq'::text)::regclass)
     label             | character varying           |
     block_size        | integer                     | default 0
     capacity_bytes    | bigint                      | default 0
     declared          | timestamp without time zone |
     eod_cookie        | character varying           |
     first_access      | timestamp without time zone |
     last_access       | timestamp without time zone |
     library           | character varying           |
     media_type        | character varying           |
     non_del_files     | integer                     | default 0
     remaining_bytes   | bigint                      | default 0
     sum_mounts        | integer                     | default 0
     sum_rd_access     | integer                     | default 0
     sum_rd_err        | integer                     | default 0
     sum_wr_access     | integer                     | default 0
     sum_wr_err        | integer                     | default 0
     system_inhibit_0  | character varying           |
     system_inhibit_1  | character varying           |
     si_time_0         | timestamp without time zone |
     si_time_1         | timestamp without time zone |
     user_inhibit_0    | character varying           |
     user_inhibit_1    | character varying           |
     storage_group     | character varying           |
     file_family       | character varying           |
     wrapper           | character varying           |
     comment           | character varying           |
     write_protected   | character(1)                | default 'u'::bpchar
     active_files      | integer                     | default 0
     deleted_files     | integer                     | default 0
     unknown_files     | integer                     | default 0
     active_bytes      | bigint                      | default 0
     deleted_bytes     | bigint                      | default 0
     unknown_bytes     | bigint                      | default 0
     modification_time | timestamp without time zone |
    Indexes:
        "volume_pkey" PRIMARY KEY, btree (id)
        "volume_label_key" UNIQUE CONSTRAINT, btree (label)
        "volume_declared_idx" btree (declared)
        "volume_eod_cookie_idx" btree (eod_cookie)
        "volume_file_family_idx" btree (file_family)
        "volume_label_key_idx" btree (label)
        "volume_last_access_idx" btree (last_access)
        "volume_library_idx" btree (library)
        "volume_media_type_idx" btree (media_type)
        "volume_modification_time_idx" btree (modification_time)
        "volume_remaining_bytes_idx" btree (remaining_bytes)
        "volume_storage_group_idx" btree (storage_group)
        "volume_system_inhibit_0_idx" btree (system_inhibit_0)
        "volume_system_inhibit_1_idx" btree (system_inhibit_1)
        "volume_user_inhibit_0_idx" btree (user_inhibit_0)
        "volume_user_inhibit_1_idx" btree (user_inhibit_1)
        "volume_wrapper_idx" btree (wrapper)
        "volume_write_protected_idx" btree (write_protected)
    Check constraints:
        "volume_write_protected_check" CHECK (write_protected = 'u'::bpchar OR write_protected = 'y'::bpchar OR write_protected = 'n'::bpchar)
    Referenced by:
        TABLE "file" CONSTRAINT "$1" FOREIGN KEY (volume) REFERENCES volume(id)
        TABLE "state" CONSTRAINT "$1" FOREIGN KEY (volume) REFERENCES volume(id) ON DELETE CASCADE
        TABLE "volume_audit_counter" CONSTRAINT "$1" FOREIGN KEY (volume) REFERENCES volume(id) ON DELETE CASCADE
        TABLE "volume_audit" CONSTRAINT "$1" FOREIGN KEY (volume) REFERENCES volume(id) ON DELETE CASCADE

The records are selected and transformed to have the correct format for the probe.

:param limit: ignored (kept for compatibility)
:yield: record for the probe
        """
        # The docstring above is a raw string because it contains the psql command "\d"
        sql = '''SELECT now() as date,
storage_group,
coalesce(sum(active_bytes),0) as active_bytes,
coalesce((sum(active_bytes)+sum(unknown_bytes)+sum(deleted_bytes)),0) as total_bytes,
coalesce(sum(active_files),0) as active_files,
coalesce((sum(active_files)+sum(unknown_files)+sum(deleted_files)),0) as total_files
FROM volume WHERE system_inhibit_0!='DELETED' and
media_type not in ('null','disk') and library not like '%shelf%' and library not like '%test%'
GROUP BY storage_group ORDER BY storage_group'''

        DebugPrint(4, "Requesting new current Enstore Storage records %s" % sql)
        for r in self.query(sql):
            yield r

    def get_records(self, limit=None):
        r"""Select the usage records from the storage table

Database table:
::
    enstoredb=> \d historic_tape_bytes;
              Table "public.historic_tape_bytes"
       Column     |            Type             | Modifiers
    ---------------+-----------------------------+-----------
    date          | timestamp without time zone | not null
    storage_group | character varying           | not null
    active_bytes  | bigint                      |
    unknown_bytes | bigint                      |
    deleted_bytes | bigint                      |
    active_files  | bigint                      |
    unknown_files | bigint                      |
    deleted_files | bigint                      |
    Indexes:
       "historic_tape_bytes_pkeys" PRIMARY KEY, btree (date, storage_group)

The table is updated monthly and for each storage group contains
record about number of bytes.

We are interested in
 - active_bytes and (active_bytes+deleted_bytes+unknown_bytes) as total_bytes
 - active_files and (active_files+deleted_files+unknown_files) as total_files

NOTE: The time in date is stored as local time! (Dmitry 2/1/2015)
The DB returns time (date) as naive datetime and requires strings like 'YYYY-MM-DD HH:MM:SS'.
date cannot be null.

When there is a checkpoint only the records with checkpoint date <= date < end time are
selected and, once all the records have been yielded, the end time is saved as new checkpoint.
Without checkpoint all the records in the table are returned and limit has no effect
(there is no start time to count the hours from).

:param limit: maximum number of hours to include in the query, counted from the checkpoint date.
    None or a value <= 0 means no limit
:yield: record for the probe
        """
        checkpoint = self.checkpoint

        # DB uses local time -> checkpoint and all timestamps are in local time
        # Give to the end time a 60 seconds delay. To allow for enstore DB processing time
        # NOTE(review): relies on wind_time default direction (presumably backward) - confirm in timeutil
        end_time = timeutil.wind_time(datetime.datetime.now(), seconds=60)

        where_clauses = []
        if checkpoint:
            # The start time must be known before the limit window can be computed from it
            start_time = checkpoint.date()
            if limit is not None and limit > 0:
                window_end = timeutil.wind_time(start_time, hours=limit, backward=False)
                end_time = min(window_end, end_time)
            end_time = timeutil.at_minute(end_time)
            # Should I let the query handle this? Would be empty
            if start_time >= end_time:
                return
            where_clauses.append("date >= '%s'" % timeutil.format_datetime(start_time, iso8601=False))
            where_clauses.append("date < '%s'" % timeutil.format_datetime(end_time, iso8601=False))

        if where_clauses:
            where_sql = "WHERE %s" % " AND ".join(where_clauses)
        else:
            where_sql = ""

        sql = '''SELECT
            date,
            storage_group, active_bytes,
            (active_bytes+unknown_bytes+deleted_bytes) as total_bytes,
            active_files,
            (active_files+unknown_files+deleted_files) as total_files
            FROM historic_tape_bytes
            %s
            ORDER BY date, storage_group
            ''' % (where_sql)

        DebugPrint(4, "Requesting new Enstore Storage records %s" % sql)
        for r in self.query(sql):
            # Consider adding handy data to job record
            #r['cluster'] = self._cluster
            #self._addUserInfoIfMissing(r)
            #TODO: filter out invalid records (e.g. check for None values)?
            yield r

        # Not checking how old is the last record compared to the checkpoint (like in other probes).
        # These entries are updated once in a while (monthly) and would trigger frequent warnings
        if checkpoint:
            DebugPrint(4, "Saving new Enstore Storage checkpoint %s" % end_time)
            checkpoint.set_date_transaction(end_time)

    def _get_records_stub(self, limit=None):
        """get_records replacement for tests: records are from a pre-filled array

        :param limit: maximum number of records to yield (None or 0 for all of them).
            Note that here it counts records, not hours
        :yield: record for the probe
        """
        checkpoint = self.checkpoint
        if checkpoint:
            DebugPrint(4, "Ignoring checkpoint in stub function: %s" % checkpoint.date())
        if limit is None:
            limit = 0
        for i in _EnstoreStorageInputStub.get_records():
            yield i
            if limit > 0:
                limit -= 1
                if limit == 0:
                    break
        if checkpoint:
            # The checkpoint is re-saved with the same date, so it does not advance
            DebugPrint(4, "Stub function, saving checkpoint with same date: %s" % checkpoint.date())
            checkpoint.set_date_transaction(checkpoint.date())

    def do_test(self, static_info=None):
        """Test with pre-arranged DB query results

        Rebinds ``start`` and ``get_records`` on this instance to their stub versions.

        :param static_info: not used here (kept in the signature)
        """
        # replace DB calls with stubs
        self.start = self._start_stub
        self.get_records = self._get_records_stub

class EnstoreStorageProbe(GratiaMeter):
    """Probe collecting enstore storage information

    By default collects data from the historic table using the checkpoint and limit provided in the config file.
    When invoked with ``--current`` it retrieves the current information from the Volumes table (no checkpoint or limit)
    """

    PROBE_NAME = 'enstore-storage'
    # dCache, xrootd, Enstore
    SE_NAME = 'Enstore'
    # Production
    SE_STATUS = 'Production'
    # disk, tape
    SE_TYPE = 'tape'
    # raw, logical
    SE_MEASUREMENT_TYPE = 'logical'

    def __init__(self):
        """Initialize the meter and select the record source according to ``--current``"""
        GratiaMeter.__init__(self, self.PROBE_NAME)
        self._probeinput = EnstoreStorageInput()
        # --current invokes get_current_records which ignores the checkpoint or other flow options
        if self._opts.current:
            DebugPrint(4, "Selecting current records. Will ignore checkpoint and flow options.")
            self._get_records = self._probeinput.get_current_records
        else:
            self._get_records = self._probeinput.get_records

    def get_opts_parser(self):
        """Return an options parser. Extends the GratiaMeter one.

        :return: option parser with new options
        """
        # extend the parent parser
        parser = super(EnstoreStorageProbe, self).get_opts_parser()

        # add (other) options
        parser.add_option(
            "--current",
            help="Query the Volume table (with the current space usage) instead of the historic_tape_bytes.",
            dest="current", default=False, action="store_true"
        )
        return parser

    def get_storage_element(self, unique_id, site, name, parent_id=None, timestamp=None):
        """Return a StorageElement record: name and VO info. Timestamp is set only if provided

        :param unique_id: value for the UniqueID field
        :param site: value for the SE field
        :param name: value for the Name field
        :param parent_id: value for the ParentID field; unique_id is used when it is empty or None
        :param timestamp: value for the Timestamp field, skipped when empty or None
        :return: the StorageElement record
        """
        if not parent_id:
            parent_id = unique_id
        gse = StorageElement()
        gse.UniqueID(unique_id)
        gse.SE(site)
        gse.Name(name)
        gse.ParentID(parent_id)
        # VO
        # OwnerDN
        gse.SpaceType("StorageGroup")  # PoolGroup, StorageGroup in enstore terminology
        if timestamp:
            gse.Timestamp(timestamp)
        gse.Implementation(self.SE_NAME)
        gse.Version(self.get_version())
        gse.Status(self.SE_STATUS)
        # ProbeName
        # probeid
        # SiteName
        # Grid
        return gse

    def get_storage_element_record(self, unique_id, timestamp=None):
        """Return a StorageElementRecord obj: id and space used. Timestamp is set only if provided

        :param unique_id: value for the UniqueID field
        :param timestamp: value for the Timestamp field, skipped when empty or None
        :return: the StorageElementRecord record
        """
        gser = StorageElementRecord()
        gser.UniqueID(unique_id)
        gser.MeasurementType(self.SE_MEASUREMENT_TYPE)
        gser.StorageType(self.SE_TYPE)
        # StorageType
        if timestamp:
            gser.Timestamp(timestamp)
        # TotalSpace, Free, Used
        # FileCountLimit
        # FileCount
        # Probename
        # probeid
        return gser

    def input_to_gsrs(self, inrecord, selement, serecord):
        """(EnstoreStorage)Input to Gratia Storage RecordS.

        Add input values to storage element and storage element record
        Return the tuple VO,tot,free,used,f_tot,f_used to allow cumulative counters

        :param inrecord: EnstoreStorageInput object (dictionary)
        :param selement: StorageElement record
        :param serecord: StorageElementRecord record
        :return: Return the tuple (VO,tot,free,used,f_tot,f_used) to allow cumulative counters.
            f_tot is always 0 and free is None when total or used are None
        """
        # SE
        selement.VO(inrecord['storage_group'])
        selement.Name(inrecord['storage_group'])
        timestamp = timeutil.format_datetime(timeutil.datetime_to_utc(inrecord['date']))
        selement.Timestamp(timestamp)
        # SER
        # TODO: investigate different DB structure
        # Record is currently "abused"
        # used - active bytes (valid files)
        # total - active + deleted + unknown (total files on tape, not total space available)
        # free - deleted + unknown (lost, ...) files
        serecord.Timestamp(timestamp)
        used = inrecord['active_bytes']
        total = inrecord['total_bytes']
        # Values are in bytes
        serecord.TotalSpace(total)
        if total is None or used is None:
            unavailable = None
        else:
            unavailable = total - used
        serecord.FreeSpace(unavailable)
        serecord.UsedSpace(used)
        # Add file counts
        # different from total_files - serecord.FileCountLimit()
        serecord.FileCount(inrecord['active_files'])
        return inrecord['storage_group'], total, unavailable, used, 0, inrecord['active_files']

    def _send_parent_element(self, parent_id, site, parent_name, record_date):
        """Send the parent StorageElement stamped with record_date (converted to UTC)"""
        DebugPrintLevel(4, "Sending the parent StorageElement (%s/%s/%s)" % (site, parent_name, record_date))
        timestamp = timeutil.format_datetime(timeutil.datetime_to_utc(record_date))
        # timestamp must go by keyword: the 4th positional parameter of get_storage_element is parent_id
        gse = self.get_storage_element(parent_id, site, parent_name, timestamp=timestamp)
        Gratia.Send(gse)

    def main(self):
        """Main loop: retrieve the data from the input and send it to Gratia"""
        # Initialize the probe and the input
        self.start()
        DebugPrintLevel(4, "Enstore storage probe started")

        # Must be in configuration file, set initially to EnstoreStorage
        se = self.get_sitename()

        # Prepare data for parent storage element
        # Understand the meaning of the name: name = self.get_probename()
        parent_name = se
        parent_id = "%s:SE:%s" % (se, se)
        # The parent record will be sent in the loop and after (one for each date where there are other records)

        srecord_date = None
        # Loop over storage records
        # These can be either historic records or a current snapshot depending on CLI option
        for srecord in self._get_records():
            if srecord_date is None:
                srecord_date = srecord['date']
            elif srecord_date != srecord['date']:
                # records are grouped and ordered by date. If date changes, will not go back to old value
                # parent record for that timestamp can be sent
                # TODO: add also a SER with summary for used and "available" space?
                self._send_parent_element(parent_id, se, parent_name, srecord_date)
                srecord_date = srecord['date']
            vo_name = srecord['storage_group']
            DebugPrint(4, "Sending SE/SER for VO %s" % vo_name)
            unique_id = "%s:StorageGroup:%s" % (se, vo_name)
            # the name of the se is the vo_name
            # the timestamp is coming from the 'date' value in the record
            gse = self.get_storage_element(unique_id, se, vo_name, parent_id=parent_id)
            gser = self.get_storage_element_record(unique_id)

            self.input_to_gsrs(srecord, gse, gser)

            # To print the records being sent
            #gse.Print()
            #gser.Print()

            Gratia.Send(gse)
            Gratia.Send(gser)

        # Parent record for the last date seen (nothing is sent if there were no records)
        if srecord_date is not None:
            self._send_parent_element(parent_id, se, parent_name, srecord_date)


if __name__ == "__main__":
    # Script entry point: build the probe, then run its main loop
    probe = EnstoreStorageProbe()
    probe.main()

