# contributed by Oleksandr (Sasha) Huziy <guziy.sasha@gmail.com>

# Operating on files in the current directory starting with mrros_*,
# finds dates and time indexes of timesteps where the entire domain is
# missing_value.

from netCDF4 import Dataset
from netCDF4 import num2date
from netCDF4 import date2num

import numpy as np

from datetime import datetime

# Names of the files in which checkFile() found at least one time step whose
# field is completely masked; filled by checkFile() and printed at the end.
files_not_Ok = []

def checkFile(fName, varName):
    """Report the time steps of a file whose field is completely masked.

    Opens ``fName`` with ``netCDF4.Dataset``, reads the ``"time"`` variable
    and, for every time index ``i``, tests whether ``varName[i, :, :]`` is a
    masked array with every element masked.  For each such time step the
    decoded date, the time index, the raw time value with its units and the
    min/max of the field are printed.

    Side effect: ``fName`` is appended to the module-level ``files_not_Ok``
    list when at least one fully masked time step is found.

    Parameters:
        fName: path of the file to open.
        varName: name of the variable to check; it is indexed as
            ``[time, :, :]``.
    """
    ds = Dataset(fName)
    try:
        varTime = ds.variables["time"]
        vTime = varTime[:]
        timeUnits = varTime.units
        # Honour the file's calendar when decoding dates; "standard" is the
        # default num2date uses when no calendar is given.
        timeCalendar = getattr(varTime, "calendar", "standard")

        data = ds.variables[varName]

        # Flag the time steps whose 2-D field is entirely masked.  Each slice
        # is read from the file only once, and the flags are stored in a real
        # boolean array so they can be used as an index mask below.
        nTimes = len(vTime)
        bad_times = np.zeros(nTimes, dtype=bool)
        for i in range(nTimes):
            field = data[i, :, :]
            bad_times[i] = bool(hasattr(field, "mask") and np.all(field.mask))

        if np.any(bad_times):
            print("-- Found bad times in {0} --".format(fName))
            files_not_Ok.append(fName)
        else:
            print("File {0} is OK.".format(fName))

        for d, hour in zip(vTime[bad_times], np.where(bad_times)[0]):
            print(num2date(d, timeUnits, calendar=timeCalendar))
            print("index:  {0}".format(hour))
            print("{0} {1}".format(d, timeUnits))
            suspect = data[hour, :, :]
            print("min, max:  {0} {1}".format(str(np.min(suspect)),
                                              str(np.max(suspect))))
    finally:
        # Always release the file handle, even if reading fails part-way.
        ds.close()


import os

# Name of the variable to check; the files to scan are the entries of the
# current directory whose names start with "<varName>_".
varName = "mrros"

# os.listdir() returns entries in arbitrary order; sort them so the report is
# reproducible from run to run.
for fName in sorted(os.listdir(".")):
    if fName.startswith(varName + "_"):
        checkFile(fName, varName)

# Summary: one line per file in which checkFile() found bad time steps.
print("Finished checking")
print("corrupted files: ")
print("\n".join(files_not_Ok))

