import numpy as np

def read_rtv(fillst, idxset=None, hdr_only=False):
  """ Procedure to read MORSE .rtv file(s)

  Version
    22NOV24 AD Add hdr['hdr1'], hdr['hdr2']
    08OCT24 AD Correction for idxset parameter
    07OCT24 AD Correction for reading multi-set files
    28FEB24 AD Original

  Parameters
    fillst  [str] : list (or tuple) of .rtv filenames in time order;
                    a single filename is also accepted
    idxset   int  : Set# required (1 = first set in each pixel),
                    default is last, which is final retrieval.
    hdr_only boo  : True = only read in file header(s); {pix} is then
                    returned as an empty dictionary

  Returns
    {hdr}  :  Dictionary of file-level variables
    {pix}  :  Dictionary of numpy arrays, each with first dimension
              hdr['npix'] (profile arrays are [npix] for single-level
              profiles, [npix,nlvprf] otherwise)

  Raises
    SystemExit : (after printing an 'F-READ_RTV' message) if the files in
                 fillst have inconsistent headers
  """

  def fatal(msg):
    # Print the fatal-error message and stop with a non-zero exit status.
    # SystemExit is raised directly rather than via the site-provided exit(),
    # which is not guaranteed to exist in every interpreter configuration.
    print(msg)
    raise SystemExit(1)

# Define structure of hdr dictionary
  hdr = { 'npix':0,   # No. pixels/profile locations in file
          'nset':0,   # No. retrieval sets per pixel (usually 1)
          'nprf':0,   # No. different profiles per pixel
          'nlev':0,   # Max. no. levels in any profile
        'labgrd':'',  # Profile grid type, '*PRE', '*HGT' or '*HGT_NOM'
        'levgrd':[],  # (nlev) List of pressure [hPa] or altitude [km] levels
        'labprf':[],  # (nprf) List of profile species etc
        'nlvprf':{},  # For each labprf, No. profile levels
        'levprf':{},  # For each labprf, a list of nlvprf indices (0=first level)
          'rfmt':0.0, # .rtv file format identifier
         'igeom':0,   # Viewing geometry, 1=limb emission, 2=limb trans, 3=nadir
    'instrument':'',  # Instrument identification
     'satellite':'',  # Satellite identification
        'ymdsta':0,   # Year, month, day of start of data (yyyymmdd)
        'daysta':0,   # Day# since 1 Jan 2000 (=0) of start of data
        'hmssta':0,   # Hours, minutes, seconds of start of data (hhmmss)
        'orbsta':0,   # Orbit# of start of data
        'ymdend':0,   # Year, month, day of end of data (yyyymmdd)
        'dayend':0,   # Day# since 1 Jan 2000 (=0) of end of data
        'hmsend':0,   # Hours, minutes, seconds of end of data (hhmmss)
        'orbend':0 }  # Orbit# of end of data

# Executable code ------------------------------------------------------------

  first = True                # flag first file
  mpix  = 0                   # No pixels read so far
  pix   = {}                  # stays empty if hdr_only or no files supplied
  nadir = False               # set from igeom of first file

  # if single filename supplied, make into list
  if not isinstance(fillst, (list, tuple)):
    fillst = [fillst]

  for rtvfil in fillst:       # Loop over .rtv files
    with open(rtvfil) as f:
      if first:               # keep first two records of first file
        hdr['hdr1'] = f.readline()
        hdr['hdr2'] = f.readline()
      # Skip (remaining) header records, marked by '!' in first column
      rec = f.readline()
      while rec.startswith('!'):
        rec = f.readline()

      # Record containing file format identifier
      flds = rec.split()
      rfmt = float(flds[0])
      if first:
        hdr['rfmt'] = rfmt
        if rfmt > 2.0:
          print('W-READ_RTV: May not read new file format correctly')
      elif rfmt != hdr['rfmt']:
        fatal('F-READ_RTV: Files with different file format IDs')

      # Record containing viewing geometry
      flds = f.readline().split()
      igeom = int(flds[0])
      if first:
        hdr['igeom'] = igeom
        nadir = igeom == 3
      elif igeom != hdr['igeom']:
        fatal('F-READ_RTV: Files with different viewing geometries')

      # Record containing instrument and satellite ID
      # (always read to advance the file, only stored for first file)
      rec = f.readline()
      if first:
        flds = rec.split()
        hdr['instrument'] = flds[0]
        hdr['satellite']  = flds[1]

      # Record containing nominal date, Julian day for this file
      flds = f.readline().split()
      ymd = int(flds[0])
      day = int(flds[1])
      if first:             # assume first listed .rtv file is earliest data
        hdr['ymdsta'] = ymd
        hdr['daysta'] = day
      hdr['ymdend'] = ymd   # update end date from latest .rtv file
      hdr['dayend'] = day

      # Record containing orbit# and start/end times for orbit
      flds = f.readline().split()
      orbit  = int(flds[0])
      hmssta = int(flds[1])
      hmsend = int(flds[2])
      if first:                 # assume first listed .rtv file is earliest
        hdr['orbsta'] = orbit
        hdr['hmssta'] = hmssta
      hdr['orbend'] = orbit     # update end time from latest .rtv file
      hdr['hmsend'] = hmsend

      # Record containing no.pixels and no.rtv sets per pixel (usually 1)
      flds = f.readline().split()
      npix = int(flds[0])
      nset = int(flds[1])
      hdr['npix'] += npix       # add to total
      if first:
        hdr['nset'] = nset
        if idxset is None:
          idxset = nset         # default: last set = final retrieval
        elif not 1 <= idxset <= nset:
          # No set index will match in the data loop below, so the profile
          # arrays would be returned without any retrieved values
          print('W-READ_RTV: idxset outside range 1:nset,'
                ' no profile data will be stored')
      elif nset != hdr['nset']:
        fatal('F-READ_RTV: Files with different no.of retrieval sets')

      # Record containing max.no. profile levels and no.different profiles
      flds = f.readline().split()
      nlev = int(flds[0])
      nprf = int(flds[1])
      if first:
        hdr['nlev'] = nlev
        hdr['nprf'] = nprf
      elif nlev != hdr['nlev']:
        fatal('F-READ_RTV: Files with different no.of profile levels')
      elif nprf != hdr['nprf']:
        fatal('F-READ_RTV: Files with different no.of profile types')

      # Records containing profile grid type and then list of levels
      flds = f.readline().split()
      labgrd = flds[0][1:].lower()  # remove '*' marker
      # np.fromfile continues from the current position of f and reads
      # nlev whitespace-separated values (which may span several records)
      levgrd = np.fromfile(f,sep=" ",count=nlev)
      if first:
        hdr['labgrd'] = labgrd
        hdr['levgrd'] = levgrd
      elif labgrd != hdr['labgrd']:
        fatal('F-READ_RTV: Files with different profile grid types')

      # Records containing profile types and vertical grids
      for iprf in range(nprf):   # loop over each profile type
        # Record containing profile type and no.grid levels
        flds = f.readline().split()
        prf  = flds[0].lower()
        nlvp = int(flds[1])
        # if no.levels non-zero but less than nlev, read list of 1,0 flags
        partial = 0 < nlvp < nlev
        if partial:
          flags = np.fromfile(f,sep=" ",dtype=int,count=nlev)
        if nlvp == 0:            # scalar data: treat as 1 level from here
          nlvp = 1

        if first:
          hdr['labprf'].append(prf)   # Add prf to list in hdr
          if partial:                 # If partially filled grid
            #   set to subset of full grid indices flagged with 1
            idxlev = np.array([ilev for ilev in range(nlev)
                               if flags[ilev] == 1])
          elif nlvp == nlev:          # If profile represented on full grid
            idxlev = np.arange(nlev)  #   set to 0,1,2,3 ... nlev-1
          else:                       # If scalar data
            idxlev = [0]              #   set to 0
          hdr['nlvprf'][prf] = nlvp
          hdr['levprf'][prf] = idxlev
        elif prf not in hdr['labprf']:
          fatal('F-READ_RTV: Files with different profile types')
        elif nlvp != hdr['nlvprf'][prf]:
          print(nlvp,prf,hdr['nlvprf'][prf])
          fatal('F-READ_RTV: ' + prf + ' has different no.profile levels')

      # Record containing end of header marker
      rec = f.readline()
# File header section now read in

      if not hdr_only:

# Define structure of pixel header components of pix dictionary
        if first:
          # Integer-valued pixel header items:
          #   orb = Orbit# for pixel
          #   ipx = Pixel# within data
          #   ymd = Year,month,day [yyyymmdd]
          #   hms = Hours,minutes,seconds UT [hhmmss]
          #   msc = Milliseconds into day
          for key in ('orb', 'ipx', 'ymd', 'hms', 'msc'):
            pix[key] = np.zeros(0,dtype=int)
          # Real-valued pixel header items:
          #   lat = Latitude [deg N]
          #   lon = Longitude [deg E]
          #   sza = Solar Zenith Angle [deg from zenith]
          for key in ('lat', 'lon', 'sza'):
            pix[key] = np.zeros(0,dtype=float)
          if nadir:       # eg IASI
            pix['stp'] = np.zeros(0,dtype=int)    # Scan step# (1-30 for IASI)
            pix['fov'] = np.zeros(0,dtype=int)    # FOV# (1-4 for IASI)
            pix['zen'] = np.zeros(0,dtype=float)  # Satellite zenith angle [deg]
            pix['cld'] = np.zeros(0,dtype=float)  # Cloud cover [%]
            pix['lnd'] = np.zeros(0,dtype=float)  # Land cover [%]
          else:           # eg MIPAS
            pix['lst'] = np.zeros(0,dtype=float)  # Local Solar time [hr]
          # Add prf arrays to pix dictionary, will be resized later
          for prf in hdr['labprf']:
            nlvp = hdr['nlvprf'][prf]
            if nlvp == 1:                 # If scalar, set data as [npix]
              pix[prf] = np.zeros(0,dtype=float)
            else:                       # If vector, set data as [npix,nlvp]
              pix[prf] = np.zeros([0,nlvp],dtype=float)

        ipix1 = mpix   # ipix1 is index of first pixel of the current file
        mpix += npix   # npix is no.pixels in current file

        # Resize pixel data to accommodate new file. np.resize keeps the
        # existing (row-major) contents at the start of the new array; the
        # extra elements are overwritten in the read loop below.
        for key in pix:
          nlvp = hdr['nlvprf'][key] if key in hdr['labprf'] else 1
          if nlvp > 1:                  # multi-level profile data
            pix[key] = np.resize ( pix[key], [mpix,nlvp] )
          else:                         # pixel header or single-level data
            pix[key] = np.resize ( pix[key], mpix )

        # read through file data
        for ipix in range(ipix1,mpix):
          # NOTE(review): the pixel counter record is read but not parsed,
          # and pix['ipx'] is never assigned in this routine - confirm
          # whether it should be filled from this record.
          rec = f.readline()    # Pixel counter (original pixel#)
          rec = f.readline()    # header labels
          # Pixel header record
          pix['orb'][ipix] = orbit    # orbit# read from file header
          flds = f.readline().split()
          pix['ymd'][ipix] = int(flds[0])
          pix['hms'][ipix] = int(flds[1])
          pix['msc'][ipix] = int(flds[2])
          if nadir:
            pix['stp'][ipix] = int(flds[3])
            pix['fov'][ipix] = int(flds[4])
            pix['lat'][ipix] = float(flds[5])
            pix['lon'][ipix] = float(flds[6])
            pix['zen'][ipix] = float(flds[7])
            pix['sza'][ipix] = float(flds[8])
            pix['cld'][ipix] = float(flds[9])
            pix['lnd'][ipix] = float(flds[10])
          else:
            pix['lat'][ipix] = float(flds[3])
            pix['lon'][ipix] = float(flds[4])
            pix['lst'][ipix] = float(flds[5])
            pix['sza'][ipix] = float(flds[6])

          for iset in range(1,nset+1):  # Loop over retrieval sets within pixel
            rec = f.readline()            # set header
            for iprf in range(nprf):      # Loop over profiles within each set
              flds  = f.readline().split()  # profile header
              prf   = flds[0][1:].lower() # Remove '*' character
              nlvp  = hdr['nlvprf'][prf]  # No. profile levels
              # every set has to be read to advance through the file,
              # but only the requested one is stored
              profil = np.fromfile(f,sep=" ",count=nlvp)
              if iset == idxset:          # save if required profile set
                if nlvp == 1:
                  pix[prf][ipix] = profil[0]
                else:
                  pix[prf][ipix,:] = profil
            rec = f.readline()    # pixel/set end marker
      first = False

  return hdr, pix
#-------
#hdr,pix = read_rtv('test.txt',idxset=2)

#print(pix['h2o'])
