◀ Back

Offset files

Offset files are utility files designed to allow easy navigation between other files. There is one such file for each snapshot of each run. They provide a mapping between halo or particle indices and their memory position (i.e. array indices in the files). The available datasets (organized in three groups) are described in the table below (click to open).

Offset datasets ▾
Attribute Dimensions Description
FileOffsets/SnapByType (Nchunks, 6) The offset table (by type) for the snapshot files, giving the first particle index in each snap file chunk. Determines which file(s) a given offset+length will cover. A two-dimensional array, where the element (i,j) equals the cumulative sum (i.e. offset) of particles of type j in all snapshot file chunks prior to chunk i.
FileOffsets/Group Nchunks The offset table for groups in the group catalog files. A one-dimensional array, where the ith element equals the first group number in the ith groupcat file chunk.
FileOffsets/Subhalo Nchunks The offset table for subhalos in the group catalog files. A one-dimensional array, where the ith element equals the first subgroup number in the ith groupcat file chunk.
Group/SnapByType (Ngroups_Total,6) The offset table for a given group number (by type), into the snapshot files. That is, the global particle index (across all snap file chunks) of the first particle of this group. A two-dimensional array, where the element (i,j) equals the cumulative sum (i.e. offset) of particles of type j in all groups prior to group number i.
Subhalo/SnapByType (Nsubgroups_Total,6) The offset table for a given subhalo number (by type), into the snapshot files. That is, the global particle index (across all snap file chunks) of the first particle of this subhalo. A two-dimensional array, where the element (i,j) equals the cumulative sum (i.e. offset) of particles of type j in all subhalos prior to subhalo number i.
Subhalo/LHaloTree/File Nsubgroups_Total The LHaloTree file number with the tree which contains this subhalo.
Subhalo/LHaloTree/Num Nsubgroups_Total The number of the tree within the above file within which this subhalo is located (e.g. TreeX).
Subhalo/LHaloTree/Index Nsubgroups_Total The LHaloTree index within the above tree dataset at which this subhalo is located.
For convenience, we provide below a set of functions that employ offsets to speed up the loading of particles or haloes.
Example code ▾
import numpy as np
import h5py

def get_global_index(simpath, snap, offset_type, local_index, chunk):

    """
    Converts an index local to one chunk file into a global index (i.e. counted across all chunk files, taken in order).

    The first global index stored in each chunk is read from the offset file of the snapshot
    and the local index is added to it.

    Parameters
    ----------
    simpath      : string
                   path to the base simulation directory
    snap         : int
                   number of the snapshot to consider
    offset_type  : string
                   kind of object considered. Accepted values: 'particle', 'group', 'subhalo'
    local_index  : int
                   index of the object within its chunk file
    chunk        : int
                   number of the chunk file the object resides in

    Returns
    -------
    global_index  : int
                    index of the object across all chunk files. For offset_type == 'particle' the
                    offset table has one column per particle type, so the selected row (one entry
                    per particle type) is returned with local_index added to each entry.
    """

    # offset-file dataset holding the first global index of every chunk, for each kind of object
    dataset_for_type = {
        'particle': 'FileOffsets/SnapByType',
        'group': 'FileOffsets/Group',
        'subhalo': 'FileOffsets/Subhalo',
    }
    assert offset_type in tuple(dataset_for_type)

    offsets_path = f'{simpath}/postprocessing/offsets/offsets_{snap:03d}.hdf5'
    with h5py.File(offsets_path, 'r') as offsets:
        first_index_in_chunk = offsets[dataset_for_type[offset_type]][chunk]

    return first_index_in_chunk + local_index



def get_chunk_and_local_index(simpath, snap, offset_type, global_index, ptype=-1):

    """
    Converts a global index (i.e. counted across all chunk files) into a chunk file number and an index local to that chunk.

    The chunk selected is the last one whose first global index is not larger than global_index.

    Parameters
    ----------
    simpath       : string
                    path to the base simulation directory
    snap          : int
                    number of the snapshot to consider
    offset_type   : string
                    kind of object considered. Accepted values: 'particle', 'group', 'subhalo'
    global_index  : int
                    index of the object across all chunk files
    ptype         : int, required for offset_type == 'particle', ignored otherwise
                    particle type (0 <= ptype < 6)

    Returns
    -------
    chunk        : int
                   number of the chunk file the object resides in
    local_index  : int
                   index of the object within that chunk file
    """

    assert offset_type in ('particle', 'group', 'subhalo')
    is_particle = offset_type == 'particle'
    if is_particle:
        assert 0 <= ptype < 6

    dataset_name = {
        'particle': 'FileOffsets/SnapByType',
        'group': 'FileOffsets/Group',
        'subhalo': 'FileOffsets/Subhalo',
    }[offset_type]

    offsets_path = f'{simpath}/postprocessing/offsets/offsets_{snap:03d}.hdf5'
    with h5py.File(offsets_path, 'r') as offsets:
        table = offsets[dataset_name]
        # first global index stored in each chunk (column of the requested type for particles)
        first_index = table[:, ptype] if is_particle else table[()]

    # last chunk starting at or before the requested index
    chunk = np.flatnonzero(first_index <= global_index)[-1]
    local_index = global_index - first_index[chunk]

    return chunk, local_index




def get_particles_global_index_for_halo(sim, snap, halo_type, global_index):

    """
    Returns the global index (i.e. across all snapshot chunk files) of the first particle of a group/subhalo, for each particle type.

    Parameters
    ----------
    sim           : string
                    path to the base simulation directory
    snap          : int
                    number of the snapshot to consider
    halo_type     : string
                    string identifying the type of halo considered. Accepted values: 'group', 'subhalo'
    global_index  : int
                    index of the halo in the snapshot (across all chunk files)

    Returns
    -------
    global_particle_index  : [int]*6
                             global index of the first particle of the halo, for each particle type
    """

    assert halo_type in ('group', 'subhalo'), "halo_type must be 'group' or 'subhalo'"

    # offset-file dataset holding, for each halo, the first particle index of every type
    offset_file_key = {
        'group': 'Group/SnapByType',
        'subhalo': 'Subhalo/SnapByType',
    }[halo_type]

    # The base directory is the `sim` argument; only the offset file is needed here.
    with h5py.File(f'{sim}/postprocessing/offsets/offsets_{snap:03d}.hdf5', 'r') as offset_file:
        global_particle_index = offset_file[offset_file_key][global_index]

    return global_particle_index




def get_location_of_particles_in_halo(sim, snap, halo_type, global_index, verbose=False):

    """
    Returns the location (snapshot chunk files and index ranges within them) of the particles that belong to a group/subgroup.

    The particles of a halo may span several consecutive snapshot chunk files; one
    (file, start, end) triplet is returned per chunk file touched, for each particle type.

    Parameters
    ----------
    sim           : string
                    path to the base simulation directory
    snap          : int
                    number of the snapshot to consider
    halo_type     : string
                    string identifying the type of halo considered. Accepted values: 'group', 'subhalo'
    global_index  : int
                    index of the halo in the snapshot (across all chunk files)
    verbose       : bool, optional (default=False)
                    print info to stdout

    Returns
    -------
    files         : list of lists of strings
                    filenames of the snapshot chunk files containing particles of a given ptype
                    (outer list indexed by ptype; empty for types the halo does not contain)
    start_index   : list of lists of int
                    initial index in the chunk file for particles of the halo
    final_index   : list of lists of int
                    final index (exclusive, i.e. usable as a slice end) in the chunk file for particles of the halo


    Example usage
    -------------
    files, start_index, final_index = get_location_of_particles_in_halo('Thesan-1', 80, 'subhalo', 43)
    gas_positions = []
    for filename, i_start, i_end in zip(files[0], start_index[0], final_index[0]):
        with h5py.File(filename, 'r') as f:
            gas_positions.append( f['PartType0/Coordinates'][i_start:i_end] )
    gas_positions = np.concatenate( gas_positions )
    """

    assert halo_type in ('group', 'subhalo'), "halo_type must be 'group' or 'subhalo'"

    # group-catalog dataset holding the number of particles of each type in every halo
    group_file_key = {
        'group': 'Group/GroupLenType',
        'subhalo': 'Subhalo/SubhaloLenType',
    }[halo_type]

    # locate the halo in the group catalog and read its particle counts (one per type)
    ichunk, ihalo = get_chunk_and_local_index(sim, snap, halo_type, global_index)
    groups_file_name = f'{sim}/output/groups_{snap:03d}/fof_subhalo_tab_{snap:03d}.{ichunk}.hdf5'
    with h5py.File(groups_file_name, 'r') as groups_file:
        halo_num_particles = groups_file[group_file_key][ihalo]
    nonzero_ptypes = [int(ptype) for ptype in np.where(halo_num_particles > 0)[0]]

    # global index of the first particle of the halo, for each type
    global_particle_index = get_particles_global_index_for_halo(sim, snap, halo_type, global_index)

    # chunk file and in-chunk index of the first particle of each type (-1 where the halo has none)
    particle_chunks      = np.array([-1]*6, dtype=np.int64)
    particle_local_index = np.array([-1]*6, dtype=np.int64)
    for ptype in nonzero_ptypes:
        c, li = get_chunk_and_local_index(sim, snap, 'particle', global_particle_index[ptype], ptype=ptype)
        particle_chunks[ptype] = c
        particle_local_index[ptype] = li

    if verbose:
        print(f"ichunk = {ichunk}, ihalo = {ihalo}")
        print(f"global_index = {global_index}, global_particle_index = {global_particle_index}, particle_chunks = {particle_chunks}, particle_local_index = {particle_local_index}, halo_num_particles = {halo_num_particles}")

    files       = [[] for _ in range(6)]
    start_index = [[] for _ in range(6)]
    final_index = [[] for _ in range(6)]

    for ptype in nonzero_ptypes:
        # plain Python ints avoid surprises when mixing numpy integer dtypes in the arithmetic below
        num_wanted = int(halo_num_particles[ptype])
        first_chunk = int(particle_chunks[ptype])
        local_start = int(particle_local_index[ptype])

        num_read = 0
        extra_snap_chunk = 0
        while num_read < num_wanted:
            snapshot_file_name = f"{sim}/output/snapdir_{snap:03d}/snap_{snap:03d}.{first_chunk+extra_snap_chunk}.hdf5"
            with h5py.File(snapshot_file_name, 'r') as snapshot_file:
                num_in_file = int(snapshot_file['Header'].attrs['NumPart_ThisFile'][ptype])
            # take what is still missing, but no more than what this file holds from local_start on
            particles_here = min(num_wanted - num_read, num_in_file - local_start)
            if verbose:
                print( f"    ptype = {ptype}, extra_snap_chunk = {extra_snap_chunk}, num_read = {num_read}, particles_here = {particles_here}, halo_num_particles[ptype] = {num_wanted}, particle_local_index[ptype] = {local_start}" )

            files[ptype].append( snapshot_file_name )
            start_index[ptype].append( local_start )
            final_index[ptype].append( local_start + particles_here )

            # move on to the next chunk file, where the halo's particles start at index 0
            num_read += particles_here
            extra_snap_chunk += 1
            local_start = 0

    return files, start_index, final_index