Protocol definition

Here is the FlatBuffers definition of Batsim’s protocol. For examples of how to use each event of the protocol, please refer to the high-level API reference.

namespace batprotocol.fb;
///
/// Profiles
///

// Groups of profiles
/// The set of profile kinds that can be stored in the `profile` field of ProfileAndId.
/// NOTE(review): member order presumably determines the generated type discriminants, so new kinds should be appended at the end — confirm against the FlatBuffers documentation.
union Profile {
  DelayProfile
, ParallelTaskProfile
, ParallelTaskHomogeneousProfile
, SequentialCompositionProfile
, ForkJoinCompositionProfile
, ParallelTaskMergeCompositionProfile
, ParallelTaskOnStorageHomogeneousProfile
, ParallelTaskDataStagingBetweenStoragesProfile
, TraceReplayProfile
}

/// A profile described by a single delay value.
table DelayProfile {
  /// The delay amount. NOTE(review): the unit is not stated in this schema — presumably seconds, confirm.
  delay: float64;
}

/// A parallel task profile whose work is given explicitly, as a computation vector and a communication matrix.
table ParallelTaskProfile {
  /// The amount of computation (in flops) to do by each SimGrid executor. Can either be null or of size that equals the number of SimGrid executors.
  computation_vector: [float64];

  /// The amount of communication (in bytes) to do on each (oriented) link between the hosts where the SimGrid executors take place. Can either be null or of size that equals the square of the number of related SimGrid executors.
  /// The matrix is stored as a flat vector. NOTE(review): the element ordering (row-major vs. column-major) is not stated here — confirm.
  communication_matrix: [float64];
}

/// A composition profile: a sequence of other profiles, referenced by id, that is executed repetition_count times.
table SequentialCompositionProfile {
  /// The number of times the whole sequence should be executed.
  repetition_count: uint32 = 1;

  /// The identifiers of the profiles that make up the sequence. NOTE(review): presumably executed in vector order — confirm.
  profile_ids: [string] (required);
}

/// A composition profile that refers to other profiles by id.
/// NOTE(review): the name suggests the sub-profiles are started together and the composition ends when all of them have finished — confirm.
table ForkJoinCompositionProfile {
  /// The identifiers of the composed profiles.
  profile_ids: [string] (required);
}

/// A composition profile that refers to other profiles by id.
/// NOTE(review): the name suggests the referenced parallel task profiles are merged into a single parallel task — confirm.
table ParallelTaskMergeCompositionProfile {
  /// The identifiers of the composed profiles.
  profile_ids: [string] (required);
}

/// How the amounts of a homogeneous parallel task profile are turned into per-host and per-link values.
enum HomogeneousParallelTaskGenerationStrategy : byte {
  /// Every host computes computation_amount flops. Every link transfers communication_amount bytes (links from a host to itself transfer 0 bytes).
  DefinedAmountsUsedForEachValue

  /// Every host computes the same amount, and their sum equals computation_amount flops. Every link transfers the same amount (links from a host to itself transfer 0 bytes), and their sum equals communication_amount bytes.
, DefinedAmountsSpreadUniformly
}

/// A parallel task profile described by one computation amount and one communication amount, which are expanded according to generation_strategy.
table ParallelTaskHomogeneousProfile {
  /// The computation amount. Can be 0. How this value is used depends on generation_strategy.
  computation_amount: float64;

  /// The communication amount. Can be 0. How this value is used depends on generation_strategy.
  communication_amount: float64;

  /// The strategy used to generate the parallel task matrices.
  generation_strategy: HomogeneousParallelTaskGenerationStrategy;
}

/// A homogeneous parallel task profile that reads bytes from, and writes bytes to, a named storage.
table ParallelTaskOnStorageHomogeneousProfile {
  /// The storage on which the byte transfers occur.
  storage_name: string (required);

  /// The amount of bytes to read from the storage.
  bytes_to_read: float64;

  /// The amount of bytes to write on the storage.
  bytes_to_write: float64;

  /// The strategy used to generate the parallel task matrices. Only used to populate the communication matrix, as the computation vector is empty in this case.
  generation_strategy: HomogeneousParallelTaskGenerationStrategy;
}

/// A profile describing a transfer of bytes from one named storage to another.
table ParallelTaskDataStagingBetweenStoragesProfile {
  /// The amount of bytes to transfer from one storage to another.
  bytes_to_transfer: float64;

  /// The storage that sends data.
  emitter_storage_name: string (required);

  /// The storage that receives data.
  receiver_storage_name: string (required);
}

/// The kind of trace referenced by a TraceReplayProfile.
/// NOTE(review): the meaning of each value is not described in this schema — document what an SMPI trace and a FractionalComputation trace contain.
enum TraceType : byte {
  SMPI
, FractionalComputation
}

/// A profile defined by trace files.
table TraceReplayProfile {
  /// The type of the trace.
  trace_type: TraceType;

  /// The filename of the main trace file. This file should define a list of per-executor trace files.
  filename: string (required);
}


/// The description of a job, as carried by JobSubmittedEvent and RegisterJobEvent. The job identifier is not part of this table: it is stored next to it in those events.
table Job {
  /// Defines how many resources the job requests. This also defines the number of SimGrid executors.
  resource_request: uint32;

  /// If set, this is the amount of seconds after which the job will be automatically killed by Batsim.
  /// If null, the job has no walltime, it will not be automatically killed by Batsim.
  walltime: float64 = null;

  /// Custom user-defined data for the job. Batsim does not use this field at all, but external decision components (e.g., a scheduler or a dynamic submitter) can.
  extra_data: string;

  /// The job's profile name, which defines how the job should be simulated by Batsim. Expecting a "workload!name" syntax, but "name" is also allowed (in this case, only profiles from the job's workload can be used).
  profile_id: string (required);
}


/// Event emitted by Batsim to tell that a job has been submitted.
table JobSubmittedEvent {
  /// The identifier of the submitted job.
  job_id: string (required);

  /// The description of the submitted job.
  job: Job (required);

  /// The point in time at which the job has been submitted.
  submission_time: float64;

  /// The job's profile. Only forwarded if forward_profiles_on_job_submission is set (see EDCRequestedSimulationFeatures).
  profile: ProfileAndId;
}

// Completion
/// The final state of a job, as reported in JobCompletedEvent.
enum FinalJobState : byte {
  /// Job executed without reaching walltime, with exit_code=0.
  COMPLETED_SUCCESSFULLY

  /// Job executed without reaching walltime, with exit_code!=0.
, COMPLETED_FAILED

  /// Job executed but reached its walltime (killed automatically by Batsim).
, COMPLETED_WALLTIME_REACHED

  /// Job executed but killed by the decision process.
, COMPLETED_KILLED

  /// Job not executed. Has been rejected by the decision process.
, REJECTED
}

/// Event reporting that a job has reached a final state.
table JobCompletedEvent {
  /// The identifier of the job.
  job_id: string (required);

  /// The job's final state.
  state: FinalJobState;

  /// The job's return code. 0 is equivalent to a successful completion.
  return_code: int32 = 0;
}

// Rejection
/// Event rejecting a job. NOTE(review): presumably sent by the decision process, as FinalJobState.REJECTED is documented as "rejected by the decision process" — confirm.
table RejectJobEvent {
  /// The identifier of the job to reject.
  job_id: string (required);
}

// Execution
/// The predefined ways of placing SimGrid executors on the hosts of an allocation.
enum PredefinedExecutorPlacementStrategy : byte {
  /// Host1 will be used, then Host2, ..., then HostN, then Host1...
  SpreadOverHostsFirst

  /// Host1 will be used, then Host1, ... (until Host1 cores are filled), then Host2...
, FillOneHostCoresFirst
}

/// Wraps a PredefinedExecutorPlacementStrategy value in a table. This table is a member of the ExecutorPlacement union.
table PredefinedExecutorPlacementStrategyWrapper {
  /// The predefined strategy to use. Defaults to SpreadOverHostsFirst.
  strategy: PredefinedExecutorPlacementStrategy = SpreadOverHostsFirst;
}

/// A manual placement of SimGrid executors on hosts. This table is a member of the ExecutorPlacement union.
table CustomExecutorToHostMapping {
  /// Used to define precisely which host should be used by each SimGrid executor. The vector size must match the amount of SimGrid executors, which is defined by the (profile, host_allocation) tuple. The value at index i is used to determine where SimGrid executor i runs. Values are NOT host resource ids, they are unsigned integers in [0, size(host_allocation)[ that are used to refer to hosts indirectly via host_allocation.
  mapping: [uint32] (required);
}

/// How SimGrid executors are placed on hosts: either through a predefined strategy or through an explicit mapping.
/// Used by AllocationPlacement and ProfileAllocationPlacement.
union ExecutorPlacement {
  /// Use a predefined placement strategy.
  PredefinedExecutorPlacementStrategyWrapper

  /// Use a precise manual placement.
, CustomExecutorToHostMapping
}

/// Associates a storage name with a host. Used by ExecuteJobEvent.storage_placement.
table StorageHost {
  /// The storage name, as used in profiles.
  storage_name: string (required);

  /// The host unique identifier (as given by Batsim)
  host_id: uint32;
}

/// A host allocation together with the placement of SimGrid executors on it. Used as the main allocation of ExecuteJobEvent.
table AllocationPlacement {
  /// Intervalset of hosts where the job should be executed.
  host_allocation: string (required);

  /// Placement of SimGrid executors on hosts.
  executor_placement: ExecutorPlacement (required);
}

/// A host allocation and executor placement for one specific profile. Used by ExecuteJobEvent.profile_allocation_override.
table ProfileAllocationPlacement {
  /// The profile unique identifier. Expected syntax is "workload!name".
  profile_id: string (required);

  /// Intervalset of hosts where the profile should be executed.
  host_allocation: string (required);

  /// Placement of SimGrid executors on hosts for this profile.
  executor_placement: ExecutorPlacement (required);
}

/// Event requesting the execution of a job on a given allocation.
table ExecuteJobEvent {
  /// The identifier of the job to execute.
  job_id: string (required);

  /// Defines the job's main resource allocation and the placement of SimGrid executors on the allocated hosts.
  allocation: AllocationPlacement (required);

  /// Can be set to override the placement of sub-profiles. Only makes sense if the job's main profile is a composition profile.
  profile_allocation_override: [ProfileAllocationPlacement];

  /// Overrides the placement of storage names. Enables the use of abstract storage names in your profiles.
  storage_placement: [StorageHost];
}

// Kill
/// Event requesting that jobs be killed. NOTE(review): presumably answered by a JobsKilledEvent — confirm.
table KillJobsEvent {
  /// The identifiers of the jobs to kill.
  job_ids: [string] (required);
}

/// Progress of a killed profile, expressed as a single progress value. This table is a member of the KillProgress union.
table KillProgressAtomicProfile {
  /// The profile that has been killed. Syntax is "workload!name".
  profile_id: string (required);

  /// The progress of the profile's execution in [0,1]. 0 means the profile execution has just started, 1 means it has just finished.
  progress: float64;
}

/// Progress of a killed SequentialCompositionProfile. This table is a member of the KillProgress union.
table KillProgressSequentialProfile {
  /// The profile that has been killed. Syntax is "workload!name".
  profile_id: string (required);

  /// The iteration of the associated SequentialCompositionProfile when it has been killed.
  current_repetition: uint32;

  /// The index in [0, size(profile_ids)[ of the associated SequentialCompositionProfile when it has been killed.
  current_task_index: uint32;

  /// The (recursively defined) progress of the task of the associated SequentialCompositionProfile.
  current_task_progress: KillProgressWrapper (required);
}

/// Progress of a killed ForkJoinCompositionProfile. This table is a member of the KillProgress union.
table KillProgressForkJoinProfile {
  /// The profile that has been killed. Syntax is "workload!name".
  profile_id: string (required);

  /// The (recursively defined) progress of the tasks of the associated ForkJoinCompositionProfile.
  tasks_progress: [KillProgressWrapper] (required);
}

/// The progress of a killed profile. Always accessed through KillProgressWrapper.
/// NOTE(review): there is no member dedicated to ParallelTaskMergeCompositionProfile — confirm which member reports its progress.
union KillProgress {
  KillProgressAtomicProfile
, KillProgressSequentialProfile
, KillProgressForkJoinProfile
}

/// Table wrapper around the KillProgress union. It is the type used wherever a progress (or a vector of progresses) is stored.
/// Wrapping needed because of a missing flatbuffers feature: https://github.com/google/flatbuffers/issues/4725
table KillProgressWrapper {
  /// The wrapped progress.
  kill_progress: KillProgress (required);
}

/// Associates a job identifier with a progress. Used by JobsKilledEvent.progresses.
table JobAndProgress {
  /// The identifier of the job.
  job_id: string (required);

  /// The progress associated with the job.
  wrapper: KillProgressWrapper (required);
}

/// Event reporting that jobs have been killed, with the progress of each job.
table JobsKilledEvent {
  /// The identifiers of the killed jobs.
  job_ids: [string] (required);

  /// The progress of the jobs. Each entry carries its own job_id.
  /// NOTE(review): the relationship between job_ids and the job_id of each entry (same set? same order?) is not stated here — confirm.
  progresses: [JobAndProgress] (required);

  /// Only used if forward_profiles_on_jobs_killed is set (see EDCRequestedSimulationFeatures).
  profiles: [ProfileAndId];
}

// Dynamic registration
/// A profile together with its identifier. Used by RegisterProfileEvent, JobSubmittedEvent, JobsKilledEvent and SimulationBeginsEvent.
table ProfileAndId {
  /// Using "workload!name" syntax.
  id: string (required);

  /// The profile definition.
  profile: Profile (required);

  /// Custom user-defined data for the profile. Batsim does not use this field at all, but external decision components (e.g., a scheduler or a dynamic submitter) can.
  extra_data: string;
}

/// Event registering a profile. NOTE(review): presumably only accepted when the EDC has requested dynamic_registration — confirm.
table RegisterProfileEvent {
  /// The profile to register, with its identifier.
  profile: ProfileAndId (required);
}

/// Event registering a job. NOTE(review): presumably only accepted when the EDC has requested dynamic_registration — confirm.
table RegisterJobEvent {
  /// Using "workload!name" syntax.
  job_id: string (required);

  /// The description of the job to register.
  job: Job (required);
}

///
/// Resource management events
///

/// A set of resources, which is either a set of hosts or a set of links. Used by CreateProbeEvent and ProbeDataEmittedEvent.
union Resources {
  HostResources
, LinkResources
}

/// A set of hosts, identified by their ids. This table is a member of the Resources union.
table HostResources {
  /// The intervalset of host ids
  host_ids: string (required);
}

/// A set of links, identified by their names. This table is a member of the Resources union.
table LinkResources {
  /// The names of the links
  link_ids: [string] (required);
}

// Probe Creation
/// Defines when a probe takes its measures. Used by CreateProbeEvent.measurement_triggering_policy.
union ProbeMeasurementTriggeringPolicy {
  /// Declared as a union because other kinds of triggers (e.g., reacting to another probe's event) are planned for later.
  TemporalTriggerWrapper
}

/// Whether, and how, a probe accumulates its data. Used by CreateProbeEvent.data_accumulation_strategy.
union ProbeDataAccumulationStrategy {
  NoProbeDataAccumulation
, ProbeDataAccumulation
}

/// Empty marker table: the ProbeDataAccumulationStrategy member that stands for "no accumulation".
table NoProbeDataAccumulation {
}

/// The reset behaviour of an accumulating probe. Used by ProbeDataAccumulation.reset_mode.
/// NOTE(review): when the reset happens is not stated in this schema — confirm.
union ResetMode {
  NoReset
, ProbeAccumulationReset
}

/// Empty marker table: the ResetMode member that stands for "no reset".
table NoReset {
}

/// The ResetMode member that carries a reset value.
table ProbeAccumulationReset {
  /// NOTE(review): presumably the value the accumulated data is set to when a reset occurs — confirm.
  new_value: float64;
}

/// The function used to accumulate probe data. Used by ProbeDataAccumulation.cumulative_function.
/// Note that the value `Sum` is distinct from the table `Sum`, which is a member of ResourcesAggregationFunction.
enum CumulativeFunction : byte {
  Sum
, Minimum
, Maximum
}

/// The ProbeDataAccumulationStrategy member that enables accumulation and configures it.
table ProbeDataAccumulation {
  /// The reset behaviour of the accumulation.
  reset_mode: ResetMode (required);

  /// The function used to accumulate values.
  cumulative_function: CumulativeFunction;

  /// NOTE(review): presumably whether the accumulated value is normalized by the elapsed time — confirm.
  temporal_normalization: bool;
}

/// The metrics that a probe can measure. Used by CreateProbeEvent and ProbeDataEmittedEvent.
enum Metrics : byte {
  /// The power in J/s
  Power

  /// The network load in bytes/s
, NetworkLoad

  /// The load in flop/s (amount of computing operations per second)
, ComputationLoad

  /// The normalized network load in [0,1]. Cannot be accumulated over time as this metric is not defined per unit of time.
, NormalizedNetworkLoad

  /// The normalized computation load in [0,1]. Cannot be accumulated over time as this metric is not defined per unit of time.
, NormalizedComputationLoad
}

/// How probe values are aggregated over time. Used by CreateProbeEvent.temporal_aggregation_function.
/// ExponentialMovingAverage and SlidingWindowOperation are currently empty placeholders (marked TODO).
union TemporalAggregationFunction {
  NoTemporalAggregation
, ExponentialMovingAverage
, SlidingWindowOperation
}

/// Empty marker table: the TemporalAggregationFunction member that stands for "no temporal aggregation".
table NoTemporalAggregation {
}

/// Placeholder member of TemporalAggregationFunction. It has no field yet.
table ExponentialMovingAverage {
  // TODO
}

/// Placeholder member of TemporalAggregationFunction. It has no field yet.
table SlidingWindowOperation {
  // TODO
}

/// Empty marker table: the ResourcesAggregationFunction member that stands for "no aggregation over resources".
/// NOTE(review): presumably results in VectorialProbeData (one value per resource) — confirm.
table NoResourcesAggregation {
}

/// Empty marker table: the ResourcesAggregationFunction member that selects a sum. Distinct from the `Sum` value of the CumulativeFunction enum.
table Sum {
}

/// Empty marker table: the ResourcesAggregationFunction member that selects an arithmetic mean.
table ArithmeticMean {
}

/// The ResourcesAggregationFunction member that selects a quantile.
table QuantileFunction {
  /// The normalized threshold (in [0,1]) whose value of the quantile function (AKA inverse cumulative distribution function) is sought. threshold=1/2 means that we want the value of the median. threshold=1/4 means that we want the value of the first quartile.
  threshold: float64;
}

/// How the values of the probed resources are aggregated. Used by CreateProbeEvent.resources_aggregation_function.
union ResourcesAggregationFunction {
  NoResourcesAggregation
, Sum
, ArithmeticMean
, QuantileFunction
}

/// The condition under which a probe emits its data. Used by CreateProbeEvent.emission_filtering_policy.
union ProbeEmissionFilteringPolicy {
  NoFiltering
, ThresholdFilteringFunction
}

/// The comparison operators available to ThresholdFilteringFunction.
enum BooleanComparisonOperator : byte {
  GreaterThan
, GreaterThanOrEqual
, LessThan
, LessThanOrEqual
}

/// Empty marker table: the ProbeEmissionFilteringPolicy member that stands for "no filtering".
table NoFiltering {
}

/// The ProbeEmissionFilteringPolicy member that filters emissions by comparing against a threshold.
/// NOTE(review): presumably data is emitted when "value <operator> threshold" holds — confirm the operand order.
table ThresholdFilteringFunction {
  /// The threshold
  threshold: float64;

  /// The operator for our filter
  operator: BooleanComparisonOperator;
}

/// Event creating a probe: which metrics it measures, on which resources, when it measures, and how data is accumulated, aggregated and filtered.
table CreateProbeEvent {
  /// The name of the probe
  probe_id: string (required);

  /// The metrics that we want to study
  metrics: Metrics;

  /// The resources onto which the probe should be set
  resources: Resources (required);

  /// Whether and how the probe accumulates its data
  data_accumulation_strategy: ProbeDataAccumulationStrategy (required);

  /// When our probe is taking measures
  measurement_triggering_policy: ProbeMeasurementTriggeringPolicy (required);

  /// The resources aggregation function
  resources_aggregation_function: ResourcesAggregationFunction (required);

  /// The temporal aggregation function
  temporal_aggregation_function: TemporalAggregationFunction (required);

  /// The condition that has to be met for data to be emitted
  emission_filtering_policy: ProbeEmissionFilteringPolicy (required);
}

/// Event stopping a probe, identified by its name.
table StopProbeEvent {
  /// The name of the probe
  probe_id: string (required);
}

/// Event resetting a probe, identified by its name, to a given value.
table ResetProbeEvent {
  /// The name of the probe
  probe_id: string (required);

  /// The new desired value
  new_value: float64;
}

/// Event manually triggering a probe, identified by its name.
table TriggerProbeEvent {
  /// The name of the probe
  probe_id: string (required);

  /// If set to true, the triggering will always result in a ProbeDataEmittedEvent regardless of the Probe data emitting filtering policy. Defaults to true.
  force_data_emission: bool = true;
}

// Probe Data
/// The data emitted by a probe: either one aggregated value or one value per resource. Used by ProbeDataEmittedEvent.data.
union ProbeData {
  AggregatedProbeData
, VectorialProbeData
}

/// The ProbeData member that carries a single value.
table AggregatedProbeData {
  /// The value aggregated over all the resources
  data: float64;
}

/// The ProbeData member that carries one value per resource.
table VectorialProbeData {
  /// The values (one value per resource with the same order as in the probe resources)
  data: [float64] (required);
}

/// Event carrying the data emitted by a probe, together with the probe's resources and metrics and some emission counters.
table ProbeDataEmittedEvent {
  /// The name of the probe
  probe_id: string (required);

  /// The resources observed by the probe
  resources: Resources (required);

  /// The metrics measured by the probe
  metrics: Metrics;

  /// The returned value
  data: ProbeData (required);

  /// True if the Event has been directly requested by the user via a TriggerProbeEvent, false otherwise
  manually_triggered: bool;

  /// The number of times that the probe has emitted data
  nb_emitted: uint32;

  /// The number of times that the probe has been triggered
  nb_triggered: uint32;
}

///
/// Simulation management events
///

/// The unit in which the integer time values of OneShot and Periodic are expressed.
enum TimeUnit : byte {
  Second
, Millisecond
}

/// A temporal trigger that happens only once, at a given time. This table is a member of the TemporalTrigger union.
table OneShot {
  /// The point in time at which the event should occur, expressed as an integer number of time_unit.
  time: uint64;

  /// The time unit. Defaults to Second.
  time_unit: TimeUnit = Second;
}

/// Empty marker table: the PeriodicMode member that stands for a periodic trigger without a bounded number of periods.
table Infinite {}

/// The PeriodicMode member that bounds the number of periods of a periodic trigger.
table FinitePeriodNumber {
  /// The number of periods.
  nb_periods: uint32;
}

/// Whether a Periodic trigger is unbounded or limited to a given number of periods. Used by Periodic.mode.
union PeriodicMode {
  Infinite
, FinitePeriodNumber
}

/// A temporal trigger that happens periodically. This table is a member of the TemporalTrigger union.
table Periodic {
  /// The _distance in time_ between two consecutive events.
  period: uint64;

  /// How measures should be offset from time 0. Defaults to 0.
  offset: uint64 = 0;

  /// The time unit in which period and offset are expressed. Defaults to Second.
  time_unit: TimeUnit = Second;

  /// Whether the trigger is unbounded or limited to a number of periods.
  mode: PeriodicMode (required);
}

/// A time-based trigger, either one-shot or periodic. Used directly by CallMeLaterEvent.when, and through TemporalTriggerWrapper by ProbeMeasurementTriggeringPolicy.
union TemporalTrigger {
  OneShot
, Periodic
}

/// Table wrapper around the TemporalTrigger union. This table is a member of the ProbeMeasurementTriggeringPolicy union.
/// Wrapping needed because of a missing flatbuffers feature: https://github.com/google/flatbuffers/issues/4725
table TemporalTriggerWrapper {
  /// The wrapped trigger.
  temporal_trigger: TemporalTrigger (required);
}

/// Event requesting one or several future calls. The calls are notified through RequestedCallEvent.
table CallMeLaterEvent {
  /// Can be used by the decision process to identify why the CallMeLaterEvent has been requested. This field is forwarded in all RequestedCallEvent related to a CallMeLaterEvent.
  call_me_later_id: string (required);

  /// Defines when the future call will be done.
  when: TemporalTrigger (required);
}

/// Event notifying a call that was requested through a CallMeLaterEvent.
table RequestedCallEvent {
  /// The identifier forwarded from the related CallMeLaterEvent.
  call_me_later_id: string (required);

  /// True if and only if this RequestedCallEvent is the last one from a non-infinite periodic call
  last_periodic_call: bool = false;
}

/// Event stopping a previously requested call. NOTE(review): presumably cancels the remaining calls of the CallMeLaterEvent that has the same identifier — confirm.
table StopCallMeLaterEvent {
  /// The identifier of the CallMeLaterEvent to stop.
  call_me_later_id: string (required);
}

/// The optional simulation features that an EDC can request. Sent in EDCHelloEvent.requested_simulation_features. Every feature defaults to false.
table EDCRequestedSimulationFeatures {
  /// If true the dynamic registration of jobs and profiles will be enabled for this EDC. This means the EDC must send a FinishRegistrationEvent so the simulation can end.
  dynamic_registration: bool = false;

  /// If true any existing profile can be used to register dynamic jobs. Otherwise the EDC must make sure that the desired profile has not been garbage collected from Batsim memory when registering a job (which happens when the last job that references a profile completes).
  profile_reuse: bool = false;

  /// If true Batsim will emit a JobSubmittedEvent to this EDC when the EDC registers a job.
  acknowledge_dynamic_jobs: bool = false;

  /// If true Batsim will include profile information in JobSubmittedEvent.
  forward_profiles_on_job_submission: bool = false;

  /// If true Batsim will include profile information in JobsKilledEvent.
  forward_profiles_on_jobs_killed: bool = false;

  /// If true Batsim will include profile information in SimulationBeginsEvent.
  forward_profiles_on_simulation_begins: bool = false;
}

/// Event in which a decision component identifies itself (protocol version, name, version) and requests simulation features.
table EDCHelloEvent {
  /// The version of the batprotocol. Expecting a released version of batprotocol, for example "0.1.0".
  batprotocol_version: string (required);

  /// The Git commit of the batprotocol version. Optional but recommended for traceability.
  batprotocol_commit: string;

  /// The name of the decision component. For example "batsched" or "pybatsim". Cannot contain comma (ASCII/UTF-8 0x2C) nor double quote (ASCII/UTF-8 0x22).
  decision_component_name: string (required);

  /// The version of the decision component. Only used for traceability purpose. For example "1.2.0", "v3.2.0-1-g880dd60"... Cannot contain comma (ASCII/UTF-8 0x2C) nor double quote (ASCII/UTF-8 0x22).
  decision_component_version: string (required);

  /// The commit (from a version control system, typically Git) that precisely identifies the decision component version. Can be null. Example: "880dd60c537d7d7a8246daaf5b2d1f7bfea3cbf4". Cannot contain comma (ASCII/UTF-8 0x2C) nor double quote (ASCII/UTF-8 0x22).
  decision_component_commit: string;

  /// The simulation features requested by the decision component.
  requested_simulation_features: EDCRequestedSimulationFeatures (required);
}

/// A key/value property. Used by Host.properties and Host.zone_properties.
table HostProperty {
  /// The property name.
  key: string (required);

  /// The property value.
  value: string (required);
}

/// The state of a host. Used by Host.state.
enum HostState : byte {
  SLEEPING
, IDLE
, COMPUTING
, SWITCHING_ON
, SWITCHING_OFF
// TODO: failure states
}

/// The description of a host of the simulated platform. Used for both the computation hosts and the storage hosts of SimulationBeginsEvent.
table Host {
  /// The host unique identifier.
  id: uint32;

  /// The host name.
  name: string (required);

  /// The host power state. Defaults to 0.
  pstate: uint32 = 0;

  /// The host power state count. Defaults to 1.
  pstate_count: uint32 = 1;

  /// The host state.
  state: HostState;

  /// The properties defined in this specific host.
  properties: [HostProperty];

  /// The properties defined for the Zone that contains this host.
  zone_properties: [HostProperty];

  /// The host core count. Defaults to 1.
  core_count: uint32 = 1;

  /// The host computation speeds in each power state. Vector is indexed by power state number in [0,pstate_count[.
  computation_speed: [float64] (required);
}

/// A workload name together with the file it comes from. Used by SimulationBeginsEvent.workloads.
table WorkloadAndFilename {
  /// The workload unique name. This name will be prefixed in jobs with the "workload_name!job_id" syntax.
  name: string (required);

  /// The workload filename.
  filename: string (required);
}

/// An external event list name together with the file it comes from. Used by SimulationBeginsEvent.external_event_lists.
table ExternalEventListAndFilename {
  /// The external event list unique name.
  name: string (required);

  /// The external event list filename.
  filename: string (required);
}

/// Event describing the simulation that begins: Batsim's version and invocation, the simulated platform, and the static inputs.
table SimulationBeginsEvent {
  /// The version of the batprotocol. Expecting a semantic versioning format. For example "1.0.0", "0.1.0+9a04b9d60a211c380d62abbe70e2a3b2ffb869b3"...
  batprotocol_version: string (required);

  /// The version of Batsim. Only used for traceability purpose. For example "4.0.0", "v4.0.0-69-g4f12a4b"... Cannot contain comma (ASCII/UTF-8 0x2C) nor double quote (ASCII/UTF-8 0x22).
  batsim_version: string (required);

  /// The Git commit that precisely identifies the Batsim version. Example: "4f12a4b51ab38ff2936bc1116fa3f34a05f494e6". If not null, only contains hexadecimal characters.
  batsim_commit: string;

  /// The number of hosts in the simulated platform.
  host_number: uint32;

  /// The number of computation hosts in the simulated platform.
  computation_host_number: uint32;

  /// The computation hosts.
  computation_hosts: [Host] (required);

  /// The number of storage hosts in the simulated platform.
  storage_host_number: uint32;

  /// The storage hosts.
  storage_hosts: [Host] (required);

  /// Batsim's command-line arguments as received by Batsim.
  batsim_arguments: [string] (required);

  /// Batsim's execution context, similar to what you would obtain via Batsim's command-line interface.
  batsim_execution_context_json: string (required);

  /// The static workloads given to Batsim as input.
  workloads: [WorkloadAndFilename] (required);

  /// The static external event lists given to Batsim as input.
  external_event_lists: [ExternalEventListAndFilename] (required);

  /// The profiles defined in the static workloads given to Batsim as input. Can be null if profiles are not forwarded (see forward_profiles_on_simulation_begins in EDCRequestedSimulationFeatures).
  profiles: [ProfileAndId];
}

/// Event without payload, signalling that the simulation ends.
table SimulationEndsEvent {
}

/// Event reporting a simulation error.
table SimulationErrorEvent {
  /// The error. NOTE(review): presumably a human-readable message — confirm whether any format is expected.
  error: string (required);
}

/// Event without payload, signalling that all the static jobs have been submitted.
table AllStaticJobsHaveBeenSubmittedEvent {
}

/// Event without payload, signalling that all the static external events have been injected.
table AllStaticExternalEventsHaveBeenInjectedEvent {
}

/// Event without payload. An EDC that has enabled dynamic_registration must send it so that the simulation can end (see EDCRequestedSimulationFeatures).
table FinishRegistrationEvent {
}

/// Event without payload. NOTE(review): presumably sent by the EDC to make Batsim stop the simulation immediately — confirm.
table ForceSimulationStopEvent {
}

/// The EDC asks Batsim to change the PState of some hosts
table ChangeHostsPStateEvent {
  /// Intervalset of hosts on which to change the PState
  host_ids: string (required);

  /// The new pstate
  pstate: uint32;
}

/// Batsim tells the EDC that the PState of some hosts changed
table HostsPStateChangedEvent {
  /// Intervalset of hosts on which the PState changed
  host_ids: string (required);

  /// The new pstate
  pstate: uint32;
}

/// The EDC asks Batsim to turn on/off some hosts
table TurnOnOffHostsEvent {
  /// Intervalset of hosts to turn on/off
  host_ids: string (required);

  /// The virtual pstate to reach
  /// NOTE(review): which values mean "on" and "off" is not stated in this schema — document the encoding.
  state: uint32;
}

/// Batsim tells the EDC that the state of some hosts changed
table HostsTurnedOnOffEvent {
  /// Intervalset of hosts impacted
  host_ids: string (required);

  /// The new state
  /// NOTE(review): typed as uint32 rather than HostState — presumably uses the same encoding as TurnOnOffHostsEvent.state; confirm.
  state: uint32;
}


/// List of external event types. Used by ExternalEventOccurredEvent.external_event. Only the generic type exists for now.
union ExternalEvent {
  GenericExternalEvent
}

/// A generic external event containing a string of data. This table is a member of the ExternalEvent union.
table GenericExternalEvent {
  /// The data associated with the external event
  data: string (required);
}

/// Batsim tells the EDC that an external event has occurred
table ExternalEventOccurredEvent {
  /// The unique identifier. NOTE(review): presumably the identifier of the external event itself — confirm.
  id: string (required);

  /// The external event
  external_event: ExternalEvent (required);
}


///
/// Main message type
///
/// All the event types that can be carried by an EventAndTimestamp, and therefore by a Message.
/// NOTE(review): member order presumably determines the generated type discriminants, so new events should be appended at the end — confirm against the FlatBuffers documentation.
union Event {
  // Job events
  JobSubmittedEvent
, JobCompletedEvent
, RejectJobEvent
, ExecuteJobEvent
, KillJobsEvent
, JobsKilledEvent

  // Dynamic registration events
, RegisterProfileEvent
, RegisterJobEvent

  // Probe events
, CreateProbeEvent
, StopProbeEvent
, TriggerProbeEvent
, ResetProbeEvent
, ProbeDataEmittedEvent

  // Call-me-later events
, CallMeLaterEvent
, RequestedCallEvent
, StopCallMeLaterEvent

  // Simulation management, host state and external events
, EDCHelloEvent
, SimulationBeginsEvent
, SimulationEndsEvent
, SimulationErrorEvent
, AllStaticJobsHaveBeenSubmittedEvent
, AllStaticExternalEventsHaveBeenInjectedEvent
, FinishRegistrationEvent
, ForceSimulationStopEvent
, ChangeHostsPStateEvent
, HostsPStateChangedEvent
, TurnOnOffHostsEvent
, HostsTurnedOnOffEvent
, ExternalEventOccurredEvent
}

/// An event together with its timestamp. Used by Message.events.
table EventAndTimestamp {
  /// The time associated with the event. NOTE(review): presumably simulation time in seconds — confirm.
  timestamp: float64;

  /// The event itself.
  event: Event (required);
}

/// The top-level message of the protocol: a list of timestamped events. Declared as the schema's root type.
table Message {
  /// NOTE(review): presumably the simulation time at which the message is sent — confirm.
  now: float64;

  /// The events carried by the message. NOTE(review): presumably ordered by timestamp — confirm.
  events: [EventAndTimestamp] (required);
}

// Message is the root table of every serialized buffer.
root_type Message;