From MultimediaWiki
Revision as of 20:30, 16 August 2012 by Espes (talk | contribs) (Xact moved to XACT: Capitalise)
Jump to navigation Jump to search

Cross-platform Audio Creation Tool (XACT) is an audio framework released as part of the DirectX SDK.

Distributed XACT projects consist of three file formats:

Wave Banks (.xwb)

A file format containing a collection of waves

Open source parsers:

Sound Banks (.xsb)

A collection of sounds and cues

  • Sounds - a sound has one or more waves together with properties like volume and pitch. Sounds are made up of tracks.
    • Tracks - tracks are made up of events, e.g. the simplest track has a Play Wave event
    • Events - various actions that take place within a track. Actions include: Play, Stop, Set Volume, Set Pitch etc.
  • Cues - a cue is used in code to trigger sounds. Each cue is made up of one or more sounds
sturct SoundBank {
    struct SoundBankHeader header;

    struct WaveBankNameTableEntry[header.numWaveBanks] wavebankNames;
    struct SoundEntry[header.numSounds] sounds;
    struct SimpleCueEntry[header.numSimpleCues] simpleCues;
    struct ComplexCueEntry[header.numComplexCues] complexCues;


// Header of a sound bank file.
// The hex number in each comment is the field's byte offset from the start of
// the header. The fields are laid out back to back with no alignment padding
// (e.g. a uint16_t at 0x13), so a natively aligned C struct will NOT match
// this layout; read the fields individually or use a packed representation.
struct SoundBankHeader {
    uint32_t magic; //0x00           "SDBK"
    uint16_t toolVersion; //0x04     0x2E
    uint16_t formatVersion; //0x06   0x2B
    uint16_t crc; //0x08             fcs16 checksum of all following data
    uint32_t lastModifiedLow; //0x0a
    uint32_t lastModifiedHigh; //0x0e
    uint8_t platform; //0x12         ??
    uint16_t numSimpleCues; //0x13
    uint16_t numComplexCues; //0x15
    uint16_t unkn3; //0x17
    uint16_t numTotalCues; //0x19    ??
    uint8_t numWaveBanks; //0x1b
    uint16_t numSounds; //0x1c
    uint32_t cueNameTableLen; //0x1e
    uint32_t simpleCuesOffset; //0x22
    uint32_t complexCuesOffset; //0x26
    uint32_t cueNamesOffset; //0x2a
    uint32_t unknOffset; //0x2e
    uint32_t variationTablesOffset; //0x32
    uint32_t unknOffset2; //0x36
    uint32_t waveBankNameTableOffset; //0x3a
    uint32_t cueNameHashTableOffset; //0x3e    16-bit hashes each
    uint32_t cueNameHashValsOffset; //0x42
    uint32_t soundsOffset; //0x46
    char name[64]; //0x4a
};

// One entry of the wave bank name table: a fixed 64-byte name field.
// NOTE(review): presumably NUL-padded text - confirm against real files.
struct WaveBankNameTableEntry {
    char name[64];
};

sturct SoundEntry {
    uint8_t flags; //0x0
    uint16_t category; //0x1      ???
    uint8_t unkn2; //0x3
    uint16_t volume; //0x4 maybe pitch
    uint8_t unkn3; //0x6
    uint16_t entryLength; //0x7
    if flags 1 (complex sound):
        uint8_t numClips; //0x9
        uint16_t trackIndex; //0x9
        uint8_t waveBankIndex; //0x11

    if flags in 2/4/8:
        uint16_t extraLen; //0x12
        //then a rpc table for each flag
    if flags in 16:
        uint16_t extraLen; //0x12     7

        //dsp preset table
         uint8_t num;
         uint32_t unkn[num];
    if flags 1:
        struct {
            uint8_t unkn;
            uint32_t clipOffset;
            uint32_t unkn2;

sturct SoundClip {
    uint8_t numEvents;
    struct ClipEvent[numEvents];

// Common 4-byte prefix of every clip event; the Event* structs extend it.
struct ClipEvent {
    uint32_t eventInfo; //event id in lower 5 bits
};

//event id 1, size 16
// Layout of the Play Wave event. Pseudo-C: `: ClipEvent` means the 4-byte
// ClipEvent prefix comes first, so the offsets below start at 0x4.
// NOTE(review): two members are both named `unkn` (at 0x4 and at 0xb); they
// must be given distinct names before this can be used as real C.
struct EventPlayWave : ClipEvent {
    char unkn[4]; //0x4
    uint16_t trackIndex; //0x8
    uint8_t waveBank; //0xa
    uint8_t unkn; //0xb
    uint16_t unkn2; //0xc
    uint16_t unkn3; //0xe
};

//event id 3, size at least 13
// Pseudo-C: the 4-byte ClipEvent prefix plus the 9 bytes of fixed fields below
// account for the minimum size of 13; the variation table follows them.
struct EventPlayWaveVariation : ClipEvent {
    char unkn[5];
    int16_t unkn2;
    int16_t unkn3;

    struct VariationTable; //embedded table; unclear why it is stored inline here
};

//event id 4, size 40
// Pseudo-C: the 16 bytes of EventPlayWave followed by a 24-byte
// PitchVolumeFilterVariation record, which gives the total size of 40.
struct EventPlayWavePitchVolumeFilterVariation : EventPlayWave {
    struct PitchVolumeFilterVariation;
};

// Variation record appended to a Play Wave event (event id 4).
// On disk the fields are consecutive (24 bytes in total); a natively aligned
// C struct may insert padding before the floats and after `flags`.
// NOTE(review): judging by the name, the fields presumably hold pitch, volume
// and filter variation ranges - the individual meanings are unknown.
struct PitchVolumeFilterVariation {
    int16_t unkn;
    int16_t unkn2;
    uint8_t unkn3;
    uint8_t unkn4;
    float unkn5;
    float unkn6;
    float unkn7;
    float unkn8;
    uint16_t flags;
};

// Entry of the simple cue table (5 bytes on disk, no padding).
struct SimpleCueEntry { //cue that maps straight to a sound
    uint8_t flags;
    uint32_t soundOffset;
};

//size 15
// Entry of the complex cue table. Pseudo-C: `struct InstanceLimit;` stands for
// an embedded InstanceLimit record; 1 + 4 + 4 bytes of fields plus the 6 bytes
// of InstanceLimit give the total size of 15.
struct ComplexCueEntry {
    uint8_t flags;
    uint32_t variationTableOffset;
    uint32_t transitionTableOffset;
    struct InstanceLimit;
};
// Record embedded at the end of ComplexCueEntry (6 bytes on disk, no padding).
// NOTE(review): presumably instance-limiting settings, going by the name; the
// meaning of the individual fields is unknown.
struct InstanceLimit {
    uint32_t unkn;
    uint8_t unkn2;
    uint8_t unkn3; //unclear whether this belongs here or in ComplexCueEntry
};

// NOTE(review): the enumerators of this enum are missing from this listing and
// the definition is never closed. Presumably the values correspond to the
// "table 0" .. "table 4" layouts listed under VariationTable - confirm.
enum VariationTableType {
// Variation table: an 8-byte header (flags through unkn3) followed by entries
// whose layout depends on the table type encoded in `flags`. Pseudo-C: each
// "table N:" label introduces the entry layout used for table type N.
// NOTE(review): presumably the selected entry struct repeats numEntries times.
struct VariationTable {
    uint32_t flags;
        //numEntries in lower 16 bits
        //flags in upper 16 bits
        //      -table type in bits 3,4,5
    uint8_t unkn;
    uint16_t unkn2;
    uint8_t unkn3;
    table 0:
    struct {
        uint16_t trackIndex; //?
        uint8_t waveBank;
        uint8_t weightMin; //?
        uint8_t weightMax; //?
    };
    table 1:
    struct {
        uint32_t soundOffset;
        uint8_t weightMin; //?
        uint8_t weightMax; //?
    };
    table 2:
    struct {
        uint32_t unkn;
        uint8_t weightMin; //?
        uint8_t weightMax; //?
    };
    table 3:
    struct {
        uint32_t soundOffset; //?
        float weightMin;
        float weightMax;
        uint32_t unkn;
    };
    table 4:
    struct {
        uint16_t trackIndex; //?
        uint8_t waveBank;
    };
};

// Entry pairing a cue name offset with an unknown 16-bit value (6 bytes on
// disk, no padding).
// NOTE(review): presumably the element type of the table located by
// cueNameHashValsOffset in the header - confirm.
struct CueNameHashVal {
    uint32_t nameOffset;
    uint16_t unkn;
};

Global Settings (.xgs)

Defines rules and settings for sounds.

  • Categories - sounds can be assigned to a category (only one each) that specifies certain rules like number of instances along with settings like volume. You could create a category for the sounds of one character in your game so they all have the same volume. There are three predefined categories: global, default and Music.
  • Variables - these can be defined in the design stage and then referenced by the programmer in code to control Run-Time Parameter Controls
    • Run-Time Parameter Controls - also known as 'sliders'. These allow sound parameters to be controlled while the sound plays. For example, they could be used to control the pitch of a car engine sound, so that the pitch changes as the accelerator is pressed.
  • DSP Effect Path Presets (DSPs) - allow effects like reverb to be applied to sounds
  • Compression Presets - compression can be applied to waves or wave banks

Open source parsers: