Quick start guide for `poly-scribe`

Let us walk through a simple example of how to use the poly-scribe environment to handle a polymorphic data structure in C++ and Python. For this example, we will consider a configuration structure for a plugin system in which each plugin can have different parameters and the type of each plugin is determined from the configuration.

Step 1: Define the WebIDL

First, we need to define the WebIDL for our configuration structure.

typedef [Size=3] sequence<double> Vector; // (1)

enum Enumeration { // (2)
    "value1",
    "value2"
};

dictionary PluginBase // (3)
{
    required string name; // (4)
    required string description;
};

// PluginA extends PluginBase with two optional members of its own.
dictionary PluginA : PluginBase
{
    int paramA = 42; // (5)
    Vector paramVector; // (6)
};

// PluginB extends PluginBase with an optional float and an optional Enumeration member.
dictionary PluginB : PluginBase
{
    float paramB;
    Enumeration paramEnum;
};

// Container for all plugin configurations, keyed by a string.
dictionary PluginSystem
{
    record<ByteString, PluginBase> plugin_map; // (7)
};

(1) This defines a fixed-size sequence of doubles. The extra attribute Size=3 indicates that the sequence will always have exactly three elements.
(2) This is an enumeration with two possible values.
(3) This is the base data structure for all plugins.
(4) name and description are required fields of the base plugin configuration; they must be provided for all plugins.
(5) paramA is an optional parameter with a default value of 42.
(6) Members that are not marked with the required attribute are optional.
(7) This maps a string to a base plugin configuration, allowing for polymorphic data structures.

Step 2: Generate the Code

For a C++ project, the easiest way to generate the code is to use the CMake function provided by poly-scribe. It is recommended to add poly-scribe to your project either as a Git submodule or via CMake's FetchContent. In your CMakeLists.txt, you can then add the following lines:

# Interface target that the generated C++ code is linked to.
add_library (plugin_data INTERFACE)

# Generate a C++ header, a Python package and a JSON schema from plugin.webidl.
# The *_VAR arguments name the variables that receive the paths of the
# generated Python package and JSON schema.
generate_data_structures (
    plugin_data
    IDL_FILE plugin.webidl
    OUTPUT_CPP "plugin_data.hpp"
    AUTHOR_NAME "Max Mustermann"
    AUTHOR_MAIL "dummy@mail.com"
    NAMESPACE "plugin_namespace"
    LICENCE "MIT"
    OUTPUT_HEADER_DIR plugin_data_dir
    OUTPUT_PYTHON_PKG plugin_data
    OUTPUT_PYTHON_PKG_VAR PYTHON_PKG_GENERATED
    OUTPUT_SCHEMA plugin_schema.json
    OUTPUT_SCHEMA_VAR SCHEMA_GENERATED
    OUTPUT_SCHEMA_CLASS PluginSystem
)

This will generate three things:

plugin_data_dir/plugin_data.hpp: The C++ header file with the generated code.
A Python package plugin_data with the generated code.
plugin_schema.json: The JSON schema for the data structure.

The two variables PYTHON_PKG_GENERATED and SCHEMA_GENERATED will contain the paths to the generated Python package and JSON schema respectively. The C++ code will automatically be linked to the supplied target plugin_data.

Generated code

plugin_data.hpp | plugin_data.py | plugin_schema.json

/**
 * \file plugin_data.hpp
 * \brief Automatically generated using poly-scribe-code-gen.
 * \author Max Mustermann dummy@mail.com
 * \copyright
 * Copyright (c) 2025-present Max Mustermann
 * Distributed under the MIT licence.
 */

#pragma once

#include <poly-scribe/poly-scribe.hpp>

// NOLINTBEGIN

namespace plugin_namespace {

    // Forward declarations
    struct PluginBase;
    struct PluginA;
    struct PluginB;
    struct PluginSystem;

    // Scoped enumeration with the two values of the WebIDL enum of the same name.
    enum class Enumeration {
        value1,
        value2
    };

    // Fixed-size array of three doubles.
    using Vector = std::array<double, 3>;

    // Tagged union over the base plugin and the plugins derived from it;
    // "type" is the name of the tag.
    using PluginBase_t = rfl::TaggedUnion<"type", PluginBase, PluginA, PluginB>;

    // Members shared by all plugins.
    struct PluginBase {
        std::string name;
        std::string description;
    };

    struct PluginA {
        // Own members; paramA defaults to 42.
        std::optional<int> paramA = 42;
        std::optional<Vector> paramVector;
        // Inherited from PluginBase (the members are repeated here; the struct
        // has no C++ base class)
        std::string name;
        std::string description;
    };

    struct PluginB {
        // Own members
        std::optional<float> paramB;
        std::optional<Enumeration> paramEnum;
        // Inherited from PluginBase (the members are repeated here; the struct
        // has no C++ base class)
        std::string name;
        std::string description;
    };

    // Holds the optional map from a string key to any of the plugin types.
    struct PluginSystem {
        std::optional<std::unordered_map<std::string, PluginBase_t>> plugin_map;
    };

}  // namespace plugin_namespace

// NOLINTEND

from pathlib import Path
from typing import (Annotated, Any, Dict, List, Literal, Optional, Tuple, Type,
                    TypeVar, Union)

import cbor2
from annotated_types import Len
from pydantic import BaseModel, Field
from pydantic_yaml import parse_yaml_file_as, to_yaml_file
from strenum import StrEnum

T = TypeVar("T", bound=BaseModel)


Vector = Annotated[List[float], Len(min_length=3, max_length=3)]


# String enumeration; the value of each member is identical to its name.
class Enumeration(StrEnum):
    value1 = "value1"
    value2 = "value2"


# Base plugin configuration shared by all plugin models.
class PluginBase(BaseModel):
    # No defaults: both fields have to be supplied.
    name: str
    description: str
    # Fixed tag naming this model; PluginSystem uses "type" as discriminator.
    type: Literal["PluginBase"] = "PluginBase"


# Plugin configuration that adds an integer and a Vector parameter to PluginBase.
class PluginA(PluginBase):
    # Falls back to 42 when not given.
    paramA: Optional[int] = 42
    # Not set unless given.
    paramVector: Optional[Vector] = None
    # Overrides the tag inherited from PluginBase.
    type: Literal["PluginA"] = "PluginA"


# Plugin configuration that adds a float and an Enumeration parameter to PluginBase.
class PluginB(PluginBase):
    # Both parameters are not set unless given.
    paramB: Optional[float] = None
    paramEnum: Optional[Enumeration] = None
    # Overrides the tag inherited from PluginBase.
    type: Literal["PluginB"] = "PluginB"


# Model holding an optional mapping from string keys to plugin configurations.
class PluginSystem(BaseModel):
    # Each value is one of PluginA, PluginB or PluginBase; the "type" field of
    # the value is the discriminator that selects the model.
    plugin_map: Optional[
        Dict[
            str,
            Annotated[Union[PluginA, PluginB, PluginBase], Field(discriminator="type")],
        ]
    ] = None


def load(model_type: Type[T], file: Union[Path, str]) -> T:
    """Read an instance of ``model_type`` from a YAML, JSON or CBOR file.

    The format is selected by the file extension: ``.yaml``, ``.json`` or
    ``.cbor``.

    Args:
        model_type: Model class the file contents are validated against.
        file: Location of the input file, given as ``Path`` or ``str``.

    Returns:
        The ``model_type`` instance built from the file contents.

    Raises:
        TypeError: If ``file`` is neither a ``Path`` nor a ``str``.
        FileNotFoundError: If the resolved path does not exist.
        ValueError: If the file extension is not one of the supported ones.
    """
    # Normalise the argument to a resolved Path, rejecting everything else.
    if isinstance(file, Path):
        file = file.resolve()
    elif isinstance(file, str):
        file = Path(file).resolve()
    else:
        msg = f"Expected Path or str, but got {file!r}"
        raise TypeError(msg)

    if not file.exists():
        msg = f"File {file} does not exist"
        raise FileNotFoundError(msg)

    # Dispatch on the extension; every supported format returns directly.
    if file.suffix == ".yaml":
        return parse_yaml_file_as(model_type, file)

    if file.suffix == ".json":
        return model_type.model_validate_json(file.read_text())

    if file.suffix == ".cbor":
        with file.open("rb") as stream:
            decoded = cbor2.load(stream)
        return model_type.model_validate(decoded)

    raise ValueError(f"Unsupported file extension {file.suffix}")


def save(file: Union[Path, str], model: BaseModel) -> None:
    """Write ``model`` to a YAML, JSON or CBOR file.

    The format is selected by the file extension: ``.yaml``, ``.json`` or
    ``.cbor``. JSON output is indented by four spaces.

    Args:
        file: Location of the output file, given as ``Path`` or ``str``.
        model: Model instance to serialise.

    Raises:
        TypeError: If ``file`` is neither a ``Path`` nor a ``str``.
        ValueError: If the file extension is not one of the supported ones.
    """
    if isinstance(file, str):  # local path to file
        file = Path(file).resolve()
    elif isinstance(file, Path):
        file = file.resolve()
    else:
        # Only Path and str are handled above, so the message names exactly
        # those two (same wording as in load()).
        raise TypeError(f"Expected Path or str, but got {file!r}")

    if file.suffix == ".yaml":
        to_yaml_file(file, model)
    elif file.suffix == ".json":
        json_string = model.model_dump_json(indent=4)
        file.write_text(json_string)
    elif file.suffix == ".cbor":
        with file.open("wb") as f:
            cbor2.dump(model.model_dump(), f)
    else:
        raise ValueError(f"Unsupported file extension {file.suffix}")

{
  "$defs": {
    "Enumeration": {
      "enum": [
        "value1",
        "value2"
      ],
      "title": "Enumeration",
      "type": "string"
    },
    "PluginA": {
      "properties": {
        "name": {
          "title": "Name",
          "type": "string"
        },
        "description": {
          "title": "Description",
          "type": "string"
        },
        "type": {
          "const": "PluginA",
          "default": "PluginA",
          "title": "Type",
          "type": "string"
        },
        "paramA": {
          "anyOf": [
            {
              "type": "integer"
            },
            {
              "type": "null"
            }
          ],
          "default": 42,
          "title": "Parama"
        },
        "paramVector": {
          "anyOf": [
            {
              "items": {
                "type": "number"
              },
              "maxItems": 3,
              "minItems": 3,
              "type": "array"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "title": "Paramvector"
        }
      },
      "required": [
        "name",
        "description"
      ],
      "title": "PluginA",
      "type": "object"
    },
    "PluginB": {
      "properties": {
        "name": {
          "title": "Name",
          "type": "string"
        },
        "description": {
          "title": "Description",
          "type": "string"
        },
        "type": {
          "const": "PluginB",
          "default": "PluginB",
          "title": "Type",
          "type": "string"
        },
        "paramB": {
          "anyOf": [
            {
              "type": "number"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "title": "Paramb"
        },
        "paramEnum": {
          "anyOf": [
            {
              "$ref": "#/$defs/Enumeration"
            },
            {
              "type": "null"
            }
          ],
          "default": null
        }
      },
      "required": [
        "name",
        "description"
      ],
      "title": "PluginB",
      "type": "object"
    },
    "PluginBase": {
      "properties": {
        "name": {
          "title": "Name",
          "type": "string"
        },
        "description": {
          "title": "Description",
          "type": "string"
        },
        "type": {
          "const": "PluginBase",
          "default": "PluginBase",
          "title": "Type",
          "type": "string"
        }
      },
      "required": [
        "name",
        "description"
      ],
      "title": "PluginBase",
      "type": "object"
    }
  },
  "properties": {
    "plugin_map": {
      "anyOf": [
        {
          "additionalProperties": {
            "discriminator": {
              "mapping": {
                "PluginA": "#/$defs/PluginA",
                "PluginB": "#/$defs/PluginB",
                "PluginBase": "#/$defs/PluginBase"
              },
              "propertyName": "type"
            },
            "oneOf": [
              {
                "$ref": "#/$defs/PluginA"
              },
              {
                "$ref": "#/$defs/PluginB"
              },
              {
                "$ref": "#/$defs/PluginBase"
              }
            ]
          },
          "type": "object"
        },
        {
          "type": "null"
        }
      ],
      "default": null,
      "title": "Plugin Map"
    }
  },
  "title": "PluginSystem",
  "type": "object"
}

Step 3: Use the Generated Code

Now that we have the generated code, we can use it in our C++ and Python projects. For example, we can use Python to script the generation of the configuration file and then load it in C++.

import plugin_data as pd

# First entry: a PluginA; paramA is not passed, so its default is used.
plugin_a = pd.PluginA(
    name="Plugin A",
    description="This is Plugin A",
    paramVector=[1.0, 2.0, 3.0],
)

# Second entry: a PluginB with both of its parameters set.
plugin_b = pd.PluginB(
    name="Plugin B",
    description="This is Plugin B",
    paramB=3.14,
    paramEnum=pd.Enumeration.value1,
)

# Collect both plugins under their keys and write the system to a JSON file.
pd_system = pd.PluginSystem(plugin_map={"plugin1": plugin_a, "plugin2": plugin_b})

pd.save("example.json", pd_system)

#include <filesystem>
#include <iostream>
#include <type_traits>

#include "plugin_data.hpp"

/// Compile-time guard for the fallback branch of the plugin visitor.
///
/// The condition depends on T, so the assertion is only evaluated when this
/// template is instantiated, i.e. when the visitor is invoked with a type that
/// none of its `if constexpr` branches handles. Wrapping the assertion in a
/// separate function template is required so that gcc compiles the code
/// correctly.
template<typename T>
[[noreturn]] void unknown_plugin_type_static_assert( )
{
    static_assert( sizeof( T ) == 0, "Unknown plugin type" );
}

/// Loads a PluginSystem from the file given as first command line argument
/// and prints every plugin it contains.
///
/// \param argc Number of command line arguments.
/// \param argv Command line arguments; argv[1] is the input file.
/// \return 0 on success, 1 if the argument is missing or loading fails.
int main( int argc, char** argv )
{
    if( argc < 2 )
    {
        std::cerr << "Usage: " << argv[0] << " <input_file>\n";
        return 1;
    }

    std::filesystem::path input_file = argv[1];

    auto result = poly_scribe::load<plugin_namespace::PluginSystem>( input_file );
    if( !result )
    {
        std::cerr << "Error loading file: " << result.error( ).what( ) << '\n';
        return 1;
    }

    // The loaded data is only read, so a const reference is sufficient.
    const plugin_namespace::PluginSystem& plugin_system = result.value( );

    if( !plugin_system.plugin_map )
    {
        std::cout << "No plugins loaded.\n";
        return 0;
    }

    // The visitor is stateless, so it is created once instead of per plugin.
    // It is called with the concrete alternative stored in the tagged union
    // and selects the matching branch at compile time.
    const auto print_plugin = []( const auto& entry )
    {
        using Type = std::decay_t<decltype( entry )>;

        if constexpr( std::is_same_v<Type, plugin_namespace::PluginA> )
        {
            std::cout << "Type: PluginA\n";
            std::cout << "Name: " << entry.name << '\n';
            std::cout << "Description: " << entry.description << '\n';
            std::cout << "Param A: " << entry.paramA.value_or( 0 ) << '\n';
            if( entry.paramVector )
            {
                std::cout << "Param Vector: ";
                for( const auto& component: *entry.paramVector )
                {
                    std::cout << component << " ";
                }
                std::cout << '\n';
            }
        }
        else if constexpr( std::is_same_v<Type, plugin_namespace::PluginB> )
        {
            std::cout << "Type: PluginB\n";
            std::cout << "Name: " << entry.name << '\n';
            std::cout << "Description: " << entry.description << '\n';
            std::cout << "Param B: " << entry.paramB.value_or( 0.0f ) << '\n';
            if( entry.paramEnum )
            {
                // Prints the integer value of the enumerator, not its name.
                std::cout << "Param Enum: " << static_cast<int>( *entry.paramEnum ) << '\n';
            }
        }
        else if constexpr( std::is_same_v<Type, plugin_namespace::PluginBase> )
        {
            std::cout << "Type: PluginBase\n";
            std::cout << "Name: " << entry.name << '\n';
            std::cout << "Description: " << entry.description << '\n';
        }
        else
        {
            // Fails the build if a plugin type is not handled above.
            unknown_plugin_type_static_assert<Type>( );
        }
    };

    for( const auto& [name, plugin]: *plugin_system.plugin_map )
    {
        std::cout << "Plugin Key: " << name << '\n';
        plugin.visit( print_plugin );
    }

    return 0;
}

When running the C++ application, it will read the configuration file generated by the Python script and print the plugin information.

$ python example.py
$ cat example.json
{
    "plugin_map": {
        "plugin1": {
            "name": "Plugin A",
            "description": "This is Plugin A",
            "type": "PluginA",
            "paramA": 42,
            "paramVector": [
                1.0,
                2.0,
                3.0
            ]
        },
        "plugin2": {
            "name": "Plugin B",
            "description": "This is Plugin B",
            "type": "PluginB",
            "paramB": 3.14,
            "paramEnum": "value1"
        }
    }
}
$ ./example example.json
Plugin Key: plugin1
Type: PluginA
Name: Plugin A
Description: This is Plugin A
Param A: 42
Param Vector: 1 2 3
Plugin Key: plugin2
Type: PluginB
Name: Plugin B
Description: This is Plugin B
Param B: 3.14
Param Enum: 0

Quick start guide for poly-scribe

Step 1: Define the WebIDL

Step 2: Generate the Code

Step 3: Use the Generated Code

Quick start guide for `poly-scribe`