Lossless RoCE Configuration for Spectrum-based Cumulus Switches in DSCP-Based QoS Mode

Version 2

    This post provides a configuration example for a Mellanox Spectrum switch running Cumulus Linux and carrying RoCE traffic over a lossless network, in DSCP-based QoS mode.

    This post assumes Cumulus version 3.5 and above. For older versions, see: Lossless RoCE Configuration for Spectrum-based Cumulus Switches in DSCP-Based QoS Mode (Ver. 3.4)

     

    Configuration

    Note - example configuration files are attached below.

     

    Step 1 - /etc/cumulus/datapath/traffic.conf

    1. Enable ECN for priority 3

    ## File: /etc/cumulus/datapath/traffic.conf

    ecn_red.port_group_list = [ecn_red_port_group]

    ecn_red.ecn_red_port_group.cos_list = [3]

    ecn_red.ecn_red_port_group.port_set = swp1-swp32

    ecn_red.ecn_red_port_group.ecn_enable = true

    ecn_red.ecn_red_port_group.red_enable = false

    ecn_red.ecn_red_port_group.min_threshold_bytes = 153600

    ecn_red.ecn_red_port_group.max_threshold_bytes = 1536000

    ecn_red.ecn_red_port_group.probability = 100

    2. Set trust mode to DSCP, map DSCP values to COS

    ## File: /etc/cumulus/datapath/traffic.conf

    traffic.packet_priority_source_set = [dscp]
    traffic.cos_0.priority_source.dscp = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63] # all other DSCP values (everything except 26 and 48)

    traffic.cos_1.priority_source.dscp = []

    traffic.cos_2.priority_source.dscp = [48] # for CNPs

    traffic.cos_3.priority_source.dscp = [26] # for RoCE

    traffic.cos_4.priority_source.dscp = []

    traffic.cos_5.priority_source.dscp = []

    traffic.cos_6.priority_source.dscp = []

    traffic.cos_7.priority_source.dscp = []

    3. Map switch priority to priority groups

    ## File: /etc/cumulus/datapath/traffic.conf

    traffic.priority_group_list = [control, service, bulk]

    priority_group.control.cos_list = [2]

    priority_group.service.cos_list = [3]

    priority_group.bulk.cos_list = [0,1,4,5,6,7]

    4. Enable priority flow control for priority 3

    ## File: /etc/cumulus/datapath/traffic.conf

    pfc.port_group_list = [pfc_port_group]

    pfc.pfc_port_group.cos_list = [3]

    pfc.pfc_port_group.port_set = swp1-swp32

    pfc.pfc_port_group.port_buffer_bytes = 70000

    pfc.pfc_port_group.xoff_size = 18000

    pfc.pfc_port_group.xon_delta = 0

    pfc.pfc_port_group.tx_enable = true

    pfc.pfc_port_group.rx_enable = true

    5. Set strict priority for CNPs

    ## File: /etc/cumulus/datapath/traffic.conf

    priority_group.control.weight = 0

    priority_group.service.weight = 16

    priority_group.bulk.weight = 16

    Step 2 - /usr/lib/python2.7/dist-packages/cumulus/__chip_config/mlx/datapath.conf

    1. Assign group IDs and map to a pool

    priority_group.control.id = 0

    priority_group.service.id = 0

    priority_group.bulk.id = 0

    priority_group.control.service_pool = 0

    priority_group.service.service_pool = 0

    priority_group.bulk.service_pool = 0

    flow_control.service_pool = 1 # all lossless traffic is mapped to a unique pool (#1)

    2. Service pool buffer allocation

    ingress_service_pool.0.percent = 50.0  # all priority groups

    ingress_service_pool.0.mode = 1 # dynamic buffering

    ingress_service_pool.1.percent = 50.0 # all lossless traffic

    ingress_service_pool.1.mode = 1 # dynamic buffering

    egress_service_pool.0.percent = 50.0 # all lossy priority groups, UC and MC

    egress_service_pool.0.mode = 1

    egress_service_pool.1.percent = 100.0 # all lossless priority groups

    egress_service_pool.1.mode = 1

    3. Configure the reserved sizes for ingress/egress buffers per service

    # priority group minimum buffer allocation: percent of total buffer

    priority_group.control.ingress_buffer.min_percent   = 0.0

    priority_group.service.ingress_buffer.min_percent   = 0.0

    priority_group.bulk.ingress_buffer.min_percent      = 0.5

     

    priority_group.bulk.egress_buffer.uc.min_percent      =  0.0

    priority_group.service.egress_buffer.uc.min_percent   =  0.0

    priority_group.control.egress_buffer.uc.min_percent   =  0.0

     

    priority_group.bulk.egress_buffer.mc.min_percent      =  0.0

    priority_group.service.egress_buffer.mc.min_percent   =  0.0

    priority_group.control.egress_buffer.mc.min_percent   =  0.0

    flow_control.egress_buffer.min_percent = 1.0

     

    mc_buffer.port.percent = 0.5

    4. Configure the dynamic quota (alpha) for ingress/egress buffers per service

    priority_group.control.ingress_buffer.dynamic_quota = 11

    priority_group.service.ingress_buffer.dynamic_quota = 11

    priority_group.bulk.ingress_buffer.dynamic_quota    = 11

    flow_control.ingress_buffer.dynamic_quota = 9

    priority_group.bulk.egress_buffer.uc.sp_dynamic_quota    = 11

    priority_group.service.egress_buffer.uc.sp_dynamic_quota = 11

    priority_group.control.egress_buffer.uc.sp_dynamic_quota = 11

     

    priority_group.bulk.egress_buffer.mc.sp_dynamic_quota    = 9

    priority_group.service.egress_buffer.mc.sp_dynamic_quota = 9

    priority_group.control.egress_buffer.mc.sp_dynamic_quota = 9

     

    flow_control.egress_buffer.dynamic_quota = 255

    Step 3 - Apply configuration

    systemctl restart switchd.service

    References