DMYTRO SHYTYI

Open vSwitch internals

Hello! In this post we trace the call path from main() to xlate_normal_flood() in ovs-vswitchd (Open vSwitch) 2.15.90.

We start with main in ovs-vswitchd.c:

 int
 main(int argc, char *argv[])
 {
 ...
     bridge_init(remote);
     free(remote);
 
     exiting = false;
     cleanup = false;
     while (!exiting) {
  ...
         bridge_run(); <<<<<<<<<<<<<<<<<<<<<<<<<<
  ...
         bridge_wait();
  ...
         bridge_exit(cleanup);

     return 0;
 }

main() -> bridge_run()

bridge_run(void)
 {
 ...
     bridge_init_ofproto(cfg);
 ...
     bridge_run__();<<<<<<<<<<<<<<<<<<<<
 ...
 }

bridge_run() -> bridge_run__()

 bridge_run__(void)
 {
  ...
   ofproto_enumerate_types(&types);
     SSET_FOR_EACH (type, &types) {
         ofproto_type_run(type); <<<<<<<<<<<<<<<<<<<<<<<
     }
   ...
     /* Let each bridge do the work that it needs to do. */
     HMAP_FOR_EACH (br, node, &all_bridges) {
         ofproto_run(br->ofproto);
     }
 }

bridge_run__() -> ofproto_type_run()

 ofproto_type_run(const char *datapath_type)
 {
 ...
     datapath_type = ofproto_normalize_type(datapath_type);
     class = ofproto_class_find__(datapath_type);
     error = class->type_run ? class->type_run(datapath_type) : 0;<<<<<<<<<<<<<<<<<<
 ...
     if (error && error != EAGAIN) {
         VLOG_ERR_RL(&rl, "%s: type_run failed (%s)",
                     datapath_type, ovs_strerror(error));
     }
     return error;
 }

ofproto_type_run() -> type_run()

type_run(const char *type)
 {
     struct dpif_backer *backer;
 
     backer = shash_find_data(&all_dpif_backers, type);
  ...
     if (backer->recv_set_enable) {
         udpif_set_threads(backer->udpif, n_handlers, n_revalidators);<<<<<<<<<<<<<<<<<<
     }
  ...
         backer->need_revalidate = 0;
 
         xlate_txn_start();
  ...
         xlate_txn_commit();
 
         udpif_revalidate(backer->udpif);
     }
 
     process_dpif_port_changes(backer);
     ct_zone_timeout_policy_sweep(backer);
 
     return 0;
 }

type_run() -> udpif_set_threads()

 udpif_set_threads(struct udpif *udpif, size_t n_handlers_,
                   size_t n_revalidators_)
 {
     ovs_assert(udpif);
     ovs_assert(n_handlers_ && n_revalidators_);
 ...
         udpif_start_threads(udpif, n_handlers_, n_revalidators_);<<<<<<<<<<<<<<<<<
     }
 }

udpif_set_threads() -> udpif_start_threads()

udpif_start_threads(struct udpif *udpif, size_t n_handlers_,
                     size_t n_revalidators_)
 {
 
         for (size_t i = 0; i < udpif->n_handlers; i++) {
             struct handler *handler = &udpif->handlers[i];
 
             handler->udpif = udpif;
             handler->handler_id = i;
             handler->thread = ovs_thread_create(
                 "handler", udpif_upcall_handler, handler);<<<<<<<<<<<<<<<<<<
         }
... 
     }
 }

udpif_start_threads() -> udpif_upcall_handler()

udpif_upcall_handler(void *arg)
 {
     struct handler *handler = arg;
     struct udpif *udpif = handler->udpif;
 
         if (recv_upcalls(handler)) { <<<<<<<<<<<<<<<<<<<<<<
             poll_immediate_wake();
         } else {
              ...
         }
         poll_block();
     }
 
 }

udpif_upcall_handler() -> recv_upcalls()

 static size_t
 recv_upcalls(struct handler *handler)
 {
     ....
     n_upcalls = 0;
     while (n_upcalls < UPCALL_MAX_BATCH) {
         ...

         upcall->key = dupcall->key;
         upcall->key_len = dupcall->key_len;
         upcall->ufid = &dupcall->ufid;
         upcall->hash = hash;
 
         upcall->out_tun_key = dupcall->out_tun_key;
         upcall->actions = dupcall->actions;

 
         error = process_upcall(udpif, upcall,
                                &upcall->odp_actions, &upcall->wc);<<<<<<<<<<<<<<<<<<<<<
...

recv_upcalls() -> process_upcall()

static int
 process_upcall(struct udpif *udpif, struct upcall *upcall,
                struct ofpbuf *odp_actions, struct flow_wildcards *wc)
 {
     const struct dp_packet *packet = upcall->packet;
     const struct flow *flow = upcall->flow;
     size_t actions_len = 0;
 
     switch (upcall->type) {
     case MISS_UPCALL:
     case SLOW_PATH_UPCALL:
         upcall_xlate(udpif, upcall, odp_actions, wc);<<<<<<<<<<<<<<<<<<<<<<<<
         return 0;
   ...

process_upcall() -> upcall_xlate()

static void
 upcall_xlate(struct udpif *udpif, struct upcall *upcall,
              struct ofpbuf *odp_actions, struct flow_wildcards *wc)
 {
...
     xlate_in_init(&xin, upcall->ofproto,
                   ofproto_dpif_get_tables_version(upcall->ofproto),
                   upcall->flow, upcall->ofp_in_port, NULL,
                   stats.tcp_flags, upcall->packet, wc, odp_actions);
 
     if (upcall->type == MISS_UPCALL) {
         xin.resubmit_stats = &stats;
 
         if (xin.frozen_state) {
             upcall->recirc = recirc_id_node_from_state(xin.frozen_state);
             upcall->have_recirc_ref = recirc_id_node_try_ref_rcu(upcall->recirc);
         }
 ...
     upcall->reval_seq = seq_read(udpif->reval_seq);
 ...
     xerr = xlate_actions(&xin, &upcall->xout);<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<

upcall_xlate() -> xlate_actions()

enum xlate_error
 xlate_actions(struct xlate_in *xin, struct xlate_out *xout)
 {
     *xout = (struct xlate_out) {
         .slow = 0,
         .recircs = RECIRC_REFS_EMPTY_INITIALIZER,
     };
 if (xin->frozen_state) {
 } else {
    do_xlate_actions(ofpacts, ofpacts_len, &ctx, true, false);<<<<<<<<<<<<<<<<<<<
...

xlate_actions() -> do_xlate_actions()

static void
 do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len,
                  struct xlate_ctx *ctx, bool is_last_action,
                  bool group_bucket_action)
 {
...
switch (a->type) {
         case OFPACT_OUTPUT:
             xlate_output_action(ctx, ofpact_get_OUTPUT(a)->port,
                                 ofpact_get_OUTPUT(a)->max_len, true, last,
                                 false, group_bucket_action);<<<<<<<<<<<<<<<<<<<
             break;
...

do_xlate_actions() -> xlate_output_action()

 xlate_output_action(struct xlate_ctx *ctx, ofp_port_t port,
                     uint16_t controller_len, bool may_packet_in,
                     bool is_last_action, bool truncate,
                     bool group_bucket_action)
 {
     ofp_port_t prev_nf_output_iface = ctx->nf_output_iface;
 
     ctx->nf_output_iface = NF_OUT_DROP;
 
     switch (port) {
...
     case OFPP_NORMAL:
         xlate_normal(ctx);<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
...

xlate_output_action() -> xlate_normal()

xlate_normal(struct xlate_ctx *ctx)
 {
...
  xlate_normal_flood(ctx, in_xbundle, &xvlan);<<<<<<<<<<<<<<<<<<<<<<<<<<<
...
}

xlate_normal() -> xlate_normal_flood()

And finally, here is the code we added to xlate_normal_flood():

if (ctx->xin->packet){
   in_packet = malloc(sizeof(struct dp_packet));
   memcpy(in_packet,ctx->xin->packet, sizeof(struct dp_packet));
   ...
   int error = xlate_send_packet(xport_v->ofport, false, in_packet);