2 Replies Latest reply on Nov 16, 2018 2:14 PM by yairi

    kworker has a high CPU usage

    vivienne

      delayed_cache_work_func

      We detected that this function executes millions of times per second.

      Is there a problem in the function that keeps it from ever stopping?

      static void __cache_work_func(struct mlx5_cache_ent *ent)

      {

              struct mlx5_ib_dev *dev = ent->dev;

              struct mlx5_mr_cache *cache = &dev->cache;

              int i = order2idx(dev, ent->order);

              int err;

              s64 dtime;

              if (cache->stopped)

                      return;

              ent = &dev->cache.ent[i];

              if (ent->cur < 2 * ent->limit && !dev->fill_delay) {

                      err = add_keys(dev, i, 1);

                      if (ent->cur < 2 * ent->limit) {

                              if (err == -EAGAIN) {

                                      mlx5_ib_dbg(dev, "returned eagain, order %d\n",

                                                  i + 2);

                                      cancel_delayed_work(&ent->dwork);

                                      queue_delayed_work(cache->wq, &ent->dwork,

                                                         msecs_to_jiffies(3));

                              } else if (err) {

                                      mlx5_ib_warn(dev, "command failed order %d, err %d\n",

                                                   i + 2, err);

                                      cancel_delayed_work(&ent->dwork);

                                      queue_delayed_work(cache->wq, &ent->dwork,

                                                         msecs_to_jiffies(1000));

                              } else {

                                      queue_work(cache->wq, &ent->work);

                              }

                      }

              } else if (ent->cur > 2 * ent->limit) {

                      /*

                       * The remove_keys() logic is performed as garbage collection

                       * task. Such task is intended to be run when no other active

                       * processes are running.

                       *

                       * The need_resched() will return TRUE if there are user tasks

                       * to be activated in near future.

                       *

                       * In such case, we don't execute remove_keys() and postpone

                       * the garbage collection work to try to run in next cycle,

                       * in order to free CPU resources to other tasks.

                       */

                      dtime = (cache->last_add + (s64)cache->rel_timeout * HZ) -

                              jiffies;

                      if (cache->rel_imm ||

                          (cache->rel_timeout >= 0 && !someone_adding(cache) &&

                           dtime <= 0)) {

                              remove_keys(dev, i, 1);

                              if (ent->cur > ent->limit)

                                      queue_work(cache->wq, &ent->work);

                      } else if (cache->rel_timeout >= 0) {

                              dtime = max_t(s64, dtime, 0);

                              dtime = min_t(s64, dtime, (MAX_MR_RELEASE_TIMEOUT * HZ));

                              cancel_delayed_work(&ent->dwork);

                              queue_delayed_work(cache->wq, &ent->dwork, dtime);

                      }

              } else if (cache->rel_imm && !someone_releasing(cache)) {

                      cache->rel_imm = 0;

              }

      }

      /*
       * Delayed-work callback: map the work_struct embedded in ent->dwork back
       * to its owning cache entry and hand it to the common handler.
       */
      static void delayed_cache_work_func(struct work_struct *work)
      {
              __cache_work_func(container_of(work, struct mlx5_cache_ent,
                                             dwork.work));
      }

      delayed_cache_work_func