/*
 * virnuma.c: helper APIs for managing numa
 *
 * Copyright (C) 2011-2014 Red Hat, Inc.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library.  If not, see
 * <http://www.gnu.org/licenses/>.
 *
 */

#include <config.h>

#define NUMA_MAX_N_CPUS 4096

#if WITH_NUMACTL
# define NUMA_VERSION1_COMPATIBILITY 1
# include <numa.h>

# if LIBNUMA_API_VERSION > 1
#  undef NUMA_MAX_N_CPUS
#  define NUMA_MAX_N_CPUS (numa_all_cpus_ptr->size)
# endif

#endif /* WITH_NUMACTL */

#include <sys/types.h>
#include <dirent.h>

#include "virnuma.h"
#include "vircommand.h"
#include "virerror.h"
#include "virlog.h"
#include "viralloc.h"
#include "virbitmap.h"
#include "virstring.h"
#include "virfile.h"
#include "virhostmem.h"

#define VIR_FROM_THIS VIR_FROM_NONE

VIR_LOG_INIT("util.numa");


#if HAVE_NUMAD
/**
 * virNumaGetAutoPlacementAdvice:
 * @vcpus: vCPU count handed to numad
 * @balloon: memory amount; divided by 1024 (rounding up) before being
 *           handed to numad (presumably KiB converted to MiB - confirm
 *           against callers)
 *
 * Runs `numad -w <vcpus>:<memory>` and collects what the command writes
 * into its output buffer.
 *
 * Returns the collected output on success (presumably owned by the
 * caller, who must free it), or NULL with an error reported when the
 * command fails.
 */
char *
virNumaGetAutoPlacementAdvice(unsigned short vcpus,
                              unsigned long long balloon)
{
    VIR_AUTOPTR(virCommand) cmd = NULL;
    char *output = NULL;

    cmd = virCommandNewArgList(NUMAD, "-w", NULL);
    virCommandAddArgFormat(cmd, "%d:%llu", vcpus,
                           VIR_DIV_UP(balloon, 1024));

    virCommandSetOutputBuffer(cmd, &output);

    if (virCommandRun(cmd, NULL) < 0) {
        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                       _("Failed to query numad for the "
                         "advisory nodeset"));
        /* Whatever was captured before the failure is not a valid
         * nodeset; drop it so that the caller sees NULL on error. */
        VIR_FREE(output);
    }

    return output;
}
#else /* !HAVE_NUMAD */
/* Variant used when numad support is compiled out: there is nobody to
 * ask for advice, so report that and hand back NULL. */
char *
virNumaGetAutoPlacementAdvice(unsigned short vcpus ATTRIBUTE_UNUSED,
                              unsigned long long balloon ATTRIBUTE_UNUSED)
{
    char *advice = NULL;

    virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
                   _("numad is not available on this host"));

    return advice;
}
#endif /* !HAVE_NUMAD */

#if WITH_NUMACTL
/**
 * virNumaSetupMemoryPolicy:
 * @mode: NUMA memory tuning mode to apply
 * @nodeset: bitmap of host NUMA nodes the policy refers to (may be NULL)
 *
 * Translate @nodeset into a libnuma nodemask and apply the policy
 * selected by @mode through the libnuma API. A NULL @nodeset means
 * there is nothing to set up and is treated as success.
 *
 * Returns 0 on success, -1 on error (with error reported).
 */
int
virNumaSetupMemoryPolicy(virDomainNumatuneMemMode mode,
                         virBitmapPtr nodeset)
{
    nodemask_t mask;
    int node = -1;
    int bit = -1;
    int maxnode = 0;
    size_t i;

    if (!nodeset)
        return 0;

    if (!virNumaNodesetIsAvailable(nodeset))
        return -1;

    /* A nodemask_t only has room for bits 0 .. NUMA_NUM_NODES - 1, so
     * the highest node id we may set is NUMA_NUM_NODES - 1 even if the
     * host claims to have more nodes. */
    maxnode = numa_max_node();
    if (maxnode > NUMA_NUM_NODES - 1)
        maxnode = NUMA_NUM_NODES - 1;

    /* Convert nodeset to a libnuma nodemask. */
    nodemask_zero(&mask);
    while ((bit = virBitmapNextSetBit(nodeset, bit)) >= 0) {
        if (bit > maxnode) {
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           _("NUMA node %d is out of range"), bit);
            return -1;
        }
        nodemask_set(&mask, bit);
    }

    switch (mode) {
    case VIR_DOMAIN_NUMATUNE_MEM_STRICT:
        /* The bind policy is switched on only around the membind call
         * and switched back off right after it. */
        numa_set_bind_policy(1);
        numa_set_membind(&mask);
        numa_set_bind_policy(0);
        break;

    case VIR_DOMAIN_NUMATUNE_MEM_PREFERRED:
    {
        int nnodes = 0;

        /* Count the selected nodes, remembering the last one seen. */
        for (i = 0; i < NUMA_NUM_NODES; i++) {
            if (nodemask_isset(&mask, i)) {
                node = i;
                nnodes++;
            }
        }

        if (nnodes != 1) {
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           "%s", _("NUMA memory tuning in 'preferred' mode "
                                   "only supports single node"));
            return -1;
        }

        numa_set_bind_policy(0);
        numa_set_preferred(node);
    }
    break;

    case VIR_DOMAIN_NUMATUNE_MEM_INTERLEAVE:
        numa_set_interleave_mask(&mask);
        break;

    case VIR_DOMAIN_NUMATUNE_MEM_LAST:
        break;
    }

    return 0;
}

/**
 * virNumaIsAvailable:
 *
 * Returns false when numa_available() yields -1, true otherwise.
 */
bool
virNumaIsAvailable(void)
{
    if (numa_available() == -1)
        return false;

    return true;
}


/**
 * virNumaGetMaxNode:
 *
 * Look up the highest NUMA node number present on this system
 * (compare with the node numbers under /sys/devices/system/node/ ).
 *
 * Returns the highest NUMA node id on success, -1 on error
 * (with error reported).
 */
int
virNumaGetMaxNode(void)
{
    int highest;

    if (!virNumaIsAvailable()) {
        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                       _("NUMA isn't available on this host"));
        return -1;
    }

    highest = numa_max_node();
    if (highest < 0) {
        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                       _("Failed to request maximum NUMA node id"));
        return -1;
    }

    return highest;
}


/**
 * virNumaGetNodeMemory:
 * @node: identifier of the requested NUMA node
 * @memsize: filled with the total memory size of @node (may be NULL)
 * @memfree: filled with the free memory of @node (may be NULL)
 *
 * Queries numa_node_size64() for @node and stores the total size in
 * @memsize and the free amount in @memfree. Either output may be NULL
 * if the caller is not interested in it; on failure both are zeroed.
 * The caller has to guarantee that @node is in range
 * (see virNumaGetMaxNode).
 *
 * Returns 0 on success, -1 on error. Does not report errors.
 */
int
virNumaGetNodeMemory(int node,
                     unsigned long long *memsize,
                     unsigned long long *memfree)
{
    long long avail;
    long long total = numa_node_size64(node, &avail);
    bool failed = total < 0;

    /* @avail is only looked at when the query succeeded. */
    if (memsize)
        *memsize = failed ? 0 : total;

    if (memfree)
        *memfree = failed ? 0 : avail;

    return failed ? -1 : 0;
}


/**
 * virNumaGetNodeCPUs:
 * @node: identifier of the requested NUMA node
 * @cpus: returns a bitmap of CPUs in @node
 *
 * Asks libnuma for the CPU mask of @node and converts it into a
 * virBitmap stored in @cpus.
 *
 * Returns the number of CPUs found in @node. When the topology of
 * @node cannot be obtained (libnuma fails or hands back an all-ones
 * mask) -2 is returned, and -1 is returned when an allocation fails;
 * in both error cases @cpus is set to NULL.
 */

# define n_bits(var) (8 * sizeof(var))
# define MASK_CPU_ISSET(mask, cpu) \
  (((mask)[((cpu) / n_bits(*(mask)))] >> ((cpu) % n_bits(*(mask)))) & 1)
int
virNumaGetNodeCPUs(int node,
                   virBitmapPtr *cpus)
{
    int found = 0;
    int max_n_cpus = virNumaGetMaxCPUs();
    int mask_n_bytes = max_n_cpus / 8;
    size_t cpu;
    VIR_AUTOFREE(unsigned long *) nodecpus = NULL;
    VIR_AUTOFREE(unsigned long *) allones = NULL;
    VIR_AUTOPTR(virBitmap) map = NULL;

    *cpus = NULL;

    if (VIR_ALLOC_N(nodecpus, mask_n_bytes / sizeof(*nodecpus)) < 0 ||
        VIR_ALLOC_N(allones, mask_n_bytes / sizeof(*allones)) < 0)
        return -1;

    /* Reference pattern used below to spot an "everything set" answer. */
    memset(allones, 0xff, mask_n_bytes);

    /* The first time this returns -1, ENOENT if node doesn't exist... */
    if (numa_node_to_cpus(node, nodecpus, mask_n_bytes) < 0) {
        VIR_WARN("NUMA topology for cell %d is not available, ignoring", node);
        return -2;
    }

    /* second, third... times it returns an all-1's mask */
    if (memcmp(nodecpus, allones, mask_n_bytes) == 0) {
        VIR_DEBUG("NUMA topology for cell %d is invalid, ignoring", node);
        return -2;
    }

    map = virBitmapNew(max_n_cpus);
    if (!map)
        return -1;

    /* Copy every set bit of the libnuma mask over, counting as we go. */
    for (cpu = 0; cpu < max_n_cpus; cpu++) {
        if (!MASK_CPU_ISSET(nodecpus, cpu))
            continue;

        ignore_value(virBitmapSetBit(map, cpu));
        found++;
    }

    VIR_STEAL_PTR(*cpus, map);
    return found;
}
# undef MASK_CPU_ISSET
# undef n_bits

#else /* !WITH_NUMACTL */

/* Variant used without libnuma: no policy can be applied, the nodeset
 * is merely validated. */
int
virNumaSetupMemoryPolicy(virDomainNumatuneMemMode mode ATTRIBUTE_UNUSED,
                         virBitmapPtr nodeset)
{
    return virNumaNodesetIsAvailable(nodeset) ? 0 : -1;
}

/* Variant used without libnuma: NUMA is never available. */
bool
virNumaIsAvailable(void)
{
    const bool available = false;

    return available;
}


/* Variant used without libnuma: always fails with an error reported. */
int
virNumaGetMaxNode(void)
{
    const int rc = -1;

    virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                   _("NUMA isn't available on this host"));

    return rc;
}


/* Variant used without libnuma: zeroes whichever outputs were supplied
 * and fails; only a debug message is logged, no error is reported. */
int
virNumaGetNodeMemory(int node ATTRIBUTE_UNUSED,
                     unsigned long long *memsize,
                     unsigned long long *memfree)
{
    VIR_DEBUG("NUMA isn't available on this host");

    if (memfree)
        *memfree = 0;

    if (memsize)
        *memsize = 0;

    return -1;
}


/* Variant used without libnuma: clears @cpus and fails with an error
 * reported. */
int
virNumaGetNodeCPUs(int node ATTRIBUTE_UNUSED,
                   virBitmapPtr *cpus)
{
    virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                   _("NUMA isn't available on this host"));

    *cpus = NULL;

    return -1;
}
#endif /* !WITH_NUMACTL */

/**
 * virNumaGetMaxCPUs:
 *
 * Report the upper bound on the number of CPUs as given by
 * NUMA_MAX_N_CPUS (a fixed constant, or the size of libnuma's
 * all-CPUs mask when a newer libnuma API is in use).
 *
 * Returns the count of CPUs supported.
 */
unsigned int
virNumaGetMaxCPUs(void)
{
    unsigned int max_cpus = NUMA_MAX_N_CPUS;

    return max_cpus;
}


#if WITH_NUMACTL && HAVE_NUMA_BITMASK_ISBITSET
/**
 * virNumaNodeIsAvailable:
 * @node: node to check
 *
 * Node numbering on a host may contain holes, so a node id below
 * the maximum is not necessarily usable. This checks whether @node
 * is set in libnuma's numa_nodes_ptr bitmask.
 *
 * Returns: true if @node is available,
 *          false if @node doesn't exist
 */
bool
virNumaNodeIsAvailable(int node)
{
    if (numa_bitmask_isbitset(numa_nodes_ptr, node))
        return true;

    return false;
}


/**
 * virNumaGetDistances:
 * @node: identifier of the requested NUMA node
 * @distances: array of distances to sibling nodes
 * @ndistances: size of @distances
 *
 * Get array of distances to sibling nodes from @node. If a
 * distances[x] equals to zero, the node x is not enabled or
 * doesn't exist. As a special case, if @node itself refers to
 * disabled or nonexistent NUMA node, then @distances and
 * @ndistances are set to NULL and zero respectively.
 *
 * The distances are a bit of magic. For a local node the value
 * is 10, for remote it's typically 20 meaning that the time
 * penalty for accessing a remote node is twice as big as when
 * accessing a local node.
 *
 * Returns 0 on success, -1 otherwise.
 */
int
virNumaGetDistances(int node,
                    int **distances,
                    int *ndistances)
{
    int max_node;
    size_t i;

    if (!virNumaNodeIsAvailable(node)) {
        VIR_DEBUG("Node %d does not exist", node);
        *distances = NULL;
        *ndistances = 0;
        return 0;
    }

    if ((max_node = virNumaGetMaxNode()) < 0)
        return -1;

    if (VIR_ALLOC_N(*distances, max_node + 1) < 0)
        return -1;

    *ndistances = max_node + 1;

    for (i = 0; i <= max_node; i++) {
        /* Skip sibling nodes that do not exist; their slot is not
         * written here and is meant to read as zero (see above). */
        if (!virNumaNodeIsAvailable(i))
            continue;

        (*distances)[i] = numa_distance(node, i);
    }

    return 0;
}

#else /* !(WITH_NUMACTL && HAVE_NUMA_BITMASK_ISBITSET) */

/* Variant used when the libnuma node bitmask cannot be queried: any
 * node id between 0 and the highest known node is assumed to exist. */
bool
virNumaNodeIsAvailable(int node)
{
    int highest = virNumaGetMaxNode();

    /* Do we have anything better? */
    if (highest < 0 || node < 0 || node > highest)
        return false;

    return true;
}


/* Variant used when distance information cannot be queried: reports
 * an empty distance list and succeeds. */
int
virNumaGetDistances(int node ATTRIBUTE_UNUSED,
                    int **distances,
                    int *ndistances)
{
    VIR_DEBUG("NUMA distance information isn't available on this host");

    *ndistances = 0;
    *distances = NULL;

    return 0;
}
#endif /* !(WITH_NUMACTL && HAVE_NUMA_BITMASK_ISBITSET) */


/* currently all the huge page stuff below is linux only */
#ifdef __linux__

# define HUGEPAGES_NUMA_PREFIX "/sys/devices/system/node/"
# define HUGEPAGES_SYSTEM_PREFIX "/sys/kernel/mm/hugepages/"
# define HUGEPAGES_PREFIX "hugepages-"

/*
 * Build the sysfs path of a hugepage pool file into @path.
 *
 * @node == -1 selects the system-wide pool, any other value the pool
 * of that NUMA node. @page_size is put into the "hugepages-<N>kB"
 * directory name and @suffix (NULL is treated as "") is appended
 * as the file name.
 *
 * Returns the virAsprintf() result when formatting fails or the
 * resulting path exists; returns -1 with an error reported when the
 * path does not exist. @path stays allocated in the latter case.
 */
static int
virNumaGetHugePageInfoPath(char **path,
                           int node,
                           unsigned int page_size,
                           const char *suffix)
{
    const char *leaf = suffix ? suffix : "";
    int rc;

    if (node == -1) {
        /* We are aiming at overall system info */
        rc = virAsprintf(path,
                         HUGEPAGES_SYSTEM_PREFIX HUGEPAGES_PREFIX "%ukB/%s",
                         page_size, leaf);
    } else {
        /* We are aiming on specific NUMA node */
        rc = virAsprintf(path,
                         HUGEPAGES_NUMA_PREFIX "node%d/hugepages/"
                         HUGEPAGES_PREFIX "%ukB/%s",
                         node, page_size, leaf);
    }

    if (rc < 0 || virFileExists(*path))
        return rc;

    /* The path is missing; work out the most specific reason. */
    if (node == -1) {
        virReportError(VIR_ERR_OPERATION_FAILED,
                       _("page size %u is not available"),
                       page_size);
    } else if (!virNumaNodeIsAvailable(node)) {
        virReportError(VIR_ERR_OPERATION_FAILED,
                       _("NUMA node %d is not available"),
                       node);
    } else {
        virReportError(VIR_ERR_OPERATION_FAILED,
                       _("page size %u is not available on node %d"),
                       page_size, node);
    }

    return -1;
}

/*
 * Store the name of the directory holding the hugepage pools into
 * @path: the per-node directory for a specific @node, or the
 * system-wide one when @node is -1.
 *
 * Returns whatever the underlying string helper returns.
 */
static int
virNumaGetHugePageInfoDir(char **path, int node)
{
    if (node != -1) {
        return virAsprintf(path,
                           HUGEPAGES_NUMA_PREFIX "node%d/hugepages/",
                           node);
    }

    return VIR_STRDUP(*path, HUGEPAGES_SYSTEM_PREFIX);
}

/**
 * virNumaGetHugePageInfo:
 * @node: NUMA node id
 * @page_size: which huge page are we interested in
 * @page_avail: total number of huge pages in the pool
 * @page_free: the number of free huge pages in the pool
 *
 * For given NUMA node and huge page size fetch information on
 * total number of huge pages in the pool (both free and taken)
 * and count for free huge pages in the pool.
 *
 * If you're interested in just one bit, pass NULL to the other one.
 *
 * As a special case, if @node == -1, overall info is fetched
 * from the system.
 *
 * Returns 0 on success, -1 otherwise (with error reported).
 */
static int
virNumaGetHugePageInfo(int node,
                       unsigned int page_size,
                       unsigned long long *page_avail,
                       unsigned long long *page_free)
{
    char *end;
    VIR_AUTOFREE(char *) path = NULL;
    VIR_AUTOFREE(char *) buf = NULL;

    if (page_avail) {
        if (virNumaGetHugePageInfoPath(&path, node,
                                       page_size, "nr_hugepages") < 0)
            return -1;

        if (virFileReadAll(path, 1024, &buf) < 0)
            return -1;

        if (virStrToLong_ull(buf, &end, 10, page_avail) < 0 ||
            *end != '\n') {
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           _("unable to parse: %s"),
                           buf);
            return -1;
        }
        VIR_FREE(buf);
        VIR_FREE(path);
    }

    if (page_free) {
        if (virNumaGetHugePageInfoPath(&path, node,
                                       page_size, "free_hugepages") < 0)
            return -1;

        if (virFileReadAll(path, 1024, &buf) < 0)
            return -1;

        if (virStrToLong_ull(buf, &end, 10, page_free) < 0 ||
            *end != '\n') {
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           _("unable to parse: %s"),
                           buf);
            return -1;
        }
    }

    return 0;
}

/**
 * virNumaGetPageInfo:
 * @node: NUMA node id
 * @page_size: which huge page are we interested in (in KiB)
 * @huge_page_sum: the sum of memory taken by huge pages (in
 * bytes)
 * @page_avail: total number of huge pages in the pool
 * @page_free: the number of free huge pages in the pool
 *
 * For given NUMA node and page size fetch information on
 * total number of pages in the pool (both free and taken)
 * and count for free pages in the pool.
 *
 * The @huge_page_sum parameter exists due to the Linux kernel
 * limitation. The problem is, if there are some huge pages
 * allocated, they are accounted under the 'MemUsed' field in the
 * meminfo file instead of being subtracted from the 'MemTotal'.
 * We must do the subtraction ourselves.
 * If unsure, pass 0.
 *
 * If you're interested in just one bit, pass NULL to the other one.
 *
 * As a special case, if @node == -1, overall info is fetched
 * from the system.
 *
 * Returns 0 on success, -1 otherwise (with error reported).
 */
int
virNumaGetPageInfo(int node,
                   unsigned int page_size,
                   unsigned long long huge_page_sum,
                   unsigned long long *page_avail,
                   unsigned long long *page_free)
{
    int ret = -1;
    long system_page_size = virGetSystemPageSize();

    /* sysconf() returns page size in bytes,
     * the @page_size is however in kibibytes */
    if (page_size == system_page_size / 1024) {
        unsigned long long memsize, memfree;

        /* TODO: come up with better algorithm that takes huge pages into
         * account. The problem is huge pages cut off regular memory. */
        if (node == -1) {
            if (virHostMemGetInfo(&memsize, &memfree) < 0)
                goto cleanup;
        } else {
            if (virNumaGetNodeMemory(node, &memsize, &memfree) < 0)
                goto cleanup;
        }

        /* see description above */
        memsize -= huge_page_sum;

        if (page_avail)
            *page_avail = memsize / system_page_size;

        if (page_free)
            *page_free = memfree / system_page_size;
    } else {
        if (virNumaGetHugePageInfo(node, page_size, page_avail, page_free) < 0)
            goto cleanup;
    }

    ret = 0;
 cleanup:
    return ret;
}


/**
 * virNumaGetPages:
 * @node: NUMA node id
 * @pages_size: list of pages supported on @node
 * @pages_avail: list of the pool sizes on @node
 * @pages_free: list of free pages on @node
 * @npages: the lists size
 *
 * For given NUMA node fetch info on pages. The size of pages
 * (e.g.  4K, 2M, 1G) is stored into @pages_size, the size of the
 * pool is then stored into @pages_avail and the number of free
 * pages in the pool is stored into @pages_free. The lists are
 * sorted by increasing page size and ownership of each returned
 * list passes to the caller.
 *
 * If you're interested only in some lists, pass NULL to the
 * other ones. @npages is always written on success.
 *
 * As a special case, if @node == -1, overall info is fetched
 * from the system.
 *
 * Returns 0 on success, -1 otherwise.
 */
int
virNumaGetPages(int node,
                unsigned int **pages_size,
                unsigned long long **pages_avail,
                unsigned long long **pages_free,
                size_t *npages)
{
    int ret = -1;
    DIR *dir = NULL;
    int direrr = 0;
    struct dirent *entry;
    unsigned int ntmp = 0;
    size_t i;
    bool exchange;
    long system_page_size;
    unsigned long long huge_page_sum = 0;
    VIR_AUTOFREE(char *) path = NULL;
    VIR_AUTOFREE(unsigned int *) tmp_size = NULL;
    VIR_AUTOFREE(unsigned long long *) tmp_avail = NULL;
    VIR_AUTOFREE(unsigned long long *) tmp_free = NULL;

    /* sysconf() returns page size in bytes,
     * but we are storing the page size in kibibytes. */
    system_page_size = virGetSystemPageSizeKB();

    /* Query huge pages at first.
     * On Linux systems, the huge pages pool cuts off the available memory and
     * is always shown as used memory. Here, however, we want to report
     * slightly different information. So we take the total memory on a node
     * and subtract memory taken by the huge pages. */
    if (virNumaGetHugePageInfoDir(&path, node) < 0)
        goto cleanup;

    /* It's okay if the @path doesn't exist. Maybe we are running on
     * system without huge pages support where the path may not exist. */
    if (virDirOpenIfExists(&dir, path) < 0)
        goto cleanup;

    while (dir && (direrr = virDirRead(dir, &entry, path)) > 0) {
        const char *page_name = entry->d_name;
        unsigned int page_size;
        unsigned long long page_avail = 0;
        unsigned long long page_free = 0;
        char *end;

        /* Just to give you a hint, we're dealing with this:
         * hugepages-2048kB/  or   hugepages-1048576kB/ */
        if (!STRPREFIX(entry->d_name, HUGEPAGES_PREFIX))
            continue;

        page_name += strlen(HUGEPAGES_PREFIX);

        if (virStrToLong_ui(page_name, &end, 10, &page_size) < 0 ||
            STRCASENEQ(end, "kB")) {
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           _("unable to parse %s"),
                           entry->d_name);
            goto cleanup;
        }

        if (virNumaGetHugePageInfo(node, page_size,
                                   &page_avail, &page_free) < 0)
            goto cleanup;

        if (VIR_REALLOC_N(tmp_size, ntmp + 1) < 0 ||
            VIR_REALLOC_N(tmp_avail, ntmp + 1) < 0 ||
            VIR_REALLOC_N(tmp_free, ntmp + 1) < 0)
            goto cleanup;

        tmp_size[ntmp] = page_size;
        tmp_avail[ntmp] = page_avail;
        tmp_free[ntmp] = page_free;
        ntmp++;

        /* page_size is in kibibytes while we want huge_page_sum
         * in just bytes. The 64-bit multiplier is essential:
         * 1024 * page_size evaluated as unsigned int would wrap
         * for page sizes of 4 GiB and more. */
        huge_page_sum += 1024ULL * page_size * page_avail;
    }

    if (direrr < 0)
        goto cleanup;

    /* Now append the ordinary system pages */
    if (VIR_REALLOC_N(tmp_size, ntmp + 1) < 0 ||
        VIR_REALLOC_N(tmp_avail, ntmp + 1) < 0 ||
        VIR_REALLOC_N(tmp_free, ntmp + 1) < 0)
        goto cleanup;

    if (virNumaGetPageInfo(node, system_page_size, huge_page_sum,
                           &tmp_avail[ntmp], &tmp_free[ntmp]) < 0)
        goto cleanup;
    tmp_size[ntmp] = system_page_size;
    ntmp++;

    /* Just to produce nice output, sort the arrays by increasing page size.
     * The three arrays are kept in lockstep. ntmp is at least 1 here, so
     * ntmp - 1 cannot wrap. */
    do {
        exchange = false;
        for (i = 0; i < ntmp - 1; i++) {
            if (tmp_size[i] > tmp_size[i + 1]) {
                exchange = true;
                SWAP(tmp_size[i], tmp_size[i + 1]);
                SWAP(tmp_avail[i], tmp_avail[i + 1]);
                SWAP(tmp_free[i], tmp_free[i + 1]);
            }
        }
    } while (exchange);

    /* Hand over only the lists the caller asked for; the rest is
     * released automatically on return. */
    if (pages_size)
        VIR_STEAL_PTR(*pages_size, tmp_size);
    if (pages_avail)
        VIR_STEAL_PTR(*pages_avail, tmp_avail);
    if (pages_free)
        VIR_STEAL_PTR(*pages_free, tmp_free);
    *npages = ntmp;
    ret = 0;
 cleanup:
    VIR_DIR_CLOSE(dir);
    return ret;
}


int
virNumaSetPagePoolSize(int node,
                       unsigned int page_size,
                       unsigned long long page_count,
                       bool add)
{
    char *end;
    unsigned long long nr_count;
    VIR_AUTOFREE(char *) nr_path = NULL;
    VIR_AUTOFREE(char *) nr_buf =  NULL;

    if (page_size == virGetSystemPageSizeKB()) {
        /* Special case as kernel handles system pages
         * differently to huge pages. */
        virReportError(VIR_ERR_OPERATION_UNSUPPORTED, "%s",
                       _("system pages pool can't be modified"));
        return -1;
    }

    if (virNumaGetHugePageInfoPath(&nr_path, node, page_size, "nr_hugepages") < 0)
        return -1;

    /* Firstly check, if there's anything for us to do */
    if (virFileReadAll(nr_path, 1024, &nr_buf) < 0)
        return -1;

    if (virStrToLong_ull(nr_buf, &end, 10, &nr_count) < 0 ||
        *end != '\n') {
        virReportError(VIR_ERR_OPERATION_FAILED,
                       _("invalid number '%s' in '%s'"),
                       nr_buf, nr_path);
        return -1;
    }

    if (add) {
        if (!page_count) {
            VIR_DEBUG("Nothing left to do: add = true page_count = 0");
            return 0;
        }
        page_count += nr_count;
    } else {
        if (nr_count == page_count) {
            VIR_DEBUG("Nothing left to do: nr_count = page_count = %llu",
                      page_count);
            return 0;
        }
    }

    /* Okay, page pool adjustment must be done in two steps. In
     * first we write the desired number into nr_hugepages file.
     * Kernel then starts to allocate the pages (return from
     * write should be postponed until the kernel is finished).
     * However, kernel may have not been successful and reserved
     * all the pages we wanted. So do the second read to check.
     */
    VIR_FREE(nr_buf);
    if (virAsprintf(&nr_buf, "%llu", page_count) < 0)
        return -1;

    if (virFileWriteStr(nr_path, nr_buf, 0) < 0) {
        virReportSystemError(errno,
                             _("Unable to write to: %s"), nr_path);
        return -1;
    }

    /* And now do the check. */

    VIR_FREE(nr_buf);
    if (virFileReadAll(nr_path, 1024, &nr_buf) < 0)
        return -1;

    if (virStrToLong_ull(nr_buf, &end, 10, &nr_count) < 0 ||
        *end != '\n') {
        virReportError(VIR_ERR_OPERATION_FAILED,
                       _("invalid number '%s' in '%s'"),
                       nr_buf, nr_path);
        return -1;
    }

    if (nr_count != page_count) {
        virReportError(VIR_ERR_OPERATION_FAILED,
                       _("Unable to allocate %llu pages. Allocated only %llu"),
                       page_count, nr_count);
        return -1;
    }

    return 0;
}


#else /* #ifdef __linux__ */
/* Variant for platforms other than Linux: always fails with an
 * "unsupported" error reported. */
int
virNumaGetPageInfo(int node ATTRIBUTE_UNUSED,
                   unsigned int page_size ATTRIBUTE_UNUSED,
                   unsigned long long huge_page_sum ATTRIBUTE_UNUSED,
                   unsigned long long *page_avail ATTRIBUTE_UNUSED,
                   unsigned long long *page_free ATTRIBUTE_UNUSED)
{
    const int rc = -1;

    virReportError(VIR_ERR_OPERATION_UNSUPPORTED, "%s",
                   _("page info is not supported on this platform"));

    return rc;
}


/* Variant for platforms other than Linux: always fails with an
 * "unsupported" error reported; no output argument is touched. */
int
virNumaGetPages(int node ATTRIBUTE_UNUSED,
                unsigned int **pages_size ATTRIBUTE_UNUSED,
                unsigned long long **pages_avail ATTRIBUTE_UNUSED,
                unsigned long long **pages_free ATTRIBUTE_UNUSED,
                size_t *npages ATTRIBUTE_UNUSED)
{
    const int rc = -1;

    virReportError(VIR_ERR_OPERATION_UNSUPPORTED, "%s",
                   _("page info is not supported on this platform"));

    return rc;
}


/* Variant for platforms other than Linux: always fails with an
 * "unsupported" error reported. */
int
virNumaSetPagePoolSize(int node ATTRIBUTE_UNUSED,
                       unsigned int page_size ATTRIBUTE_UNUSED,
                       unsigned long long page_count ATTRIBUTE_UNUSED,
                       bool add ATTRIBUTE_UNUSED)
{
    const int rc = -1;

    virReportError(VIR_ERR_OPERATION_UNSUPPORTED, "%s",
                   _("page pool allocation is not supported on this platform"));

    return rc;
}
#endif /* #ifdef __linux__ */

/**
 * virNumaNodesetIsAvailable:
 * @nodeset: bitmap of NUMA node ids to check (may be NULL)
 *
 * Walks over every bit set in @nodeset and verifies the
 * corresponding NUMA node is available. A NULL @nodeset is
 * considered available.
 *
 * Returns true if all nodes are available, false (with error
 * reported for the first unavailable node) otherwise.
 */
bool
virNumaNodesetIsAvailable(virBitmapPtr nodeset)
{
    ssize_t node;

    if (!nodeset)
        return true;

    for (node = virBitmapNextSetBit(nodeset, -1);
         node >= 0;
         node = virBitmapNextSetBit(nodeset, node)) {
        if (!virNumaNodeIsAvailable(node)) {
            virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
                           _("NUMA node %zd is unavailable"), node);
            return false;
        }
    }

    return true;
}


/**
 * virNumaGetHostMemoryNodeset:
 *
 * Builds a bitmap with one bit per host NUMA node id (0 up to the
 * highest node) and sets the bits of the available nodes that
 * contain memory. Nodes whose memory size cannot be determined
 * are treated as having memory.
 *
 * Returns the bitmap on success, NULL if the highest node id cannot
 * be determined or the bitmap cannot be allocated.
 */
virBitmapPtr
virNumaGetHostMemoryNodeset(void)
{
    virBitmapPtr result = NULL;
    unsigned long long nodesize;
    int maxnode = virNumaGetMaxNode();
    size_t id = 0;

    if (maxnode < 0)
        return NULL;

    result = virBitmapNew(maxnode + 1);
    if (!result)
        return NULL;

    for (id = 0; id <= maxnode; id++) {
        if (!virNumaNodeIsAvailable(id))
            continue;

        /* Only a node positively known to be empty is left out; if
         * its size cannot be detected it is assumed to have memory. */
        if (virNumaGetNodeMemory(id, &nodesize, NULL) >= 0 && nodesize == 0)
            continue;

        ignore_value(virBitmapSetBit(result, id));
    }

    return result;
}
