/* -*- mode: C; c-file-style: "gnu"; indent-tabs-mode: nil; -*-
 *
 * Copyright 2025 GNOME Foundation, Inc.
 *
 * SPDX-License-Identifier: GPL-2.0-or-later
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <http://www.gnu.org/licenses/>.
 *
 * Authors:
 *  - Philip Withnall <pwithnall@gnome.org>
 */

#include <arpa/inet.h>
#include <assert.h>
#include <cdb.h>
#include <ctype.h>
#include <errno.h>
#include <err.h>
#include <fcntl.h>
#include <limits.h>
#include <nss.h>
#include <netdb.h>
#include <pwd.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <unistd.h>


/**
 * NSS web filtering module
 *
 * This is an NSS module implementing the `gethostbyname()` functions, which
 * applies a user-specific filter list to all name lookup requests to allow
 * filtering websites.
 *
 * Install it in `/etc/nsswitch.conf` using:
 * ```
 * hosts:      files myhostname malcontent mdns4_minimal [NOTFOUND=return] resolve [!UNAVAIL=return] dns
 * ```
 *
 * Build a filter list using `malcontent-webd`, or the following commands:
 * ```
 * wget https://v.firebog.net/hosts/Easylist.txt
 * cdb -c -m test.db Easylist.txt
 * sudo chmod o+r test.db
 * sudo mv test.db /var/lib/malcontent-nss/filter-lists/${username}
 * ```
 *
 * The input filter list must be a simple list of hostnames. Regexps or dot
 * prefixes are not allowed.
 *
 * The resulting database is in
 * [cdb format](https://www.corpit.ru/mjt/tinycdb.html), which is effectively a
 * key-value store.
 *
 * The database has several possible key-value formats:
 *  - `*` as a key (with an empty value) indicates that all domains should be
 *    blocked by default (i.e. the filter list is effectively an allow-list
 *    rather than a block-list)
 *  - A hostname as a key, with an empty value, indicates that hostname should
 *    be blocked.
 *  - A hostname as a key, with another hostname as a value, indicates that the
 *    first hostname should be redirected to the second one.
 *  - A hostname prefixed with `~` as a key, with an empty value, indicates that
 *    hostname should be _allowed_ (even if the `*` key, or a block or redirect
 *    entry for the same hostname, is present in the file).
 *
 * The hostname `use-application-dns.net` is *always* blocked to indicate to
 * browsers that DNS-over-HTTPS should be disabled, as it bypasses this filter
 * module. See the
 * [Mozilla documentation](https://support.mozilla.org/en-US/kb/canary-domain-use-application-dnsnet).
 *
 * NSS documentation:
 *  - https://www.gnu.org/software/libc/manual/html_node/NSS-Modules-Interface.html
 *  - https://www.gnu.org/software/libc/manual/html_node/NSS-Module-Function-Internals.html
 *  - https://elixir.bootlin.com/glibc/glibc-2.41/source/nss/getaddrinfo.c
 */

/* Exported module API.
 *
 * These functions are defined without `static`, so they are declared here
 * before their definitions. All of the names share the `_nss_malcontent_`
 * prefix. */

/* Filtered lookup of @name for address family @af. On success the answer is
 * written to @result (using @buffer for storage) and, if they are non-NULL,
 * @ttlp and @canonp are filled in too. */
enum nss_status _nss_malcontent_gethostbyname3_r (const char      *name,
                                                  int              af,
                                                  struct hostent  *result,
                                                  char            *buffer,
                                                  size_t           buffer_len,
                                                  int             *errnop,
                                                  int             *h_errnop,
                                                  int32_t         *ttlp,
                                                  char           **canonp);
/* Same lookup as gethostbyname3_r, without the TTL and canonical name
 * outputs. */
enum nss_status _nss_malcontent_gethostbyname2_r (const char     *name,
                                                  int             af,
                                                  struct hostent *result,
                                                  char           *buffer,
                                                  size_t          buffer_len,
                                                  int            *errnop,
                                                  int            *h_errnop);
/* NOTE(review): no definitions of the following two functions are visible in
 * this part of the file - confirm they exist elsewhere, or drop the
 * declarations. */
enum nss_status _nss_malcontent_sethostent (void);
enum nss_status _nss_malcontent_endhostent (void);


/* Directory containing one filter list database per user. The user's name is
 * appended directly to this string to form the database path, so it must end
 * with a `/`.
 *
 * NOTE(review): this is an exported, reassignable pointer (neither `static`
 * nor a const pointer) - presumably so that tests can point it elsewhere;
 * confirm before narrowing its linkage. */
const char *filter_list_dir = "/var/lib/malcontent-nss/filter-lists/";

/* Maximum length of a full domain name, as per
 * https://datatracker.ietf.org/doc/html/rfc2181#section-11 not including trailing nul */
#define HOSTNAME_MAX 255 /* bytes */

/* Round @in up to the next multiple of the alignment of a pointer, so that a
 * pointer can be stored at that offset into a suitably aligned buffer.
 *
 * Values which are already a multiple of the alignment are returned
 * unchanged. */
static inline size_t
align_as_pointer (size_t in)
{
  /* Alignments are powers of two, so (alignment - 1) is a mask of the low
   * bits which have to be zero in an aligned offset. */
  const size_t low_bits = __alignof__ (void *) - 1;

  return (in + low_bits) & ~low_bits;
}

/* Cleanup helper for file descriptors (suitable for use with
 * `__attribute__((__cleanup__))`): close the descriptor stored in @fd_ptr, if
 * there is one, and reset @fd_ptr to -1.
 *
 * Errors from close() are deliberately ignored, and errno is left as the
 * caller had it. */
static inline void
clear_fd (int *fd_ptr)
{
  const int saved_errno = errno;
  const int owned_fd = *fd_ptr;

  /* Forget the descriptor before closing it, so it can never be closed twice
   * through this pointer. */
  *fd_ptr = -1;

  if (owned_fd >= 0)
    {
      close (owned_fd);
      errno = saved_errno;
    }
}

/* Cleanup helper for getaddrinfo() results (suitable for use with
 * `__attribute__((__cleanup__))`): free the list stored in @ai_ptr, if there
 * is one, and reset @ai_ptr to NULL.
 *
 * errno is left as the caller had it, whatever freeaddrinfo() does to it. */
static inline void
clear_addrinfo (struct addrinfo **ai_ptr)
{
  const int saved_errno = errno;
  struct addrinfo *list = *ai_ptr;

  /* Forget the list before freeing it, so it can never be freed twice
   * through this pointer. */
  *ai_ptr = NULL;

  if (list != NULL)
    {
      freeaddrinfo (list);
      errno = saved_errno;
    }
}

/*
 * lookup_username:
 * @uid: user ID to look up
 * @buf: buffer to write the nul-terminated user name into
 * @buf_len: size of @buf, in bytes; must be at least `LOGIN_NAME_MAX + 1`
 *
 * Look up the name of the user with the given @uid using getpwuid_r(). If the
 * user exists but their passwd entry has no (or an empty) name, the decimal
 * representation of @uid is used instead.
 *
 * On any failure, @buf is set to the empty string.
 *
 * Returns: 0 on success; `ENOENT` if there is no such user; `ENAMETOOLONG` if
 *   the name does not fit in @buf; otherwise the error number returned by
 *   getpwuid_r() (which includes `ERANGE` if the passwd entry does not fit in
 *   the fixed-size internal buffer)
 */
static int
lookup_username (uid_t    uid,
                 char    *buf,
                 size_t   buf_len)
{
  char buffer[4096];
  struct passwd pwbuf;
  struct passwd *result = NULL;
  int pwuid_errno;
  int n_written;

  assert (buf != NULL);
  assert (buf_len >= LOGIN_NAME_MAX + 1);

  pwuid_errno = getpwuid_r (uid, &pwbuf, buffer, sizeof (buffer), &result);

  if (result == NULL)
    {
      /* Either the lookup failed (pwuid_errno != 0) or it succeeded and the
       * user was not found (pwuid_errno == 0). */
      buf[0] = '\0';
      return (pwuid_errno != 0) ? pwuid_errno : ENOENT;
    }

  if (result->pw_name != NULL && result->pw_name[0] != '\0')
    {
      n_written = snprintf (buf, buf_len, "%s", result->pw_name);
    }
  else
    {
      /* uid_t is unsigned, so format it as such: going via `int` would print
       * large UIDs as negative numbers. */
      n_written = snprintf (buf, buf_len, "%lu", (unsigned long) uid);
    }

  /* Never hand back a truncated name: it might be the name of a different
   * user. */
  if (n_written < 0 || (size_t) n_written >= buf_len)
    {
      buf[0] = '\0';
      return ENAMETOOLONG;
    }

  return 0;
}

/*
 * open_filter_list:
 * @out_filter_list_fd: return location for the opened file descriptor
 *
 * Open the filter list database for the effective user of the calling
 * process. The path is `filter_list_dir` with the user's name appended.
 *
 * It is not an error for the user to have no filter list: in that case 0 is
 * returned and @out_filter_list_fd is set to -1. @out_filter_list_fd is also
 * set to -1 on every error path. On success the caller owns the returned
 * descriptor and must close it.
 *
 * Returns: 0 on success (including when no filter list exists); otherwise an
 *   errno value from looking up the user name, from open(), or `ENAMETOOLONG`
 *   if the path does not fit in `PATH_MAX` bytes
 *
 * FIXME: This re-opens the database for each request. We could keep it open
 * between requests and use inotify to reload it, which would save a lot of CPU
 * time.
 */
static int
open_filter_list (int *out_filter_list_fd)
{
  int username_errno;
  char username[LOGIN_NAME_MAX + 1] = { '\0' };
  /* This has to hold a whole path (directory plus user name), so size it with
   * PATH_MAX; NAME_MAX only bounds a single path component. */
  char filter_list_file[PATH_MAX] = { '\0', };
  int path_len;
  int filter_list_fd;

  assert (out_filter_list_fd != NULL);

  *out_filter_list_fd = -1;

  /* Build the filter list filename using the effective user's username. */
  username_errno = lookup_username (geteuid (), username, sizeof (username));
  if (username_errno != 0)
    return username_errno;

  path_len = snprintf (filter_list_file, sizeof (filter_list_file), "%s%s",
                       filter_list_dir, username);

  /* Refuse to continue with a truncated path: it could name a different
   * user's filter list. */
  if (path_len < 0 || (size_t) path_len >= sizeof (filter_list_file))
    return ENAMETOOLONG;

  /* Open the filter list file; the caller reads it using tinycdb. */
  filter_list_fd = open (filter_list_file, O_RDONLY | O_CLOEXEC);
  if (filter_list_fd < 0)
    {
      /* If the filter list doesn't exist for this user, that's fine: there is
       * simply nothing to filter. Any other error is reported. */
      return (errno == ENOENT) ? 0 : errno;
    }

  /* Success */
  *out_filter_list_fd = filter_list_fd;
  return 0;
}

/*
 * sockaddr_to_inet_addr:
 * @sa: socket address, whose family must be `AF_INET` or `AF_INET6`
 * @sa_len: length of @sa, in bytes (currently unused)
 *
 * Get a pointer to the raw network address (`struct in_addr` or
 * `struct in6_addr`) embedded in @sa. The returned pointer points into @sa,
 * so it is only valid for as long as @sa is.
 *
 * Passing any other address family is a programming error, which is caught by
 * an assertion. If assertions are compiled out, `NULL` is returned rather than
 * falling off the end of the function.
 *
 * Returns: pointer to the address bytes inside @sa, or `NULL` if the address
 *   family is unsupported
 */
static const char *
sockaddr_to_inet_addr (const struct sockaddr *sa,
                       size_t                 sa_len)
{
  /* The address family alone determines the layout of @sa. */
  (void) sa_len;

  switch (sa->sa_family)
    {
    case AF_INET:
      return (const char *) &((const struct sockaddr_in *) sa)->sin_addr;
    case AF_INET6:
      return (const char *) &((const struct sockaddr_in6 *) sa)->sin6_addr;
    default:
      assert (0);  /* should not be reached */
      return NULL;
    }
}

/**
 * _nss_malcontent_gethostbyname3_r:
 * @name: hostname being looked up
 * @af: address family being requested; only `AF_INET` and `AF_INET6` are
 *   handled
 * @result: hostent to fill in on success
 * @buffer: caller-provided storage which all the pointers in @result point
 *   into on success
 * @buffer_len: size of @buffer, in bytes
 * @errnop: return location for an errno value
 * @h_errnop: return location for an h_errno value
 * @ttlp: (nullable): return location for the TTL, which is always 0 on success
 * @canonp: (nullable): return location for the canonical name on success
 *
 * Apply the calling user's filter list to a lookup of @name:
 *  - If @name is blocked, a sinkhole address (all zeroes) is returned.
 *  - If @name is redirected, the addresses of the redirect target are
 *    returned, with the target as the canonical name.
 *  - Otherwise `NSS_STATUS_NOTFOUND` is returned so that another module
 *    resolves @name.
 *
 * Precedence, lowest to highest: an entry for @name itself (block or
 * redirect), then the `*` wildcard (block), then a `~`-prefixed allow entry.
 *
 * As per https://elixir.bootlin.com/glibc/glibc-2.41/source/nss/getaddrinfo.c,
 * glibc only calls gethostbyname4_r and gethostbyname3_r conditionally. If we
 * want to support the most possible queries (and versions of glibc), provide
 * gethostbyname2_r. glibc will handle the fallbacks for other API versions.
 *
 * We do need to provide a gethostbyname3_r() function, though, as that's
 * explicitly called when AI_CANONNAME is set in the request flags.
 *
 * NOTE(review): all comparisons against the filter list are byte-wise, so
 * names differing only in case or by a trailing dot are not matched - confirm
 * whether callers normalise @name first.
 *
 * Returns: `NSS_STATUS_SUCCESS` if @result was filled in;
 *   `NSS_STATUS_NOTFOUND` if this module has nothing to say about @name;
 *   `NSS_STATUS_TRYAGAIN` (with `ERANGE`) if @buffer is too small;
 *   `NSS_STATUS_UNAVAIL` if @af is unsupported, @name is too long or the
 *   filter list could not be opened
 */
enum nss_status
_nss_malcontent_gethostbyname3_r (const char      *name,
                                  int              af,
                                  struct hostent  *result,
                                  char            *buffer,
                                  size_t           buffer_len,
                                  int             *errnop,
                                  int             *h_errnop,
                                  int32_t         *ttlp,
                                  char           **canonp)
{
  enum
    {
      UNKNOWN,
      ALLOW,
      REDIRECT,
      BLOCK,
    }
  domain_action = UNKNOWN;
  char redirect_hostname[HOSTNAME_MAX + 1] = { '\0' };  /* only used if domain_action == REDIRECT */
  struct addrinfo *redirect_addrinfo __attribute__((__cleanup__(clear_addrinfo))) = NULL;
  size_t name_len;

  /* We need to call getaddrinfo() recursively for redirects, but want other
   * NSS modules to handle it, so this module steps aside while recursing.
   *
   * The flag is per-thread: the recursive call happens synchronously on the
   * calling thread, and a process-wide flag would make lookups on every other
   * thread skip the filter while one thread is resolving a redirect. */
  static __thread bool recursing = false;

  if (recursing)
    {
      *errnop = EINVAL;
      *h_errnop = NO_ADDRESS;
      return NSS_STATUS_NOTFOUND;
    }

  /* Is the app querying for a protocol which we support? */
  if (af != AF_INET && af != AF_INET6)
    {
      *errnop = EAFNOSUPPORT;
      *h_errnop = HOST_NOT_FOUND;
      return NSS_STATUS_UNAVAIL;
    }

  name_len = strlen (name);

  /* Always block the DNS-over-HTTPS canary domain, otherwise browsers may use
   * DNS-over-HTTPS and bypass NSS.
   *
   * See https://support.mozilla.org/en-US/kb/canary-domain-use-application-dnsnet
   */
  if (strcmp (name, "use-application-dns.net") == 0)
    domain_action = BLOCK;

  if (domain_action == UNKNOWN)
    {
      /* Open the filter list file and check that next. */
      int filter_list_fd __attribute__((__cleanup__(clear_fd))) = -1;
      char allow_name[HOSTNAME_MAX + 2] = { '\0' };  /* '~' + hostname + nul */
      size_t allow_name_len = 0;
      cdbi_t vlen;
      int open_filter_list_errno;

      /* Build the database key for the allowlist entry for @name by prefixing
       * with a tilde. */
      if (name_len >= sizeof (allow_name) - 1)
        {
          *errnop = ENOMEM;
          *h_errnop = HOST_NOT_FOUND;
          return NSS_STATUS_UNAVAIL;
        }

      allow_name[0] = '~';
      memcpy (allow_name + 1, name, name_len + 1);
      allow_name_len = name_len + 1;

      /* Open the filter list. */
      open_filter_list_errno = open_filter_list (&filter_list_fd);
      if (open_filter_list_errno != 0)
        {
          *errnop = open_filter_list_errno;
          *h_errnop = HOST_NOT_FOUND;
          return NSS_STATUS_UNAVAIL;
        }

      if (filter_list_fd >= 0 &&
          cdb_seek (filter_list_fd, name, name_len, &vlen) > 0)
        {
          /* @name was found in the database. If the value is non-empty then
           * that's a redirect destination.
           *
           * The value must be strictly smaller than the buffer, to leave room
           * for the nul terminator which the database does not store. Values
           * which are too long to be hostnames result in a block. */
          if (vlen > 0 && vlen < sizeof (redirect_hostname))
            {
              struct addrinfo hints;
              int retval;

              memset (&hints, 0, sizeof (hints));
              hints.ai_family = af;
              /* Only the addresses are of interest. Without a socket type,
               * each address is returned once per socket type, which would
               * put duplicates in the address list. */
              hints.ai_socktype = SOCK_STREAM;
              hints.ai_flags = AI_V4MAPPED | AI_ADDRCONFIG;

              retval = cdb_bread (filter_list_fd, redirect_hostname, vlen);
              if (retval == 0)
                {
                  redirect_hostname[vlen] = '\0';

                  recursing = true;
                  retval = getaddrinfo (redirect_hostname, NULL, &hints, &redirect_addrinfo);
                  recursing = false;
                }

              /* If the redirect target can't be read or resolved, fail closed. */
              if (retval == 0)
                domain_action = REDIRECT;
              else
                domain_action = BLOCK;
            }
          else
            {
              domain_action = BLOCK;
            }
        }

      if (filter_list_fd >= 0 &&
          cdb_seek (filter_list_fd, "*", strlen ("*"), &vlen) > 0)
        {
          /* Wildcard means that all websites are blocked by default. */
          domain_action = BLOCK;
        }

      if (filter_list_fd >= 0 &&
          cdb_seek (filter_list_fd, allow_name, allow_name_len, &vlen) > 0)
        {
          /* Allow list overrides block lists. */
          domain_action = ALLOW;
        }
    }

  /* Return a result to NSS. */
  switch (domain_action)
    {
    case UNKNOWN:
    case ALLOW:
    default:
      {
        /* Not found in the filter list, so let another module actually resolve it. */
        *errnop = EINVAL;
        *h_errnop = NO_ADDRESS;
        return NSS_STATUS_NOTFOUND;
      }
    case BLOCK:
      {
        const struct in_addr sinkhole_addr = { .s_addr = 0 };
        const struct in6_addr sinkhole_addr6 = { .s6_addr = { 0, } };

        /* Found in the filter list, so return a DNS sinkhole. */
        size_t buffer_offset = 0;
        size_t h_length = (af == AF_INET6) ? sizeof (struct in6_addr) : sizeof (struct in_addr);

        /* Check the buffer size first: it needs space for the name, an empty
         * alias array, a two-element address array, and one address.
         *
         * glibc only retries with a bigger buffer if errno is ERANGE *and*
         * h_errno is NETDB_INTERNAL; with any other h_errno it would move on
         * to the next module, which would resolve the blocked name. */
        if (buffer_len < align_as_pointer (name_len + 1) + align_as_pointer (sizeof (void *)) + align_as_pointer (sizeof (void *) * 2) + align_as_pointer (h_length))
          {
            *errnop = ERANGE;
            *h_errnop = NETDB_INTERNAL;
            return NSS_STATUS_TRYAGAIN;
          }

        /* Build the result. Even though we never set any h_aliases, tools like
         * `getent` expect a non-NULL (though potentially empty) array. */
        memcpy (buffer, name, name_len + 1);
        result->h_name = buffer;
        buffer_offset = align_as_pointer (name_len + 1);

        result->h_aliases = (char **) (buffer + buffer_offset);
        buffer_offset += align_as_pointer (sizeof (void *));
        result->h_aliases[0] = NULL;

        result->h_addrtype = af;
        result->h_length = h_length;

        result->h_addr_list = (char **) (buffer + buffer_offset);
        buffer_offset += align_as_pointer (sizeof (void *) * 2);
        memcpy (buffer + buffer_offset, (af == AF_INET6) ? (const char *) &sinkhole_addr6 : (const char *) &sinkhole_addr, h_length);
        result->h_addr_list[0] = buffer + buffer_offset;
        buffer_offset += align_as_pointer (h_length);
        result->h_addr_list[1] = NULL;

        assert (buffer_offset <= buffer_len);

        if (ttlp != NULL)
          *ttlp = 0;
        if (canonp != NULL)
          *canonp = result->h_name;

        *errnop = 0;
        *h_errnop = 0;
        return NSS_STATUS_SUCCESS;
      }
    case REDIRECT:
      {
        /* Convert redirect_addrinfo to a hostent result */
        size_t buffer_offset = 0;
        size_t h_length = (af == AF_INET6) ? sizeof (struct in6_addr) : sizeof (struct in_addr);
        size_t redirect_hostname_len = strlen (redirect_hostname);
        size_t n_addrinfos = 0, i = 0;

        /* Count how many results there are. */
        for (struct addrinfo *ai = redirect_addrinfo; ai != NULL; ai = ai->ai_next)
          n_addrinfos++;

        /* Check the buffer size first: it needs space for the name, an empty
         * alias array, a NULL-terminated address array, and the addresses.
         *
         * As in the BLOCK case, h_errno has to be NETDB_INTERNAL for glibc to
         * retry with a bigger buffer rather than moving on to the next
         * module. */
        if (buffer_len < align_as_pointer (redirect_hostname_len + 1) + align_as_pointer (sizeof (void *)) + align_as_pointer (sizeof (void *) * (n_addrinfos + 1)) + n_addrinfos * align_as_pointer (h_length))
          {
            *errnop = ERANGE;
            *h_errnop = NETDB_INTERNAL;
            return NSS_STATUS_TRYAGAIN;
          }

        /* Build the result. Even though we never set any h_aliases, tools like
         * `getent` expect a non-NULL (though potentially empty) array.
         *
         * Note that we set result->h_name to `redirect_hostname` rather than to
         * `name`. This simulates a CNAME response and allows (for example)
         * browser location bars to update to use the redirected hostname. */
        memcpy (buffer, redirect_hostname, redirect_hostname_len + 1);
        result->h_name = buffer;
        buffer_offset = align_as_pointer (redirect_hostname_len + 1);

        result->h_aliases = (char **) (buffer + buffer_offset);
        buffer_offset += align_as_pointer (sizeof (void *));
        result->h_aliases[0] = NULL;

        result->h_addrtype = af;
        result->h_length = h_length;

        result->h_addr_list = (char **) (buffer + buffer_offset);
        buffer_offset += align_as_pointer (sizeof (void *) * (n_addrinfos + 1));

        for (struct addrinfo *ai = redirect_addrinfo; ai != NULL; ai = ai->ai_next)
          {
            /* Should be guaranteed by the `hints` in the query */
            assert (ai->ai_family == af);
            assert (ai->ai_addr->sa_family == af);

            memcpy (buffer + buffer_offset, sockaddr_to_inet_addr (ai->ai_addr, ai->ai_addrlen), h_length);
            result->h_addr_list[i++] = buffer + buffer_offset;
            buffer_offset += align_as_pointer (h_length);
          }

        result->h_addr_list[i] = NULL;

        assert (buffer_offset <= buffer_len);

        if (ttlp != NULL)
          *ttlp = 0;
        if (canonp != NULL)
          *canonp = result->h_name;

        *errnop = 0;
        *h_errnop = 0;
        return NSS_STATUS_SUCCESS;
      }
    }
}

/* Variant of _nss_malcontent_gethostbyname3_r() for callers which do not want
 * a TTL or canonical name: forward all the arguments unchanged and pass NULL
 * for both of the optional outputs. */
enum nss_status
_nss_malcontent_gethostbyname2_r (const char     *name,
                                  int             af,
                                  struct hostent *result,
                                  char           *buffer,
                                  size_t          buffer_len,
                                  int            *errnop,
                                  int            *h_errnop)
{
  int32_t *const no_ttl = NULL;
  char **const no_canon = NULL;
  enum nss_status status;

  status = _nss_malcontent_gethostbyname3_r (name, af, result,
                                             buffer, buffer_len,
                                             errnop, h_errnop,
                                             no_ttl, no_canon);

  return status;
}
