feat: retry DNS resolution indefinitely and add -4/-6 flags in hbc and hbc_mini.c

Mirror the same changes from hbc_mini.py: retry host resolution with
exponential backoff (5s→60s) instead of exiting on DNS failure, and add
mutually exclusive -4 / -6 flags to restrict connections to IPv4 or IPv6.

In hbc (main.py) the retry sleep is interruptible via the shutdown_event.
In hbc_mini.c signal handlers are moved before the resolution loop so
SIGINT/SIGTERM can break the retry during startup.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Andreas Wrede
2026-05-12 08:15:53 -04:00
parent b907343e36
commit 588eb2a792
2 changed files with 73 additions and 44 deletions
+32 -17
View File
@@ -518,29 +518,41 @@ async def async_main(args, config):
logger.info(f"hbc {__version__} on {iam} -> {hb_hosts} port={hb_port}, interval={interval}s")
af_filter = (socket.AF_INET if getattr(args, "ipv4_only", False)
else socket.AF_INET6 if getattr(args, "ipv6_only", False)
else 0)
# Create connections
connections = []
conn_id = 1
_retry_delay = 5
for host in hb_hosts:
try:
addrs = socket.getaddrinfo(host, hb_port, 0, 0, socket.SOL_UDP)
except socket.gaierror as e:
logger.error(f"Cannot resolve {host}: {e}")
continue
for addr_info in addrs:
af = addr_info[0]
addr = addr_info[4][0]
conn = AsyncConnection(conn_id, addr, hb_port, af, iam)
if not await conn.open():
logger.warning(f"Initial open to {addr} failed, heartbeat sender will retry")
connections.append(conn)
conn_id += 1
while running and not connections:
for host in hb_hosts:
try:
addrs = socket.getaddrinfo(host, hb_port, af_filter, 0, socket.SOL_UDP)
except socket.gaierror as e:
logger.warning(f"Cannot resolve {host}: {e} — retrying in {_retry_delay}s")
continue
for addr_info in addrs:
af = addr_info[0]
addr = addr_info[4][0]
conn = AsyncConnection(conn_id, addr, hb_port, af, iam)
if not await conn.open():
logger.warning(f"Initial open to {addr} failed, heartbeat sender will retry")
connections.append(conn)
conn_id += 1
if not connections:
try:
if shutdown_event:
await asyncio.wait_for(shutdown_event.wait(), timeout=_retry_delay)
else:
await asyncio.sleep(_retry_delay)
except asyncio.TimeoutError:
pass
_retry_delay = min(_retry_delay * 2, 60)
if not connections:
logger.error("No connections established (DNS resolution failed for all hosts)")
return 1
logger.info(f"Created {len(connections)} connections")
@@ -726,6 +738,9 @@ def build_parser():
default=0,
help="Increase debug level"
)
af_group = parser.add_mutually_exclusive_group()
af_group.add_argument("-4", dest="ipv4_only", action="store_true", help="Use IPv4 only")
af_group.add_argument("-6", dest="ipv6_only", action="store_true", help="Use IPv6 only")
parser.add_argument(
"hosts",
nargs="+",
+40 -26
View File
@@ -1264,6 +1264,8 @@ static void usage(const char *prog) {
" -c FILE Config file (JSON)\n"
" -m MSG Send one-shot message\n"
" -n NAME Override hostname\n"
" -4 Use IPv4 only\n"
" -6 Use IPv6 only\n"
" -d Daemonize\n"
" -v Verbose (info)\n"
" -x Debug\n"
@@ -1276,9 +1278,10 @@ int main(int argc, char **argv) {
const char *cfgpath = NULL;
const char *message = NULL;
const char *nameov = NULL;
int af_filter = 0;
int opt;
while ((opt = getopt(argc, argv, "bc:m:n:dvxh")) != -1) {
while ((opt = getopt(argc, argv, "bc:m:n:dvxh46")) != -1) {
switch (opt) {
case 'b': do_boot = true; break;
case 'c': cfgpath = optarg; break;
@@ -1287,6 +1290,8 @@ int main(int argc, char **argv) {
case 'd': do_daemon = true; break;
case 'v': g_log_level = LL_INFO; break;
case 'x': g_log_level = LL_DEBUG; break;
case '4': af_filter = AF_INET; break;
case '6': af_filter = AF_INET6; break;
case 'h': usage(argv[0]); return 0;
default: usage(argv[0]); return 1;
}
@@ -1313,37 +1318,46 @@ int main(int argc, char **argv) {
char *dot = strchr(iam, '.'); if (dot) *dot = '\0';
}
int conn_id = 1;
for (int i = 0; i < nhost; i++) {
struct addrinfo hints = {0}, *res = NULL;
hints.ai_socktype = SOCK_DGRAM;
hints.ai_protocol = IPPROTO_UDP;
char ps[16]; snprintf(ps, sizeof(ps), "%d", cfg.hb_port);
if (getaddrinfo(hosts[i], ps, &hints, &res) != 0) {
LOGE("cannot resolve %s", hosts[i]); continue;
}
for (struct addrinfo *ai = res; ai && g_nconns < MAX_HOSTS; ai = ai->ai_next) {
conn_t *c = &g_conns[g_nconns];
memset(c, 0, sizeof(*c));
c->conn_id = conn_id++; c->port = cfg.hb_port;
c->af = ai->ai_family; c->sockfd = -1;
snprintf(c->name, sizeof(c->name), "%s", iam);
void *addr = (ai->ai_family == AF_INET)
? (void *)&((struct sockaddr_in *)ai->ai_addr)->sin_addr
: (void *)&((struct sockaddr_in6 *)ai->ai_addr)->sin6_addr;
inet_ntop(ai->ai_family, addr, c->addr, sizeof(c->addr));
if (conn_open(c)) { g_nconns++; LOGI("connected to %s", c->addr); }
}
freeaddrinfo(res);
}
if (!g_nconns) { LOGE("no connections established"); return 1; }
struct sigaction sa = {0};
sa.sa_handler = sig_handler;
sigaction(SIGTERM, &sa, NULL);
sigaction(SIGINT, &sa, NULL);
sigaction(SIGHUP, &sa, NULL);
/*
 * Resolve every configured host and open one connection per returned
 * address.  If no connection could be opened, sleep and try again with
 * exponential backoff (5s, 10s, 20s, 40s, then capped at 60s) until at
 * least one connection is open or g_running is cleared.
 */
int conn_id = 1;
int retry_delay = 5;
while (g_running && !g_nconns) {
    for (int i = 0; i < nhost; i++) {
        struct addrinfo hints = {0}, *res = NULL;
        hints.ai_socktype = SOCK_DGRAM;
        hints.ai_protocol = IPPROTO_UDP;
        hints.ai_family = af_filter;   /* 0 (default), AF_INET (-4) or AF_INET6 (-6) */
        char ps[16]; snprintf(ps, sizeof(ps), "%d", cfg.hb_port);
        if (getaddrinfo(hosts[i], ps, &hints, &res) != 0) {
            LOGW("cannot resolve %s — retrying in %ds", hosts[i], retry_delay);
            continue;
        }
        for (struct addrinfo *ai = res; ai && g_nconns < MAX_HOSTS; ai = ai->ai_next) {
            /* Fill the next free slot; it is only kept if conn_open() succeeds. */
            conn_t *c = &g_conns[g_nconns];
            memset(c, 0, sizeof(*c));
            c->conn_id = conn_id++; c->port = cfg.hb_port;
            c->af = ai->ai_family; c->sockfd = -1;
            snprintf(c->name, sizeof(c->name), "%s", iam);
            void *addr = (ai->ai_family == AF_INET)
                ? (void *)&((struct sockaddr_in *)ai->ai_addr)->sin_addr
                : (void *)&((struct sockaddr_in6 *)ai->ai_addr)->sin6_addr;
            inet_ntop(ai->ai_family, addr, c->addr, sizeof(c->addr));
            if (conn_open(c)) { g_nconns++; LOGI("connected to %s", c->addr); }
        }
        freeaddrinfo(res);
    }
    if (!g_nconns) {
        sleep(retry_delay);
        /*
         * Double, then clamp.  Doubling only while below 60 would step
         * 40 -> 80 and overshoot the intended 60s ceiling.
         */
        retry_delay *= 2;
        if (retry_delay > 60) retry_delay = 60;
    }
}
if (!g_nconns) return 1;
conn_t *primary = &g_conns[0];
LOGI("hbc_mini-c %s on %s -> %s port=%d interval=%ds",
HBC_VERSION, iam, hosts[0], cfg.hb_port, cfg.interval);