feat: retry DNS resolution indefinitely and add -4/-6 flags in hbc and hbc_mini.c
Mirror the changes from hbc_mini.py: retry host resolution with exponential backoff (5s→60s) instead of exiting on DNS failure, and add mutually exclusive -4 / -6 flags to restrict connections to IPv4 or IPv6. In hbc (main.py), the retry sleep is interruptible via the shutdown_event. In hbc_mini.c, the signal handlers are moved before the resolution loop so that SIGINT/SIGTERM can break the retry during startup.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
+33
-18
@@ -518,31 +518,43 @@ async def async_main(args, config):
|
||||
|
||||
logger.info(f"hbc {__version__} on {iam} -> {hb_hosts} port={hb_port}, interval={interval}s")
|
||||
|
||||
af_filter = (socket.AF_INET if getattr(args, "ipv4_only", False)
|
||||
else socket.AF_INET6 if getattr(args, "ipv6_only", False)
|
||||
else 0)
|
||||
|
||||
# Create connections
|
||||
connections = []
|
||||
conn_id = 1
|
||||
|
||||
for host in hb_hosts:
|
||||
try:
|
||||
addrs = socket.getaddrinfo(host, hb_port, 0, 0, socket.SOL_UDP)
|
||||
except socket.gaierror as e:
|
||||
logger.error(f"Cannot resolve {host}: {e}")
|
||||
continue
|
||||
|
||||
for addr_info in addrs:
|
||||
af = addr_info[0]
|
||||
addr = addr_info[4][0]
|
||||
_retry_delay = 5
|
||||
|
||||
conn = AsyncConnection(conn_id, addr, hb_port, af, iam)
|
||||
if not await conn.open():
|
||||
logger.warning(f"Initial open to {addr} failed, heartbeat sender will retry")
|
||||
connections.append(conn)
|
||||
conn_id += 1
|
||||
while running and not connections:
|
||||
for host in hb_hosts:
|
||||
try:
|
||||
addrs = socket.getaddrinfo(host, hb_port, af_filter, 0, socket.SOL_UDP)
|
||||
except socket.gaierror as e:
|
||||
logger.warning(f"Cannot resolve {host}: {e} — retrying in {_retry_delay}s")
|
||||
continue
|
||||
for addr_info in addrs:
|
||||
af = addr_info[0]
|
||||
addr = addr_info[4][0]
|
||||
conn = AsyncConnection(conn_id, addr, hb_port, af, iam)
|
||||
if not await conn.open():
|
||||
logger.warning(f"Initial open to {addr} failed, heartbeat sender will retry")
|
||||
connections.append(conn)
|
||||
conn_id += 1
|
||||
if not connections:
|
||||
try:
|
||||
if shutdown_event:
|
||||
await asyncio.wait_for(shutdown_event.wait(), timeout=_retry_delay)
|
||||
else:
|
||||
await asyncio.sleep(_retry_delay)
|
||||
except asyncio.TimeoutError:
|
||||
pass
|
||||
_retry_delay = min(_retry_delay * 2, 60)
|
||||
|
||||
if not connections:
|
||||
logger.error("No connections established (DNS resolution failed for all hosts)")
|
||||
return 1
|
||||
|
||||
|
||||
logger.info(f"Created {len(connections)} connections")
|
||||
|
||||
# Send boot/message if requested
|
||||
@@ -726,6 +738,9 @@ def build_parser():
|
||||
default=0,
|
||||
help="Increase debug level"
|
||||
)
|
||||
af_group = parser.add_mutually_exclusive_group()
|
||||
af_group.add_argument("-4", dest="ipv4_only", action="store_true", help="Use IPv4 only")
|
||||
af_group.add_argument("-6", dest="ipv6_only", action="store_true", help="Use IPv6 only")
|
||||
parser.add_argument(
|
||||
"hosts",
|
||||
nargs="+",
|
||||
|
||||
+40
-26
@@ -1264,6 +1264,8 @@ static void usage(const char *prog) {
|
||||
" -c FILE Config file (JSON)\n"
|
||||
" -m MSG Send one-shot message\n"
|
||||
" -n NAME Override hostname\n"
|
||||
" -4 Use IPv4 only\n"
|
||||
" -6 Use IPv6 only\n"
|
||||
" -d Daemonize\n"
|
||||
" -v Verbose (info)\n"
|
||||
" -x Debug\n"
|
||||
@@ -1276,9 +1278,10 @@ int main(int argc, char **argv) {
|
||||
const char *cfgpath = NULL;
|
||||
const char *message = NULL;
|
||||
const char *nameov = NULL;
|
||||
int af_filter = 0;
|
||||
|
||||
int opt;
|
||||
while ((opt = getopt(argc, argv, "bc:m:n:dvxh")) != -1) {
|
||||
while ((opt = getopt(argc, argv, "bc:m:n:dvxh46")) != -1) {
|
||||
switch (opt) {
|
||||
case 'b': do_boot = true; break;
|
||||
case 'c': cfgpath = optarg; break;
|
||||
@@ -1287,6 +1290,8 @@ int main(int argc, char **argv) {
|
||||
case 'd': do_daemon = true; break;
|
||||
case 'v': g_log_level = LL_INFO; break;
|
||||
case 'x': g_log_level = LL_DEBUG; break;
|
||||
case '4': af_filter = AF_INET; break;
|
||||
case '6': af_filter = AF_INET6; break;
|
||||
case 'h': usage(argv[0]); return 0;
|
||||
default: usage(argv[0]); return 1;
|
||||
}
|
||||
@@ -1313,37 +1318,46 @@ int main(int argc, char **argv) {
|
||||
char *dot = strchr(iam, '.'); if (dot) *dot = '\0';
|
||||
}
|
||||
|
||||
int conn_id = 1;
|
||||
for (int i = 0; i < nhost; i++) {
|
||||
struct addrinfo hints = {0}, *res = NULL;
|
||||
hints.ai_socktype = SOCK_DGRAM;
|
||||
hints.ai_protocol = IPPROTO_UDP;
|
||||
char ps[16]; snprintf(ps, sizeof(ps), "%d", cfg.hb_port);
|
||||
if (getaddrinfo(hosts[i], ps, &hints, &res) != 0) {
|
||||
LOGE("cannot resolve %s", hosts[i]); continue;
|
||||
}
|
||||
for (struct addrinfo *ai = res; ai && g_nconns < MAX_HOSTS; ai = ai->ai_next) {
|
||||
conn_t *c = &g_conns[g_nconns];
|
||||
memset(c, 0, sizeof(*c));
|
||||
c->conn_id = conn_id++; c->port = cfg.hb_port;
|
||||
c->af = ai->ai_family; c->sockfd = -1;
|
||||
snprintf(c->name, sizeof(c->name), "%s", iam);
|
||||
void *addr = (ai->ai_family == AF_INET)
|
||||
? (void *)&((struct sockaddr_in *)ai->ai_addr)->sin_addr
|
||||
: (void *)&((struct sockaddr_in6 *)ai->ai_addr)->sin6_addr;
|
||||
inet_ntop(ai->ai_family, addr, c->addr, sizeof(c->addr));
|
||||
if (conn_open(c)) { g_nconns++; LOGI("connected to %s", c->addr); }
|
||||
}
|
||||
freeaddrinfo(res);
|
||||
}
|
||||
if (!g_nconns) { LOGE("no connections established"); return 1; }
|
||||
|
||||
struct sigaction sa = {0};
|
||||
sa.sa_handler = sig_handler;
|
||||
sigaction(SIGTERM, &sa, NULL);
|
||||
sigaction(SIGINT, &sa, NULL);
|
||||
sigaction(SIGHUP, &sa, NULL);
|
||||
|
||||
int conn_id = 1;
|
||||
int retry_delay = 5;
|
||||
while (g_running && !g_nconns) {
|
||||
for (int i = 0; i < nhost; i++) {
|
||||
struct addrinfo hints = {0}, *res = NULL;
|
||||
hints.ai_socktype = SOCK_DGRAM;
|
||||
hints.ai_protocol = IPPROTO_UDP;
|
||||
hints.ai_family = af_filter;
|
||||
char ps[16]; snprintf(ps, sizeof(ps), "%d", cfg.hb_port);
|
||||
if (getaddrinfo(hosts[i], ps, &hints, &res) != 0) {
|
||||
LOGW("cannot resolve %s — retrying in %ds", hosts[i], retry_delay);
|
||||
continue;
|
||||
}
|
||||
for (struct addrinfo *ai = res; ai && g_nconns < MAX_HOSTS; ai = ai->ai_next) {
|
||||
conn_t *c = &g_conns[g_nconns];
|
||||
memset(c, 0, sizeof(*c));
|
||||
c->conn_id = conn_id++; c->port = cfg.hb_port;
|
||||
c->af = ai->ai_family; c->sockfd = -1;
|
||||
snprintf(c->name, sizeof(c->name), "%s", iam);
|
||||
void *addr = (ai->ai_family == AF_INET)
|
||||
? (void *)&((struct sockaddr_in *)ai->ai_addr)->sin_addr
|
||||
: (void *)&((struct sockaddr_in6 *)ai->ai_addr)->sin6_addr;
|
||||
inet_ntop(ai->ai_family, addr, c->addr, sizeof(c->addr));
|
||||
if (conn_open(c)) { g_nconns++; LOGI("connected to %s", c->addr); }
|
||||
}
|
||||
freeaddrinfo(res);
|
||||
}
|
||||
if (!g_nconns) {
|
||||
sleep(retry_delay);
|
||||
if (retry_delay < 60) retry_delay *= 2;
|
||||
}
|
||||
}
|
||||
if (!g_nconns) return 1;
|
||||
|
||||
conn_t *primary = &g_conns[0];
|
||||
LOGI("hbc_mini-c %s on %s -> %s port=%d interval=%ds",
|
||||
HBC_VERSION, iam, hosts[0], cfg.hb_port, cfg.interval);
|
||||
|
||||
Reference in New Issue
Block a user