feat: retry DNS resolution indefinitely and add -4/-6 flags in hbc and hbc_mini.c
Mirror the changes from hbc_mini.py: retry host resolution with exponential backoff (5s→60s) instead of exiting on DNS failure, and add mutually exclusive -4 / -6 flags to restrict connections to IPv4 or IPv6. In hbc (main.py), the retry sleep is interruptible via the shutdown_event. In hbc_mini.c, the signal handlers are moved before the resolution loop so that SIGINT/SIGTERM can break the retry during startup. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
+20
-5
@@ -518,29 +518,41 @@ async def async_main(args, config):
|
|||||||
|
|
||||||
logger.info(f"hbc {__version__} on {iam} -> {hb_hosts} port={hb_port}, interval={interval}s")
|
logger.info(f"hbc {__version__} on {iam} -> {hb_hosts} port={hb_port}, interval={interval}s")
|
||||||
|
|
||||||
|
af_filter = (socket.AF_INET if getattr(args, "ipv4_only", False)
|
||||||
|
else socket.AF_INET6 if getattr(args, "ipv6_only", False)
|
||||||
|
else 0)
|
||||||
|
|
||||||
# Create connections
|
# Create connections
|
||||||
connections = []
|
connections = []
|
||||||
conn_id = 1
|
conn_id = 1
|
||||||
|
_retry_delay = 5
|
||||||
|
|
||||||
|
while running and not connections:
|
||||||
for host in hb_hosts:
|
for host in hb_hosts:
|
||||||
try:
|
try:
|
||||||
addrs = socket.getaddrinfo(host, hb_port, 0, 0, socket.SOL_UDP)
|
addrs = socket.getaddrinfo(host, hb_port, af_filter, 0, socket.SOL_UDP)
|
||||||
except socket.gaierror as e:
|
except socket.gaierror as e:
|
||||||
logger.error(f"Cannot resolve {host}: {e}")
|
logger.warning(f"Cannot resolve {host}: {e} — retrying in {_retry_delay}s")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
for addr_info in addrs:
|
for addr_info in addrs:
|
||||||
af = addr_info[0]
|
af = addr_info[0]
|
||||||
addr = addr_info[4][0]
|
addr = addr_info[4][0]
|
||||||
|
|
||||||
conn = AsyncConnection(conn_id, addr, hb_port, af, iam)
|
conn = AsyncConnection(conn_id, addr, hb_port, af, iam)
|
||||||
if not await conn.open():
|
if not await conn.open():
|
||||||
logger.warning(f"Initial open to {addr} failed, heartbeat sender will retry")
|
logger.warning(f"Initial open to {addr} failed, heartbeat sender will retry")
|
||||||
connections.append(conn)
|
connections.append(conn)
|
||||||
conn_id += 1
|
conn_id += 1
|
||||||
|
if not connections:
|
||||||
|
try:
|
||||||
|
if shutdown_event:
|
||||||
|
await asyncio.wait_for(shutdown_event.wait(), timeout=_retry_delay)
|
||||||
|
else:
|
||||||
|
await asyncio.sleep(_retry_delay)
|
||||||
|
except asyncio.TimeoutError:
|
||||||
|
pass
|
||||||
|
_retry_delay = min(_retry_delay * 2, 60)
|
||||||
|
|
||||||
if not connections:
|
if not connections:
|
||||||
logger.error("No connections established (DNS resolution failed for all hosts)")
|
|
||||||
return 1
|
return 1
|
||||||
|
|
||||||
logger.info(f"Created {len(connections)} connections")
|
logger.info(f"Created {len(connections)} connections")
|
||||||
@@ -726,6 +738,9 @@ def build_parser():
|
|||||||
default=0,
|
default=0,
|
||||||
help="Increase debug level"
|
help="Increase debug level"
|
||||||
)
|
)
|
||||||
|
af_group = parser.add_mutually_exclusive_group()
|
||||||
|
af_group.add_argument("-4", dest="ipv4_only", action="store_true", help="Use IPv4 only")
|
||||||
|
af_group.add_argument("-6", dest="ipv6_only", action="store_true", help="Use IPv6 only")
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"hosts",
|
"hosts",
|
||||||
nargs="+",
|
nargs="+",
|
||||||
|
|||||||
+23
-9
@@ -1264,6 +1264,8 @@ static void usage(const char *prog) {
|
|||||||
" -c FILE Config file (JSON)\n"
|
" -c FILE Config file (JSON)\n"
|
||||||
" -m MSG Send one-shot message\n"
|
" -m MSG Send one-shot message\n"
|
||||||
" -n NAME Override hostname\n"
|
" -n NAME Override hostname\n"
|
||||||
|
" -4 Use IPv4 only\n"
|
||||||
|
" -6 Use IPv6 only\n"
|
||||||
" -d Daemonize\n"
|
" -d Daemonize\n"
|
||||||
" -v Verbose (info)\n"
|
" -v Verbose (info)\n"
|
||||||
" -x Debug\n"
|
" -x Debug\n"
|
||||||
@@ -1276,9 +1278,10 @@ int main(int argc, char **argv) {
|
|||||||
const char *cfgpath = NULL;
|
const char *cfgpath = NULL;
|
||||||
const char *message = NULL;
|
const char *message = NULL;
|
||||||
const char *nameov = NULL;
|
const char *nameov = NULL;
|
||||||
|
int af_filter = 0;
|
||||||
|
|
||||||
int opt;
|
int opt;
|
||||||
while ((opt = getopt(argc, argv, "bc:m:n:dvxh")) != -1) {
|
while ((opt = getopt(argc, argv, "bc:m:n:dvxh46")) != -1) {
|
||||||
switch (opt) {
|
switch (opt) {
|
||||||
case 'b': do_boot = true; break;
|
case 'b': do_boot = true; break;
|
||||||
case 'c': cfgpath = optarg; break;
|
case 'c': cfgpath = optarg; break;
|
||||||
@@ -1287,6 +1290,8 @@ int main(int argc, char **argv) {
|
|||||||
case 'd': do_daemon = true; break;
|
case 'd': do_daemon = true; break;
|
||||||
case 'v': g_log_level = LL_INFO; break;
|
case 'v': g_log_level = LL_INFO; break;
|
||||||
case 'x': g_log_level = LL_DEBUG; break;
|
case 'x': g_log_level = LL_DEBUG; break;
|
||||||
|
case '4': af_filter = AF_INET; break;
|
||||||
|
case '6': af_filter = AF_INET6; break;
|
||||||
case 'h': usage(argv[0]); return 0;
|
case 'h': usage(argv[0]); return 0;
|
||||||
default: usage(argv[0]); return 1;
|
default: usage(argv[0]); return 1;
|
||||||
}
|
}
|
||||||
@@ -1313,14 +1318,24 @@ int main(int argc, char **argv) {
|
|||||||
char *dot = strchr(iam, '.'); if (dot) *dot = '\0';
|
char *dot = strchr(iam, '.'); if (dot) *dot = '\0';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct sigaction sa = {0};
|
||||||
|
sa.sa_handler = sig_handler;
|
||||||
|
sigaction(SIGTERM, &sa, NULL);
|
||||||
|
sigaction(SIGINT, &sa, NULL);
|
||||||
|
sigaction(SIGHUP, &sa, NULL);
|
||||||
|
|
||||||
int conn_id = 1;
|
int conn_id = 1;
|
||||||
|
int retry_delay = 5;
|
||||||
|
while (g_running && !g_nconns) {
|
||||||
for (int i = 0; i < nhost; i++) {
|
for (int i = 0; i < nhost; i++) {
|
||||||
struct addrinfo hints = {0}, *res = NULL;
|
struct addrinfo hints = {0}, *res = NULL;
|
||||||
hints.ai_socktype = SOCK_DGRAM;
|
hints.ai_socktype = SOCK_DGRAM;
|
||||||
hints.ai_protocol = IPPROTO_UDP;
|
hints.ai_protocol = IPPROTO_UDP;
|
||||||
|
hints.ai_family = af_filter;
|
||||||
char ps[16]; snprintf(ps, sizeof(ps), "%d", cfg.hb_port);
|
char ps[16]; snprintf(ps, sizeof(ps), "%d", cfg.hb_port);
|
||||||
if (getaddrinfo(hosts[i], ps, &hints, &res) != 0) {
|
if (getaddrinfo(hosts[i], ps, &hints, &res) != 0) {
|
||||||
LOGE("cannot resolve %s", hosts[i]); continue;
|
LOGW("cannot resolve %s — retrying in %ds", hosts[i], retry_delay);
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
for (struct addrinfo *ai = res; ai && g_nconns < MAX_HOSTS; ai = ai->ai_next) {
|
for (struct addrinfo *ai = res; ai && g_nconns < MAX_HOSTS; ai = ai->ai_next) {
|
||||||
conn_t *c = &g_conns[g_nconns];
|
conn_t *c = &g_conns[g_nconns];
|
||||||
@@ -1336,13 +1351,12 @@ int main(int argc, char **argv) {
|
|||||||
}
|
}
|
||||||
freeaddrinfo(res);
|
freeaddrinfo(res);
|
||||||
}
|
}
|
||||||
if (!g_nconns) { LOGE("no connections established"); return 1; }
|
if (!g_nconns) {
|
||||||
|
sleep(retry_delay);
|
||||||
struct sigaction sa = {0};
|
if (retry_delay < 60) retry_delay *= 2;
|
||||||
sa.sa_handler = sig_handler;
|
}
|
||||||
sigaction(SIGTERM, &sa, NULL);
|
}
|
||||||
sigaction(SIGINT, &sa, NULL);
|
if (!g_nconns) return 1;
|
||||||
sigaction(SIGHUP, &sa, NULL);
|
|
||||||
|
|
||||||
conn_t *primary = &g_conns[0];
|
conn_t *primary = &g_conns[0];
|
||||||
LOGI("hbc_mini-c %s on %s -> %s port=%d interval=%ds",
|
LOGI("hbc_mini-c %s on %s -> %s port=%d interval=%ds",
|
||||||
|
|||||||
Reference in New Issue
Block a user