From 111e5a1d2ef7686f27d040dda100b4d15b4b1408 Mon Sep 17 00:00:00 2001 From: Yousf Ateya Date: Mon, 20 Apr 2015 23:00:00 +0200 Subject: [PATCH] ASTERISK-24983: Prevent deadlock between hanup and sending lagrq/ping channels/chan_iax.c: Prevent the deadlock between iax2_hangup and send_lagrq/ send_ping. This deadlock happens because scheduled task send_lagrq(or send_ping) start execution after call hangup procedure started but before deleting the tasks the scheduler. The solution is to prevent lagrq (and ping) from acquiring the mutex if call disappear or hangup in process. This commit cleans up the procedure of sending LAGRQ and PING. main/sched.c: Add warning message if scheduler is stuck while deleting and executing task. Change-Id: I03bec1fc8faacb89630269e935fa667c6d6c080c --- channels/chan_iax2.c | 99 ++++++++++++++++++++++++++++++---------------------- main/sched.c | 16 ++++++++- 2 files changed, 73 insertions(+), 42 deletions(-) diff --git a/channels/chan_iax2.c b/channels/chan_iax2.c index 8d3e7f1..60728d2 100644 --- a/channels/chan_iax2.c +++ b/channels/chan_iax2.c @@ -395,8 +395,6 @@ static int (*iax2_regfunk)(const char *username, int onoff) = NULL; static struct io_context *io; static struct ast_sched_context *sched; -#define DONT_RESCHEDULE -2 - static iax2_format iax2_capability = IAX_CAPABILITY_FULLBANDWIDTH; static int iaxdebug = 0; @@ -864,6 +862,8 @@ struct chan_iax2_pvt { int frames_dropped; /*! received frame count: (just for stats) */ int frames_received; + /*! Destroying this call initiated. */ + int destroy_initiated; /*! num bytes used for calltoken ie, even an empty ie should contain 2 */ unsigned char calltoken_ie_len; /*! hold all signaling frames from the pbx thread until we have a destination callno */ @@ -1671,23 +1671,47 @@ static int iax2_sched_add(struct ast_sched_context *con, int when, return ast_sched_add(con, when, callback, data); } +/* + * \brief Try to acquire the iaxsl[callno] if call exists and not having ongoing hangup. + * \param callno Call number to lock. + * \return 0 If call disappeared or has ongoing hangup procedure. 1 If call found and mutex is locked. + * + * Try to acquire the iaxsl[callno] MUTEX lock to avoid the deadlock that might + * happen, in case a hangup thread locks the MUTEX before this thread does and + * then tries to lock it one more time while this thread is already locked and + * waiting for the MUTEX to be released. See ASTERISK-24983. + */ +static int iax_try_lock_unless_hungup(int callno){ + while (ast_mutex_trylock(&iaxsl[callno]) != 0){ + /*Return if callno disappeared OR there is ongoing hang up procedure.*/ + if(!iaxs[callno] || (iaxs[callno] && iaxs[callno]->destroy_initiated)) { + ast_debug(3, "I wanted to lock callno %d, but it is dead or going to die.\n", callno); + return 0; + } + ast_debug(3,"Waiting for the callno %d hangup to finish\n", callno); + usleep(100); + } + return 1; +} + static int send_ping(const void *data); static void __send_ping(const void *data) { - int callno = (long) data; + int callno = PTR_TO_CALLNO(data); - ast_mutex_lock(&iaxsl[callno]); + if (iax_try_lock_unless_hungup(callno) == 0) { + ast_debug(3, "Hangup initiated on call %d, aborting __send_ping\n", callno); + return; + } - if (iaxs[callno]) { - if (iaxs[callno]->peercallno) { - send_command(iaxs[callno], AST_FRAME_IAX, IAX_COMMAND_PING, 0, NULL, 0, -1); - if (iaxs[callno]->pingid != DONT_RESCHEDULE) { - iaxs[callno]->pingid = iax2_sched_add(sched, ping_time * 1000, send_ping, data); - } - } - } else { - ast_debug(1, "I was supposed to send a PING with callno %d, but no such call exists.\n", callno); + /* callno is now locked. */ + if (iaxs[callno]->peercallno) { + /* Send PING packet. */ + send_command(iaxs[callno], AST_FRAME_IAX, IAX_COMMAND_PING, 0, NULL, 0, -1); + + /* Schedule sending next ping. */ + iaxs[callno]->pingid = iax2_sched_add(sched, ping_time * 1000, send_ping, data); } ast_mutex_unlock(&iaxsl[callno]); @@ -1695,13 +1719,6 @@ static void __send_ping(const void *data) static int send_ping(const void *data) { - int callno = (long) data; - ast_mutex_lock(&iaxsl[callno]); - if (iaxs[callno] && iaxs[callno]->pingid != DONT_RESCHEDULE) { - iaxs[callno]->pingid = -1; - } - ast_mutex_unlock(&iaxsl[callno]); - #ifdef SCHED_MULTITHREADED if (schedule_action(__send_ping, data)) #endif @@ -1742,19 +1759,20 @@ static int send_lagrq(const void *data); static void __send_lagrq(const void *data) { - int callno = (long) data; + int callno = PTR_TO_CALLNO(data); - ast_mutex_lock(&iaxsl[callno]); + if (iax_try_lock_unless_hungup(callno) == 0) { + ast_debug(3, "Hangup initiated on call %d, aborting __send_lagrq\n", callno); + return; + } - if (iaxs[callno]) { - if (iaxs[callno]->peercallno) { - send_command(iaxs[callno], AST_FRAME_IAX, IAX_COMMAND_LAGRQ, 0, NULL, 0, -1); - if (iaxs[callno]->lagid != DONT_RESCHEDULE) { - iaxs[callno]->lagid = iax2_sched_add(sched, lagrq_time * 1000, send_lagrq, data); - } - } - } else { - ast_debug(1, "I was supposed to send a LAGRQ with callno %d, but no such call exists.\n", callno); + /* callno is now locked. */ + if (iaxs[callno]->peercallno) { + /* Send LAGRQ packet. */ + send_command(iaxs[callno], AST_FRAME_IAX, IAX_COMMAND_LAGRQ, 0, NULL, 0, -1); + + /* Schedule sending next lagrq. */ + iaxs[callno]->lagid = iax2_sched_add(sched, lagrq_time * 1000, send_lagrq, data); } ast_mutex_unlock(&iaxsl[callno]); @@ -1762,13 +1780,6 @@ static void __send_lagrq(const void *data) static int send_lagrq(const void *data) { - int callno = (long) data; - ast_mutex_lock(&iaxsl[callno]); - if (iaxs[callno] && iaxs[callno]->lagid != DONT_RESCHEDULE) { - iaxs[callno]->lagid = -1; - } - ast_mutex_unlock(&iaxsl[callno]); - #ifdef SCHED_MULTITHREADED if (schedule_action(__send_lagrq, data)) #endif @@ -2068,11 +2079,16 @@ static void iax2_destroy_helper(struct chan_iax2_pvt *pvt) ast_clear_flag64(pvt, IAX_MAXAUTHREQ); } - /* No more pings or lagrq's */ + + + /* Mark call destroy initiated flag. */ + pvt->destroy_initiated = 1; + + /* Remove scheduled (but didn't run yet) PINGs or LAGRQs */ + /* Already running tasks will be terminated because of destroy_initiated. */ AST_SCHED_DEL_SPINLOCK(sched, pvt->pingid, &iaxsl[pvt->callno]); - pvt->pingid = DONT_RESCHEDULE; AST_SCHED_DEL_SPINLOCK(sched, pvt->lagid, &iaxsl[pvt->callno]); - pvt->lagid = DONT_RESCHEDULE; + AST_SCHED_DEL(sched, pvt->autoid); AST_SCHED_DEL(sched, pvt->authid); AST_SCHED_DEL(sched, pvt->initid); @@ -5250,6 +5266,7 @@ static int iax2_hangup(struct ast_channel *c) if (callno && iaxs[callno]) { ast_debug(1, "We're hanging up %s now...\n", ast_channel_name(c)); alreadygone = ast_test_flag64(iaxs[callno], IAX_ALREADYGONE); + /* Send the hangup unless we have had a transmission error or are already gone */ iax_ie_append_byte(&ied, IAX_IE_CAUSECODE, (unsigned char)ast_channel_hangupcause(c)); if (!iaxs[callno]->error && !alreadygone) { diff --git a/main/sched.c b/main/sched.c index 911143c..7eb4013 100644 --- a/main/sched.c +++ b/main/sched.c @@ -492,12 +492,26 @@ int _ast_sched_del(struct ast_sched_context *con, int id, const char *file, int } sched_release(con, s); } else if (con->currently_executing && (id == con->currently_executing->id)) { + int ret; + struct timespec ts = {0}; + struct timeval tv; + const struct timeval sleeptv = {.tv_sec= 0, .tv_usec= 1000}; + s = con->currently_executing; s->deleted = 1; + /* Wait for executing task to complete so that caller of ast_sched_del() does not * free memory out from under the task. */ - ast_cond_wait(&s->cond, &con->lock); + do { + ast_log(LOG_WARNING, "Waiting for sched task %d to finish to delete it.\n", id); + tv = ast_tvadd(ast_tvnow(), sleeptv); + ts.tv_sec = tv.tv_sec; + ts.tv_nsec = tv.tv_usec * 1000; + /* Wait for 1ms before trying again. This is ONLY used to print warning that + * scheduler is stuck until this task is finished. */ + ret = ast_cond_timedwait(&s->cond, &con->lock, &ts); + } while(ret != 0); /* Do not sched_release() here because ast_sched_runq() will do it */ } -- 1.9.1