[Home]

Summary:ASTERISK-11457: SIGABORT in ast_channel_trylock in chan_local local_queue_frame when channel disappears underneath the code
Reporter:Steve Davies . (stevedavies)Labels:
Date Opened:2008-02-17 09:27:40.000-0600Date Closed:2010-01-19 07:47:42.000-0600
Priority:CriticalRegression?No
Status:Closed/CompleteComponents:Channels/chan_local
Versions:Frequency of
Occurrence
Related
Issues:
Environment:Attachments:
Description:Asterisk SIGABORTS - a "stale" channel pointer is passed to ast_channel_trylock.

Program terminated with signal 6, Aborted.

#0  0x00002ab5aaf45b95 in raise () from /lib64/libc.so.6
No symbol table info available.
#1  0x00002ab5aaf46f90 in abort () from /lib64/libc.so.6
No symbol table info available.
#2  0x00002ab5aaf3f256 in __assert_fail () from /lib64/libc.so.6
No symbol table info available.
#3  0x00002ab5aaa3ad5a in pthread_mutex_trylock () from /lib64/libpthread.so.0
No symbol table info available.
#4  0x00002aaaadc881a9 in local_queue_frame (p=0x2aaab5b49cd0, isoutbound=<value optimized out>, f=0x2aaab5481458, us=0x2aaab5b5a7f0)
  from /usr/lib/asterisk/modules/chan_local.so
other = (struct ast_channel *) 0x2aaab5b5b180
ASTERISK-1  0x00002aaaadc886b1 in local_write (ast=0x2aaab5b5a7f0, f=0x2aaab5481458) from /usr/lib/asterisk/modules/chan_local.so
p = (struct local_pvt *) 0x7ac2
res = <value optimized out>
isoutbound = <value optimized out>
__PRETTY_FUNCTION__ = "local_write"
ASTERISK-2  0x000000000043ef8c in ast_write (chan=0x2aaab5b5a7f0, fr=0x2aaab5481458) at channel.c:2682
jump = <value optimized out>
res = -1
count = <value optimized out>
f = (struct ast_frame *) 0x2aaab5481458
f2 = (struct ast_frame *) 0x0
__PRETTY_FUNCTION__ = "ast_write"
ASTERISK-3  0x0000000000440d8e in ast_channel_bridge (c0=0x2aaab5415c10, c1=0x2aaab5b5a7f0, config=0x40b38ba0, fo=0x40b379a8, rc=0x40b379a0)
   at channel.c:3806
monitored_source = 0
to = <value optimized out>
res = <value optimized out>
nativefailed = 0
firstpass = <value optimized out>
o0nativeformats = 4
o1nativeformats = 4
time_left_ms = <value optimized out>
nexteventts = {tv_sec = 0, tv_usec = 0}
caller_warning = 0 '\0'
callee_warning = 0 '\0'
__PRETTY_FUNCTION__ = "ast_channel_bridge"
ASTERISK-4  0x00002aaaaafcd125 in ast_bridge_call (chan=0x2aaab5415c10, peer=0x2aaab5b5a7f0, config=0x40b38ba0) at res_features.c:1420
other = (struct ast_channel *) 0x0
f = (struct ast_frame *) 0x0
who = (struct ast_channel *) 0x2aaab2cc8fa1
chan_featurecode = '\0' <repeats 11 times>
peer_featurecode = '\0' <repeats 11 times>
res = 538996016
hasfeatures = 0
backup_config = {features_caller = {flags = 0}, features_callee = {flags = 0}, start_time = {tv_sec = 0, tv_usec = 0},
 feature_timer = 0, timelimit = 0, play_warning = 0, warning_freq = 0, warning_sound = 0x0, end_sound = 0x0, start_sound = 0x0,
 firstpass = 0, flags = 0}
bridge_cdr = <value optimized out>
__PRETTY_FUNCTION__ = "ast_bridge_call"
ASTERISK-5  0x00002aaab2cc7290 in dial_exec_full (chan=0x2aaab5415c10, data=<value optimized out>, peerflags=0x40b38e10, continue_exec=0x0)
  from /usr/lib/asterisk/modules/app_dial.so
toast = "\000\000\000\000\000\000\000\000rÖ÷ªµ*\000\00030;40\000\000\000ØJ\235\000\000\000\000\000ßJ\235\000\000\000\000\000X\000\000\000\000\000\000\000\020\016\016«ª*\000\000\006\000\000\000\000\000\000\000Àh\036«ª*\000\000S6÷ªµ*\000"
config = {features_caller = {flags = 0}, features_callee = {flags = 0}, start_time = {tv_sec = 1203059599, tv_usec = 762167},
 feature_timer = 0, timelimit = 0, play_warning = 0, warning_freq = 0, warning_sound = 0x0, end_sound = 0x0, start_sound = 0x0,
 firstpass = 0, flags = 0}
res = 0
u = (struct ast_module_user *) 0x2aaab5b39810
rest = 0x0
cur = <value optimized out>
outgoing = (struct dial_localuser *) 0x2aaab5b0a7f0
peer = (struct ast_channel *) 0x2aaab5b5a7f0
to = -1
numbusy = 0
numcongestion = 0
numnochan = 0
cause = 0
numsubst = "0828294060@from-internal/n", '\0' <repeats 30 times>, "0\213³@\000\000\000\000X`M\000\000\000\000\000p\214³@\000\000\000\000P\000\000\000\000\000\000\000\001\000\000\000\000\000\000\000\020\214³@\000\000\000\000\223\001öªµ*\000\0000\000\000\0000\000\000\000\020\214³@\000\000\000\0000\213³@\000\000\000\000ʳ÷ªµ*\000\000\001\200 û\000\000\000\000ØJ\235\000\000\000\000\000ØJ\235\000\000\000\000\000\033\000\000\000\000\000\000\000\001\000\000\000\000\000\000\000\020\214³@\000\000\000\000¸K\235\000\000\000\000\000Y\200üªµ*\000\000\000\000\000\000\000\000\000\000s\000\000\000"...
cidname = '\0' <repeats 79 times>
privdb_val = 0
calldurationlimit = 0
timelimit = 0
play_warning = 0
warning_freq = 0
warning_sound = 0x0
end_sound = 0x0
start_sound = 0x0
dtmfcalled = 0x0
dtmfcalling = 0x0
status = "ANSWER\000R\000GS", '\0' <repeats 244 times>
play_to_caller = 0
play_to_callee = 0
sentringing = 0
moh = 0
outbound_group = 0x0
result = <value optimized out>
start_time = <value optimized out>
privintro = "\000\000\000\000\033\000\000\000ÃIO", '\0' <repeats 13 times>, "x\210³@\000\000\000\000\000\000\000\000\006\000\000\000\035", '\0' <repeats 11 times>, "\n", '\0' <repeats 43 times>, " \204³@\000\000\000\000ʳ÷ªµ*\000\000\001\200 û\000\000\000dør\220\000\000\000\000\000ør\220\000\000\000\000\000ør\220\000\000\000\000\000ør\220\000\000\000\000\000>s\220\000\000\000\000\000Øs\220\000\000\000\000\000ør\220\000\000\000\000\000Øs\220", '\0' <repeats 29 times>, "5K\235\000\000\000\000\000\000\000\000\000\001\000\000\000\000\000\000\000ª*\000\000\002\000\000\000\000\000\000\000J\217³@\000\000\000\000#\000\000\000\000\000\000\000"...
privcid = "\030\000\000\0000\000\000\000ð\213³@\000\000\000\0000\213³@\000\000\000\000\221\211A\000\000\000\000\000Feb 15 09:12:50\000\001\000\000\000\000\000\000\0000~Jµª*\000\000\000\020\000\000\000\000\000\000\220\211³@\000\000\000\000 \231³@", '\0' <repeats 12 times>, "\022©Eµª*\000\000ÄýF ª*\000\000H`Aµª*\000\000\000\005b\000\000\000\000\000\001\000\000\000ÿÿÿ\177", '\0' <repeats 24 times>, "3®Eµª*\000\000 \231³@", '\0' <repeats 12 times>, "°\211³@", '\0' <repeats 12 times>, "ÙIO\000\000\000\000\000p\214³@\000\000\000\000p\214³@"...
opermode = 0
args = {argc = 3, argv = 0x40b38d38, peers = 0x40b37a40 "Local", timeout = 0x40b37a61 "", options = 0x40b37a62 "g", url = 0x0}
opts = {flags = 32}
opt_args = {0x2aaab5b0a854 "O_DEPTH", 0x0, 0x40b38e60 "Àh\036«ª*", 0x40b38d60 "\030",
 0x2aaaab0e0e10 "    -- AGI Script Executing Application: (%s) Options: (%s)\n", 0x6 <Address 0x6 out of bounds>,
 0x2aaaab1e68c0 "T\v\016«ª*", 0x4641f3 "H\215eà[A\\A]A^ÉÃHÇ\003\200", 0x0}
datastore = (struct ast_datastore *) 0x2aaab5b0a810
fulldial = 0
num_dialed = 1
__PRETTY_FUNCTION__ = "dial_exec_full"
ASTERISK-6 0x00002aaab2cc8885 in dial_exec (chan=0x7ac2, data=0x5fab) from /usr/lib/asterisk/modules/app_dial.so
peerflags = {flags = 32}
ASTERISK-7 0x0000000000475fc7 in pbx_exec (c=0x2aaab5415c10, app=0x7a1820, data=0x40b38f4a) at pbx.c:532
res = 0
saved_c_appl = 0x6583c8 "DeadAGI"
saved_c_data = 0x40b3caa0 "queueDial.agi|0828294060|Local/0828294060@from-internal/n|1999|Agent/6049"
ASTERISK-8 0x00002aaaab0dcef6 in free () from /usr/lib/asterisk/modules/res_agi.so
No symbol table info available.
ASTERISK-9 0x00002aaaab0dd9ba in free () from /usr/lib/asterisk/modules/res_agi.so
No symbol table info available.
ASTERISK-10 0x00002aaaab0dee71 in free () from /usr/lib/asterisk/modules/res_agi.so
No symbol table info available.
ASTERISK-11 0x000000000047dfed in pbx_extension_helper (c=0x2aaab5415c10, con=<value optimized out>, context=0x2aaab5415e60 "from-qmagent",
   exten=0x2aaab5415eb0 "0828294060", priority=8, label=<value optimized out>, callerid=0x2aaab5401cd0 "5036", action=E_SPAWN) at pbx.c:532
e = <value optimized out>
app = (struct ast_app *) 0x6583a0
res = <value optimized out>
q = {incstack = {0x0 <repeats 128 times>}, stacklen = 0, status = 5, swo = 0x0, data = 0x0,
 foundcontext = 0x2aaab5415e60 "from-qmagent"}
passdata = "queueDial.agi|0828294060|Local/0828294060@from-internal/n|1999|Agent/6049", '\0' <repeats 8118 times>
matching_action = <value optimized out>
__PRETTY_FUNCTION__ = "pbx_extension_helper"
ASTERISK-12 0x00000000004806d3 in __ast_pbx_run (c=0x2aaab5415c10) at pbx.c:2306
waittime = <value optimized out>
dst_exten = "\020\000\000\0000\000\000\000àð³@\000\000\000\000À+\203\000\000\000\000\000\"A\203\000\000\000\000\000 l\206µª*\000\000\000\000\000\000\000\000\000\000\205_\000\000\000\000\000\000\211QB", '\0' <repeats 29 times>, "\201>", '\0' <repeats 38 times>, "\030", '\0' <repeats 55 times>, "\200)\024«µ*\000\000\000\000\000\000\000\000\000\000\030\000\000\000\000\000\000\000°\\­\000\000\000\000\000Q\003\002\000\000\000\000\000\000XKµª*\000\000¿Gøªµ*\000\0000?Hµª*\000\0000?Hµª*\000"
pos = 33188
digit = 0
found = 1
res = 0
error = <value optimized out>
__PRETTY_FUNCTION__ = "__ast_pbx_run"
ASTERISK-13 0x0000000000481359 in pbx_thread (data=0x7ac2) at pbx.c:2623
No locals.
ASTERISK-14 0x00000000004a9c7c in dummy_start (data=<value optimized out>) at utils.c:852
__cancel_buf = {__cancel_jmp_buf = {{__cancel_jmp_buf = {46912674217776, -68840629257102411, 46959740289200, 1081583936, 3,
       46912674420736, -68840630319060331, -68840629252225065}, __mask_was_saved = 0}}, __pad = {0x40b3f1e0, 0x0, 0x2ab5ab13d450,
   0x2ab5aaa380b0}}
__cancel_arg = (void *) 0x40b3f940
not_first_call = <value optimized out>
ret = <value optimized out>
ASTERISK-15 0x00002ab5aaa38143 in start_thread () from /lib64/libpthread.so.0
No symbol table info available.
ASTERISK-16 0x00002ab5aafd574d in clone () from /lib64/libc.so.6
No symbol table info available.
ASTERISK-17 0x0000000000000000 in ?? ()
No symbol table info available.




Frame 4, executing in local_queue_frame around here:

       /* Ensure that we have both channels locked */
       if (us) {
               while (ast_channel_trylock(other)) {
                       ast_channel_unlock(us);
                       usleep(1);
                       ast_channel_lock(us);
               }
       } else {
               ast_channel_lock(other);
       }

Presumably in the "ast_channel_trylock"

(gdb) p us->lock
$3 = {__data = {__lock = 1, __count = 1, __owner = 24491, __nusers = 1, __kind = 1, __spins = 0, __list = {__prev = 0x0, __next = 0x0}},
 __size = "\001\000\000\000\001\000\000\000«_\000\000\001\000\000\000\001", '\0' <repeats 22 times>, __align = 4294967297}


24491 is our thread - so we have "us" locked.

Problem is that "other" is junk:

(gdb) p *other
$5 = {tech = 0x4000000002, tech_pvt = 0xa000000140, __begin_field = 0x2aaab5b5b190, name = 0x1 <Address 0x1 out of bounds>,
 language = 0x1fa <Address 0x1fa out of bounds>, musicclass = 0x40 <Address 0x40 out of bounds>, accountcode = 0x2aaab5b5b370 "ulawtolin",
 call_forward = 0x2aaab5b5b230 "", uniqueid = 0x47b53c03 <Address 0x47b53c03 out of bounds>, __end_field = 0x2aaab5b5b1c0, __field_mgr = {
   pool = 0x8a4e4, size = 0, space = 1, used = 327490}, fds = {20, 0, 7346, 0, 0, 0, 0, 0}, music_state = 0x0, generatordata = 0x0,
 generator = 0x0, _bridge = 0x0, masq = 0x0, masqr = 0x0, cdrflags = -1572864, _softhangup = 8650832, whentohangup = 13510803167444984,
 blocker = 18440269913017810952, lock = {__data = {__lock = -6750320, __count = 4259708, __owner = -4128776, __nusers = 4287627280,
     __kind = 32, __spins = -6226024, __list = {__prev = 0xff4cff3c0000fff0, __next = 0x30003000000008}},
   __size = "\220ÿ\230ÿ|ÿ@\000øÿÀÿ\020\000\220ÿ \000\000\000\230ÿ ÿðÿ\000\000<ÿLÿ\b\000\000\0000\0000", __align = 18295310838726544},
 blockproc = 0xffe800380094fff8 <Address 0xffe800380094fff8 out of bounds>,
 appl = 0xffc0fff000600028 <Address 0xffc0fff000600028 out of bounds>, data = 0x48007000380020 <Address 0x48007000380020 out of bounds>,
 fdno = 6815776, sched = 0x20ffd8ffa80010, streamid = -5701712, stream = 0x38003800000000, vstreamid = -524272,
 vstream = 0x28001800000000, oldwriteformat = -3080312, timingfd = 40, timingfunc = 0xffc0fff0fff80018, timingdata = 0xffb0fff800380020,
 _state = 1638368, rings = 1048632, cid = {cid_dnid = 0xffe0ff6cffe8fff0 <Address 0xffe0ff6cffe8fff0 out of bounds>,
   cid_num = 0x30ffd0ffd80000 <Address 0x30ffd0ffd80000 out of bounds>,
   cid_name = 0x30003000180068 <Address 0x30003000180068 out of bounds>,
   cid_ani = 0x40007800b400a4 <Address 0x40007800b400a4 out of bounds>,
   cid_rdnis = 0xff6cffc000780040 <Address 0xff6cffc000780040 out of bounds>, cid_pres = 1114088, cid_ani2 = -3670016, cid_ton = 5308384,
   cid_tns = -4718528},
 dtmfq = " \000\000\000\230ÿðÿ\\ÿlÿ@\000ðÿØÿøÿ\000\000x\000H\000H\000\000\000°ÿlÿlÿèÿàÿH\000°ÿ¨ÿèÿ ÿ\230ÿ\\ÿ`\000\020\000èÿ¤\000(\000\030\0000\000`\000\020\000\020\000\030\000Ðÿ\000", dtmff = {frametype = 16, subclass = 6815792, datalen = 1048648, samples = 6815832,
   mallocd = 2002873461, mallocd_hdr_len = 110, offset = 0, src = 0x175 <Address 0x175 out of bounds>, data = 0x800000002, delivery = {
     tv_sec = 687194767520, tv_usec = 1}, frame_list = {next = 0x15b}, flags = 64, ts = 46912681391328, len = 46912681391168, seqno = 0},
 context = "\000\000\000\000\000\000\000\000À\225³µª*", '\0' <repeats 65 times>,
 exten = '\0' <repeats 32 times>, "TUTVPWWTUQWUWTÔÔÔÔÑ×U×ÔUTUUTUÔÔ×××ÕUTTT×ÔWUUTTUU", priority = 1431655765,
 macrocontext = "QVÕUUÔÔÔÔÔÕÕÕÖ×TQVUWTÕÕÕÕÔÔÕÔÔÕUUUUURQÔÕÔ×ßÒÕÔUTWVTVWUUÕÕÔÕÔÒÐW]SÕVWÔÔÔ×ÖÔWTUUWÕ",
 macroexten = "ÞÝVÖÔTÐÕPY]T^SÖT×ÑÑÑÖÑÕUÔUWWzt_read\000aw", '\0' <repeats 14 times>, "\025\002\000\000\000\000\000\000\002\000\000\000@\000\000\000@\001\000\000 \000\000\000\001\000\000", macropriority = 0,
 dialcontext = "ú\001\000\000\000\000\000\000@\000\000\000\000\000\000\000𶵵ª*\000\000°µµµª*\000\000\003<µG\000\000\000\000ä¤\b", '\0' <repeats 13 times>, "\001\000\000\000\000\000\000\000Bÿ\004\000\000\000\000\000\024\000\000\000\000\000\000", pbx = 0x1cb2, amaflags = 0,
 cdr = 0x0, adsicpe = AST_ADSI_UNKNOWN, zone = 0x0, monitor = 0x0, insmpl = 0, outsmpl = 0, fin = 0, fout = 0, hangupcause = -1572864,
 varshead = {first = 0x300000ff6cfff8, last = 0xffe8ffc8ffc00008}, callgroup = 18295310838726544, pickupgroup = 18415218949328273400,
 flags = 32, transfercapability = 65432, readq = {first = 0xff4cff3c0000fff0, last = 0x30003000000008}, alertpipe = {9764856, -1572808},
 nativeformats = 6291496, readformat = -4128784, writeformat = 3670048, writetrans = 0xffe0002000680020, readtrans = 0x20ffd8ffa80010,
 rawreadformat = -5701712, rawwriteformat = -458784, audiohooks = 0x38003800000000, unused = 0x280020fff80010, chan_list = {
   next = 0x28001800000000}, jb = {conf = {flags = 4291886984, max_size = -17732987957805032, resync_threshold = -22236557516210144,
     impl = "àÿ\030\0008\000\020\000ðÿèÿ"}, impl = 0x30ffd0ffd80000, jbobj = 0x30003000180068, timebase = {tv_sec = 18014913917354148,
     tv_usec = -41377096446508992}, next = -15762598694682648, last_format = 5308384, logfile = 0xfff0ff9800000020, flags = 4285333340},
 emulate_dtmf_digit = -40 'Ø', emulate_dtmf_duration = 7864320, dtmf_tv = {tv_sec = -22517998132133816, tv_usec = -8725823071912084},
 visible_indication = -5242808, datastores = {first = 0x60ff5cff98ffa0, last = 0x2800a4ffe80010}}

So the ast_channel_trylock aborts

Where did "other" come from?

static int local_queue_frame(struct local_pvt *p, int isoutbound, struct ast_frame *f, struct ast_channel *us)
{
       struct ast_channel *other = NULL;

       /* Recalculate outbound channel */
       other = isoutbound ? p->owner : p->chan;


(gdb) p other
$10 = (struct ast_channel *) 0x2aaab5b5b180
(gdb) p p->owner
$11 = (struct ast_channel *) 0x2aaab5b5a7f0
(gdb) p p->chan
$12 = (struct ast_channel *) 0x0

So from that it looks like isoutbound was false, so other := p->chan, which WAS 0x2aaab5b5b180.  But in the meantime that channel disappeared...

p->owner->name: Local/0828294060@from-internal-4468,1
Presumably, p->chan->name WAS Local/0828294060@from-internal-4468,2??

My diagnosis is that the "other" channel was closed in another thread between the time that "other" was fished out of the local_pvt structure, and the time that it was passed to ast_channel_trylock.

Don't see obviously that the channel was locked to prevent it from being closed.




****** ADDITIONAL INFORMATION ******

For reference, the function local_queue_frame:

static int local_queue_frame(struct local_pvt *p, int isoutbound, struct ast_frame *f, struct ast_channel *us)
{
       struct ast_channel *other = NULL;

       /* Recalculate outbound channel */
       other = isoutbound ? p->owner : p->chan;

       /* Set glare detection */
       ast_set_flag(p, LOCAL_GLARE_DETECT);
       if (ast_test_flag(p, LOCAL_CANCEL_QUEUE)) {
               /* We had a glare on the hangup.  Forget all this business,
               return and destroy p.  */
               ast_mutex_unlock(&p->lock);
               ast_mutex_destroy(&p->lock);
               free(p);
               return -1;
       }
       if (!other) {
               ast_clear_flag(p, LOCAL_GLARE_DETECT);
               return 0;
       }

       ast_mutex_unlock(&p->lock);

       /* Ensure that we have both channels locked */
       if (us) {
               while (ast_channel_trylock(other)) {
                       ast_channel_unlock(us);
                       usleep(1);
                       ast_channel_lock(us);
               }
       } else {
               ast_channel_lock(other);
       }

       ast_queue_frame(other, f);

       ast_channel_unlock(other);

       ast_mutex_lock(&p->lock);

       ast_clear_flag(p, LOCAL_GLARE_DETECT);

       return 0;
}


Comments:By: Digium Subversion (svnbot) 2008-02-19 13:59:25.000-0600

Repository: asterisk
Revision: 103821

U   branches/1.4/channels/chan_local.c

------------------------------------------------------------------------
r103821 | russell | 2008-02-19 13:59:24 -0600 (Tue, 19 Feb 2008) | 8 lines

Account for the fact that the "other" channel can disappear while the local pvt
is not locked.

(fixes a problem introduced in rev 100581)
(closes issue ASTERISK-11457)
Reported by: stevedavies
Patch by me

------------------------------------------------------------------------

http://svn.digium.com/view/asterisk?view=rev&revision=103821

By: Digium Subversion (svnbot) 2008-02-19 14:02:57.000-0600

Repository: asterisk
Revision: 103822

_U  trunk/
U   trunk/channels/chan_local.c

------------------------------------------------------------------------
r103822 | russell | 2008-02-19 14:02:57 -0600 (Tue, 19 Feb 2008) | 16 lines

Merged revisions 103821 via svnmerge from
https://origsvn.digium.com/svn/asterisk/branches/1.4

........
r103821 | russell | 2008-02-19 14:02:49 -0600 (Tue, 19 Feb 2008) | 8 lines

Account for the fact that the "other" channel can disappear while the local pvt
is not locked.

(fixes a problem introduced in rev 100581)
(closes issue ASTERISK-11457)
Reported by: stevedavies
Patch by me

........

------------------------------------------------------------------------

http://svn.digium.com/view/asterisk?view=rev&revision=103822