Summary: | ASTERISK-11457: SIGABORT in ast_channel_trylock in chan_local local_queue_frame when channel disappears underneath the code | ||
Reporter: | Steve Davies . (stevedavies) | Labels: | |
Date Opened: | 2008-02-17 09:27:40.000-0600 | Date Closed: | 2010-01-19 07:47:42.000-0600 |
Priority: | Critical | Regression? | No |
Status: | Closed/Complete | Components: | Channels/chan_local |
Versions: | Frequency of Occurrence | ||
Related Issues: | |||
Environment: | Attachments: | ||
Description: | Asterisk SIGABORTS - a "stale" channel pointer is passed to ast_channel_trylock. Program terminated with signal 6, Aborted. #0 0x00002ab5aaf45b95 in raise () from /lib64/libc.so.6 No symbol table info available. #1 0x00002ab5aaf46f90 in abort () from /lib64/libc.so.6 No symbol table info available. #2 0x00002ab5aaf3f256 in __assert_fail () from /lib64/libc.so.6 No symbol table info available. #3 0x00002ab5aaa3ad5a in pthread_mutex_trylock () from /lib64/libpthread.so.0 No symbol table info available. #4 0x00002aaaadc881a9 in local_queue_frame (p=0x2aaab5b49cd0, isoutbound=<value optimized out>, f=0x2aaab5481458, us=0x2aaab5b5a7f0) from /usr/lib/asterisk/modules/chan_local.so other = (struct ast_channel *) 0x2aaab5b5b180 ASTERISK-1 0x00002aaaadc886b1 in local_write (ast=0x2aaab5b5a7f0, f=0x2aaab5481458) from /usr/lib/asterisk/modules/chan_local.so p = (struct local_pvt *) 0x7ac2 res = <value optimized out> isoutbound = <value optimized out> __PRETTY_FUNCTION__ = "local_write" ASTERISK-2 0x000000000043ef8c in ast_write (chan=0x2aaab5b5a7f0, fr=0x2aaab5481458) at channel.c:2682 jump = <value optimized out> res = -1 count = <value optimized out> f = (struct ast_frame *) 0x2aaab5481458 f2 = (struct ast_frame *) 0x0 __PRETTY_FUNCTION__ = "ast_write" ASTERISK-3 0x0000000000440d8e in ast_channel_bridge (c0=0x2aaab5415c10, c1=0x2aaab5b5a7f0, config=0x40b38ba0, fo=0x40b379a8, rc=0x40b379a0) at channel.c:3806 monitored_source = 0 to = <value optimized out> res = <value optimized out> nativefailed = 0 firstpass = <value optimized out> o0nativeformats = 4 o1nativeformats = 4 time_left_ms = <value optimized out> nexteventts = {tv_sec = 0, tv_usec = 0} caller_warning = 0 '\0' callee_warning = 0 '\0' __PRETTY_FUNCTION__ = "ast_channel_bridge" ASTERISK-4 0x00002aaaaafcd125 in ast_bridge_call (chan=0x2aaab5415c10, peer=0x2aaab5b5a7f0, config=0x40b38ba0) at res_features.c:1420 other = (struct ast_channel *) 0x0 f = (struct ast_frame *) 0x0 who = (struct ast_channel *) 0x2aaab2cc8fa1 chan_featurecode = '\0' <repeats 11 times> peer_featurecode = '\0' <repeats 11 times> res = 538996016 hasfeatures = 0 backup_config = {features_caller = {flags = 0}, features_callee = {flags = 0}, start_time = {tv_sec = 0, tv_usec = 0}, feature_timer = 0, timelimit = 0, play_warning = 0, warning_freq = 0, warning_sound = 0x0, end_sound = 0x0, start_sound = 0x0, firstpass = 0, flags = 0} bridge_cdr = <value optimized out> __PRETTY_FUNCTION__ = "ast_bridge_call" ASTERISK-5 0x00002aaab2cc7290 in dial_exec_full (chan=0x2aaab5415c10, data=<value optimized out>, peerflags=0x40b38e10, continue_exec=0x0) from /usr/lib/asterisk/modules/app_dial.so toast = "\000\000\000\000\000\000\000\000rÖ÷ªµ*\000\00030;40\000\000\000ØJ\235\000\000\000\000\000ßJ\235\000\000\000\000\000X\000\000\000\000\000\000\000\020\016\016«ª*\000\000\006\000\000\000\000\000\000\000Àh\036«ª*\000\000S6÷ªµ*\000" config = {features_caller = {flags = 0}, features_callee = {flags = 0}, start_time = {tv_sec = 1203059599, tv_usec = 762167}, feature_timer = 0, timelimit = 0, play_warning = 0, warning_freq = 0, warning_sound = 0x0, end_sound = 0x0, start_sound = 0x0, firstpass = 0, flags = 0} res = 0 u = (struct ast_module_user *) 0x2aaab5b39810 rest = 0x0 cur = <value optimized out> outgoing = (struct dial_localuser *) 0x2aaab5b0a7f0 peer = (struct ast_channel *) 0x2aaab5b5a7f0 to = -1 numbusy = 0 numcongestion = 0 numnochan = 0 cause = 0 numsubst = "0828294060@from-internal/n", '\0' <repeats 30 times>, "0\213³@\000\000\000\000X`M\000\000\000\000\000p\214³@\000\000\000\000P\000\000\000\000\000\000\000\001\000\000\000\000\000\000\000\020\214³@\000\000\000\000\223\001öªµ*\000\0000\000\000\0000\000\000\000\020\214³@\000\000\000\0000\213³@\000\000\000\000ʳ÷ªµ*\000\000\001\200 û\000\000\000\000ØJ\235\000\000\000\000\000ØJ\235\000\000\000\000\000\033\000\000\000\000\000\000\000\001\000\000\000\000\000\000\000\020\214³@\000\000\000\000¸K\235\000\000\000\000\000Y\200üªµ*\000\000\000\000\000\000\000\000\000\000s\000\000\000"... cidname = '\0' <repeats 79 times> privdb_val = 0 calldurationlimit = 0 timelimit = 0 play_warning = 0 warning_freq = 0 warning_sound = 0x0 end_sound = 0x0 start_sound = 0x0 dtmfcalled = 0x0 dtmfcalling = 0x0 status = "ANSWER\000R\000GS", '\0' <repeats 244 times> play_to_caller = 0 play_to_callee = 0 sentringing = 0 moh = 0 outbound_group = 0x0 result = <value optimized out> start_time = <value optimized out> privintro = "\000\000\000\000\033\000\000\000ÃIO", '\0' <repeats 13 times>, "x\210³@\000\000\000\000\000\000\000\000\006\000\000\000\035", '\0' <repeats 11 times>, "\n", '\0' <repeats 43 times>, " \204³@\000\000\000\000ʳ÷ªµ*\000\000\001\200 û\000\000\000dør\220\000\000\000\000\000ør\220\000\000\000\000\000ør\220\000\000\000\000\000ør\220\000\000\000\000\000>s\220\000\000\000\000\000Øs\220\000\000\000\000\000ør\220\000\000\000\000\000Øs\220", '\0' <repeats 29 times>, "5K\235\000\000\000\000\000\000\000\000\000\001\000\000\000\000\000\000\000ª*\000\000\002\000\000\000\000\000\000\000J\217³@\000\000\000\000#\000\000\000\000\000\000\000"... privcid = "\030\000\000\0000\000\000\000ð\213³@\000\000\000\0000\213³@\000\000\000\000\221\211A\000\000\000\000\000Feb 15 09:12:50\000\001\000\000\000\000\000\000\0000~Jµª*\000\000\000\020\000\000\000\000\000\000\220\211³@\000\000\000\000 \231³@", '\0' <repeats 12 times>, "\022©Eµª*\000\000ÄýF ª*\000\000H`Aµª*\000\000\000\005b\000\000\000\000\000\001\000\000\000ÿÿÿ\177", '\0' <repeats 24 times>, "3®Eµª*\000\000 \231³@", '\0' <repeats 12 times>, "°\211³@", '\0' <repeats 12 times>, "ÙIO\000\000\000\000\000p\214³@\000\000\000\000p\214³@"... opermode = 0 args = {argc = 3, argv = 0x40b38d38, peers = 0x40b37a40 "Local", timeout = 0x40b37a61 "", options = 0x40b37a62 "g", url = 0x0} opts = {flags = 32} opt_args = {0x2aaab5b0a854 "O_DEPTH", 0x0, 0x40b38e60 "Àh\036«ª*", 0x40b38d60 "\030", 0x2aaaab0e0e10 " -- AGI Script Executing Application: (%s) Options: (%s)\n", 0x6 <Address 0x6 out of bounds>, 0x2aaaab1e68c0 "T\v\016«ª*", 0x4641f3 "H\215eà[A\\A]A^ÉÃHÇ\003\200", 0x0} datastore = (struct ast_datastore *) 0x2aaab5b0a810 fulldial = 0 num_dialed = 1 __PRETTY_FUNCTION__ = "dial_exec_full" ASTERISK-6 0x00002aaab2cc8885 in dial_exec (chan=0x7ac2, data=0x5fab) from /usr/lib/asterisk/modules/app_dial.so peerflags = {flags = 32} ASTERISK-7 0x0000000000475fc7 in pbx_exec (c=0x2aaab5415c10, app=0x7a1820, data=0x40b38f4a) at pbx.c:532 res = 0 saved_c_appl = 0x6583c8 "DeadAGI" saved_c_data = 0x40b3caa0 "queueDial.agi|0828294060|Local/0828294060@from-internal/n|1999|Agent/6049" ASTERISK-8 0x00002aaaab0dcef6 in free () from /usr/lib/asterisk/modules/res_agi.so No symbol table info available. ASTERISK-9 0x00002aaaab0dd9ba in free () from /usr/lib/asterisk/modules/res_agi.so No symbol table info available. ASTERISK-10 0x00002aaaab0dee71 in free () from /usr/lib/asterisk/modules/res_agi.so No symbol table info available. ASTERISK-11 0x000000000047dfed in pbx_extension_helper (c=0x2aaab5415c10, con=<value optimized out>, context=0x2aaab5415e60 "from-qmagent", exten=0x2aaab5415eb0 "0828294060", priority=8, label=<value optimized out>, callerid=0x2aaab5401cd0 "5036", action=E_SPAWN) at pbx.c:532 e = <value optimized out> app = (struct ast_app *) 0x6583a0 res = <value optimized out> q = {incstack = {0x0 <repeats 128 times>}, stacklen = 0, status = 5, swo = 0x0, data = 0x0, foundcontext = 0x2aaab5415e60 "from-qmagent"} passdata = "queueDial.agi|0828294060|Local/0828294060@from-internal/n|1999|Agent/6049", '\0' <repeats 8118 times> matching_action = <value optimized out> __PRETTY_FUNCTION__ = "pbx_extension_helper" ASTERISK-12 0x00000000004806d3 in __ast_pbx_run (c=0x2aaab5415c10) at pbx.c:2306 waittime = <value optimized out> dst_exten = "\020\000\000\0000\000\000\000àð³@\000\000\000\000À+\203\000\000\000\000\000\"A\203\000\000\000\000\000 l\206µª*\000\000\000\000\000\000\000\000\000\000\205_\000\000\000\000\000\000\211QB", '\0' <repeats 29 times>, "\201>", '\0' <repeats 38 times>, "\030", '\0' <repeats 55 times>, "\200)\024«µ*\000\000\000\000\000\000\000\000\000\000\030\000\000\000\000\000\000\000°\\\000\000\000\000\000Q\003\002\000\000\000\000\000\000XKµª*\000\000¿Gøªµ*\000\0000?Hµª*\000\0000?Hµª*\000" pos = 33188 digit = 0 found = 1 res = 0 error = <value optimized out> __PRETTY_FUNCTION__ = "__ast_pbx_run" ASTERISK-13 0x0000000000481359 in pbx_thread (data=0x7ac2) at pbx.c:2623 No locals. ASTERISK-14 0x00000000004a9c7c in dummy_start (data=<value optimized out>) at utils.c:852 __cancel_buf = {__cancel_jmp_buf = {{__cancel_jmp_buf = {46912674217776, -68840629257102411, 46959740289200, 1081583936, 3, 46912674420736, -68840630319060331, -68840629252225065}, __mask_was_saved = 0}}, __pad = {0x40b3f1e0, 0x0, 0x2ab5ab13d450, 0x2ab5aaa380b0}} __cancel_arg = (void *) 0x40b3f940 not_first_call = <value optimized out> ret = <value optimized out> ASTERISK-15 0x00002ab5aaa38143 in start_thread () from /lib64/libpthread.so.0 No symbol table info available. ASTERISK-16 0x00002ab5aafd574d in clone () from /lib64/libc.so.6 No symbol table info available. ASTERISK-17 0x0000000000000000 in ?? () No symbol table info available. Frame 4, executing in local_queue_frame around here: /* Ensure that we have both channels locked */ if (us) { while (ast_channel_trylock(other)) { ast_channel_unlock(us); usleep(1); ast_channel_lock(us); } } else { ast_channel_lock(other); } Presumably in the "ast_channel_trylock" (gdb) p us->lock $3 = {__data = {__lock = 1, __count = 1, __owner = 24491, __nusers = 1, __kind = 1, __spins = 0, __list = {__prev = 0x0, __next = 0x0}}, __size = "\001\000\000\000\001\000\000\000«_\000\000\001\000\000\000\001", '\0' <repeats 22 times>, __align = 4294967297} 24491 is our thread - so we have "us" locked. Problem is that "other" is junk: (gdb) p *other $5 = {tech = 0x4000000002, tech_pvt = 0xa000000140, __begin_field = 0x2aaab5b5b190, name = 0x1 <Address 0x1 out of bounds>, language = 0x1fa <Address 0x1fa out of bounds>, musicclass = 0x40 <Address 0x40 out of bounds>, accountcode = 0x2aaab5b5b370 "ulawtolin", call_forward = 0x2aaab5b5b230 "", uniqueid = 0x47b53c03 <Address 0x47b53c03 out of bounds>, __end_field = 0x2aaab5b5b1c0, __field_mgr = { pool = 0x8a4e4, size = 0, space = 1, used = 327490}, fds = {20, 0, 7346, 0, 0, 0, 0, 0}, music_state = 0x0, generatordata = 0x0, generator = 0x0, _bridge = 0x0, masq = 0x0, masqr = 0x0, cdrflags = -1572864, _softhangup = 8650832, whentohangup = 13510803167444984, blocker = 18440269913017810952, lock = {__data = {__lock = -6750320, __count = 4259708, __owner = -4128776, __nusers = 4287627280, __kind = 32, __spins = -6226024, __list = {__prev = 0xff4cff3c0000fff0, __next = 0x30003000000008}}, __size = "\220ÿ\230ÿ|ÿ@\000øÿÀÿ\020\000\220ÿ \000\000\000\230ÿ ÿðÿ\000\000<ÿLÿ\b\000\000\0000\0000", __align = 18295310838726544}, blockproc = 0xffe800380094fff8 <Address 0xffe800380094fff8 out of bounds>, appl = 0xffc0fff000600028 <Address 0xffc0fff000600028 out of bounds>, data = 0x48007000380020 <Address 0x48007000380020 out of bounds>, fdno = 6815776, sched = 0x20ffd8ffa80010, streamid = -5701712, stream = 0x38003800000000, vstreamid = -524272, vstream = 0x28001800000000, oldwriteformat = -3080312, timingfd = 40, timingfunc = 0xffc0fff0fff80018, timingdata = 0xffb0fff800380020, _state = 1638368, rings = 1048632, cid = {cid_dnid = 0xffe0ff6cffe8fff0 <Address 0xffe0ff6cffe8fff0 out of bounds>, cid_num = 0x30ffd0ffd80000 <Address 0x30ffd0ffd80000 out of bounds>, cid_name = 0x30003000180068 <Address 0x30003000180068 out of bounds>, cid_ani = 0x40007800b400a4 <Address 0x40007800b400a4 out of bounds>, cid_rdnis = 0xff6cffc000780040 <Address 0xff6cffc000780040 out of bounds>, cid_pres = 1114088, cid_ani2 = -3670016, cid_ton = 5308384, cid_tns = -4718528}, dtmfq = " \000\000\000\230ÿðÿ\\ÿlÿ@\000ðÿØÿøÿ\000\000x\000H\000H\000\000\000°ÿlÿlÿèÿàÿH\000°ÿ¨ÿèÿ ÿ\230ÿ\\ÿ`\000\020\000èÿ¤\000(\000\030\0000\000`\000\020\000\020\000\030\000Ðÿ\000", dtmff = {frametype = 16, subclass = 6815792, datalen = 1048648, samples = 6815832, mallocd = 2002873461, mallocd_hdr_len = 110, offset = 0, src = 0x175 <Address 0x175 out of bounds>, data = 0x800000002, delivery = { tv_sec = 687194767520, tv_usec = 1}, frame_list = {next = 0x15b}, flags = 64, ts = 46912681391328, len = 46912681391168, seqno = 0}, context = "\000\000\000\000\000\000\000\000À\225³µª*", '\0' <repeats 65 times>, exten = '\0' <repeats 32 times>, "TUTVPWWTUQWUWTÔÔÔÔÑ×U×ÔUTUUTUÔÔ×××ÕUTTT×ÔWUUTTUU", priority = 1431655765, macrocontext = "QVÕUUÔÔÔÔÔÕÕÕÖ×TQVUWTÕÕÕÕÔÔÕÔÔÕUUUUURQÔÕÔ×ßÒÕÔUTWVTVWUUÕÕÔÕÔÒÐW]SÕVWÔÔÔ×ÖÔWTUUWÕ", macroexten = "ÞÝVÖÔTÐÕPY]T^SÖT×ÑÑÑÖÑÕUÔUWWzt_read\000aw", '\0' <repeats 14 times>, "\025\002\000\000\000\000\000\000\002\000\000\000@\000\000\000@\001\000\000 \000\000\000\001\000\000", macropriority = 0, dialcontext = "ú\001\000\000\000\000\000\000@\000\000\000\000\000\000\000𶵵ª*\000\000°µµµª*\000\000\003<µG\000\000\000\000ä¤\b", '\0' <repeats 13 times>, "\001\000\000\000\000\000\000\000Bÿ\004\000\000\000\000\000\024\000\000\000\000\000\000", pbx = 0x1cb2, amaflags = 0, cdr = 0x0, adsicpe = AST_ADSI_UNKNOWN, zone = 0x0, monitor = 0x0, insmpl = 0, outsmpl = 0, fin = 0, fout = 0, hangupcause = -1572864, varshead = {first = 0x300000ff6cfff8, last = 0xffe8ffc8ffc00008}, callgroup = 18295310838726544, pickupgroup = 18415218949328273400, flags = 32, transfercapability = 65432, readq = {first = 0xff4cff3c0000fff0, last = 0x30003000000008}, alertpipe = {9764856, -1572808}, nativeformats = 6291496, readformat = -4128784, writeformat = 3670048, writetrans = 0xffe0002000680020, readtrans = 0x20ffd8ffa80010, rawreadformat = -5701712, rawwriteformat = -458784, audiohooks = 0x38003800000000, unused = 0x280020fff80010, chan_list = { next = 0x28001800000000}, jb = {conf = {flags = 4291886984, max_size = -17732987957805032, resync_threshold = -22236557516210144, impl = "àÿ\030\0008\000\020\000ðÿèÿ"}, impl = 0x30ffd0ffd80000, jbobj = 0x30003000180068, timebase = {tv_sec = 18014913917354148, tv_usec = -41377096446508992}, next = -15762598694682648, last_format = 5308384, logfile = 0xfff0ff9800000020, flags = 4285333340}, emulate_dtmf_digit = -40 'Ø', emulate_dtmf_duration = 7864320, dtmf_tv = {tv_sec = -22517998132133816, tv_usec = -8725823071912084}, visible_indication = -5242808, datastores = {first = 0x60ff5cff98ffa0, last = 0x2800a4ffe80010}} So the ast_channel_trylock aborts Where did "other" come from? static int local_queue_frame(struct local_pvt *p, int isoutbound, struct ast_frame *f, struct ast_channel *us) { struct ast_channel *other = NULL; /* Recalculate outbound channel */ other = isoutbound ? p->owner : p->chan; (gdb) p other $10 = (struct ast_channel *) 0x2aaab5b5b180 (gdb) p p->owner $11 = (struct ast_channel *) 0x2aaab5b5a7f0 (gdb) p p->chan $12 = (struct ast_channel *) 0x0 So from that it looks like isoutbound was false, so other := p->chan, which WAS 0x2aaab5b5b180. But in the meantime that channel disappeared... p->owner->name: Local/0828294060@from-internal-4468,1 Presumably, p->chan->name WAS Local/0828294060@from-internal-4468,2?? My diagnosis is that the "other" channel was closed in another thread between the time that "other" was fished out of the local_pvt structure, and the time that it was passed to ast_channel_trylock. Don't see obviously that the channel was locked to prevent it from being closed. ****** ADDITIONAL INFORMATION ****** For reference, the function local_queue_frame: static int local_queue_frame(struct local_pvt *p, int isoutbound, struct ast_frame *f, struct ast_channel *us) { struct ast_channel *other = NULL; /* Recalculate outbound channel */ other = isoutbound ? p->owner : p->chan; /* Set glare detection */ ast_set_flag(p, LOCAL_GLARE_DETECT); if (ast_test_flag(p, LOCAL_CANCEL_QUEUE)) { /* We had a glare on the hangup. Forget all this business, return and destroy p. */ ast_mutex_unlock(&p->lock); ast_mutex_destroy(&p->lock); free(p); return -1; } if (!other) { ast_clear_flag(p, LOCAL_GLARE_DETECT); return 0; } ast_mutex_unlock(&p->lock); /* Ensure that we have both channels locked */ if (us) { while (ast_channel_trylock(other)) { ast_channel_unlock(us); usleep(1); ast_channel_lock(us); } } else { ast_channel_lock(other); } ast_queue_frame(other, f); ast_channel_unlock(other); ast_mutex_lock(&p->lock); ast_clear_flag(p, LOCAL_GLARE_DETECT); return 0; } | ||
Comments: | By: Digium Subversion (svnbot) 2008-02-19 13:59:25.000-0600 Repository: asterisk Revision: 103821 U branches/1.4/channels/chan_local.c ------------------------------------------------------------------------ r103821 | russell | 2008-02-19 13:59:24 -0600 (Tue, 19 Feb 2008) | 8 lines Account for the fact that the "other" channel can disappear while the local pvt is not locked. (fixes a problem introduced in rev 100581) (closes issue ASTERISK-11457) Reported by: stevedavies Patch by me ------------------------------------------------------------------------ http://svn.digium.com/view/asterisk?view=rev&revision=103821 By: Digium Subversion (svnbot) 2008-02-19 14:02:57.000-0600 Repository: asterisk Revision: 103822 _U trunk/ U trunk/channels/chan_local.c ------------------------------------------------------------------------ r103822 | russell | 2008-02-19 14:02:57 -0600 (Tue, 19 Feb 2008) | 16 lines Merged revisions 103821 via svnmerge from https://origsvn.digium.com/svn/asterisk/branches/1.4 ........ r103821 | russell | 2008-02-19 14:02:49 -0600 (Tue, 19 Feb 2008) | 8 lines Account for the fact that the "other" channel can disappear while the local pvt is not locked. (fixes a problem introduced in rev 100581) (closes issue ASTERISK-11457) Reported by: stevedavies Patch by me ........ ------------------------------------------------------------------------ http://svn.digium.com/view/asterisk?view=rev&revision=103822 |