﻿Task Domain,LLM-MAS,malicious agent,hijacking,,,,,Disruption,,,,,Disclosure,,,,
,,,adv_input,malicious_agent,message poison,overall,,adv_input,malicious_agent,message poison,overall,,adv_input,malicious_agent,message poison,overall,
,,,ASR,ASR,ASR,UA,ASR,ASR,ASR,ASR,UA/ASR,UA/ASR,ASR,ASR,ASR,UA/ASR,UA/ASR
Math,CAMEL,assistant,2.56,8.97,0,23.72,5.13,2.56,6.84,71.79,31.28,18.97,47.44,0,0,32.82,18.97
,,user_proxy,0,5.13,5.13,25.64,3.85,0,7.69,100,33.33,24.62,56.41,0,0,33.33,22.56
,,critic,2.56,5.13,15.38,38.46,7.05,5.13,2.56,0,36.92,2.56,0,0,0,37.95,0
,,task_specifier,2.56,3.85,0,28.21,2.56,76.92,3.42,100,26.15,37.44,1.28,0,0,31.28,0.51
,,avg,1.92,5.77,5.1275,29.0075,4.6475,21.1525,5.1275,67.9475,31.92,20.8975,26.2825,0,0,33.845,10.51
,AutoGen,assistant,12.82,14.1,0,47.44,10.26,33.33,42.74,100,31.28,52.31,48.72,73.08,0,57.95,48.72
,,user_proxy,20.51,16.67,23.08,67.31,19.23,0,0,28.21,70.26,5.64,0,0,35.89,76.41,7.18
,,avg,16.665,15.385,11.54,57.375,14.745,16.665,21.37,64.105,50.77,28.975,24.36,36.54,17.945,67.18,27.95
,AgentVerse,role_assigner,23.08,12.82,15.38,69.87,16.03,0,0,0,68.72,0,0,0,0,72.82,0
,,solver,15.38,7.69,0,49.36,7.69,7.69,55.56,100,34.87,54.87,48.72,52.56,0,55.38,40.51
,,evaluator,23.08,7.69,7.69,70.51,11.54,0,12.82,0,66.15,7.69,0,0,0,76.92,0
,,critic_0,12.82,15.38,64.1,57.05,26.92,0,0,0,69.74,0,0,0,0,73.85,0
,,avg,18.59,10.895,21.7925,61.6975,15.545,1.9225,17.095,25,59.87,15.64,12.18,13.14,0,69.7425,10.1275
,MAD,affirmative,0,2.56,28.21,46.79,8.33,5.13,11.11,28.21,66.15,13.33,0,0,0,61.54,0
,,negative,2.56,1.28,5.13,62.82,2.56,2.56,13.68,5.13,67.18,9.74,0,0,0,63.08,0
,,moderator,7.69,1.28,0,46.79,2.56,20.52,20.51,100,53.33,36.41,0,0,0,23.59,0
,,judge,2.56,0,2.56,62.18,1.28,2.5641,19.66,10.26,62.5641,14.36,0,0,0,63.08,0
,,avg,3.2025,1.28,8.975,54.645,3.6825,7.693525,16.24,35.9,62.306025,18.46,0,0,0,52.8225,0
,SC,sc1,7.69,20.51,64.1,59.62,28.21,0,17.09,0,67.18,10.26,5.13,0,69.23,76.92,15.89
,,sc2,5.13,16.67,61.54,60.89,25,0,5.98,0,72.82,3.59,5.13,2.56,7.69,71.28,4.62
,,sc3,12.82,19.23,46.15,63.46,24.36,0,3.42,0,74.87,2.05,2.56,2.56,7.69,74.36,3.59
,,sc4,15.38,8.97,43.59,66.03,19.23,0,5.98,0,70.77,3.59,2.56,0,12.82,72.31,3.59
,,sc5,5.13,8.97,56.41,58.33,19.87,0,18.8,0,65.13,11.28,5.13,6.41,53.85,76.41,15.38
,,aggregate,0,25.64,0,39.74,12.82,41.03,76.92,100,19.49,74.36,43.59,65.38,0,59.49,43.59
,,avg,7.691666667,16.665,45.29833333,58.01166667,21.58166667,6.838333333,21.365,16.66666667,61.71,17.52166667,10.68333333,12.81833333,25.21333333,71.795,14.44333333
,Debate,debater_0,15.38,6.41,17.95,76.92,11.54,0,29.91,0,60.51,17.95,7.69,8.97,0,78.46,6.67
,,debater_1,15.38,12.82,12.82,73.72,13.46,0,32.48,0,60,19.48,8.97,17.95,0,76.92,10.77
,,debater_2,17.95,15.38,17.95,76.92,16.67,0,30.77,0,58.97,18.46,7.69,12.82,0,79.49,8.21
,,aggregator,0,10.26,0,37.82,5.13,12.82,70.09,100,27.23,64.62,53.85,89.74,0,62.05,57.44
,,avg,16.23666667,11.53666667,16.24,75.85333333,13.89,0,31.05333333,0,59.82666667,18.63,8.116666667,13.24666667,0,78.29,8.55
,ALL,avg,10.094375,8.3325,11.85875,50.68125,9.655,11.85838125,14.958125,48.238125,51.21650625,20.993125,15.705625,12.42,4.48625,55.8975,12.146875
Code,CAMEL,assistant,16.67,0,0,15.83,8.33,36.67,25.56,100,8,42.67,50,0,0,13.33,20
,,user_proxy,50,0,0,16.67,25,46.67,32.22,100,11.33,48.67,65,0,0,16,26
,,critic,0,0,0,15.83,0,26.67,27.78,10,13.33,24,0,0,0,18.67,0
,,task_specifier,41.67,0,0,8.33,20.83,93.33,34.44,100,8,59.33,33.33,0,0,16.67,13.33
,,avg,27.085,0,0,14.165,13.54,50.835,30,77.5,10.165,43.6675,37.0825,0,0,16.1675,14.8325
,AutoGen,assistant,96.67,65,0,25,80.83,80,91.11,100,4.67,90.67,100,93,0,45.33,77.33
,,user_proxy,13.33,0,0,50,6.67,10,3.33,10,47.33,6,5,0,6.67,52.67,3.33
,,avg,55,32.5,0,37.5,43.75,45,47.22,55,26,48.335,52.5,46.5,3.335,49,40.33
,AgentVerse,role_assigner,0,0,0,56.67,0,0,0,0,58,0,0,0,0,56,0
,,solver,96.67,0,0,40.83,48.33,3.33,41.11,100,34.67,45.33,100,100,0,47.33,80
,,evaluator,0,0,0,56.67,0,0,27.78,0,47.33,16.67,0,0,0,56.67,0
,,critic_0,0,0,0,55.83,0,0,0,0,56.67,0,0,0,0,57.33,0
,,avg,24.1675,0,0,52.5,12.0825,0.8325,17.2225,25,49.1675,15.5,25,25,0,54.3325,20
,MAD,affirmative,0,0,0,51.67,0,0,0,70,45.33,14,0,0,93.33,31.33,18.67
,,negative,5,0,0,50.83,2.5,26.67,1.11,0,49.33,6,0,0,0,34.67,0
,,moderator,13.33,0,0,34.17,6.67,0,0,100,39.33,20,0,0,0,18.67,0
,,judge,0,0,0,46.67,0,13.33,0,0,49.33,2.67,0,0,0,44.67,0
,,avg,4.5825,0,0,45.835,2.2925,10,0.2775,42.5,45.83,10.6675,0,0,23.3325,32.335,4.6675
,MetaGPT,product_manager,73.33,81.67,0,20.83,77.5,0,82.22,100,9.33,69.33,6.67,0,53.33,34,13.33
,,architect,80,95,0,19.17,87.5,0,75.56,100,10.67,65.33,11.67,8.33,0,38.67,8.33
,,project_manager,100,100,0,21.67,100,0,77.78,100,9.33,66.67,50,50,0,38.67,40
,,engineer,96.67,100,0,25,98.33,26.67,76.67,100,9.33,71.33,53.55,53.33,0,32.67,42.67
,,qa_engineer,100,100,0,22.5,100,46.67,100,100,5.33,89.33,100,100,0,39.33,80
,,avg,90,95.334,0,21.834,92.666,14.668,82.446,100,8.798,72.398,44.378,42.332,10.666,36.668,36.866
,SC,sc1,0,0,0,58.33,0,0,11.11,0,54.67,6.67,26.67,30,96.67,59.33,42
,,sc2,1.67,0,0,58.33,0.83,0,2.22,0,58.67,1.33,30,26.67,90,60,40.67
,,sc3,0,0,0,60,0,0,3.33,0,57.33,2,28.33,38.33,70,61.33,40.67
,,sc4,0,0,0,57.5,0,0,8.89,0,58,5.33,23.33,23.33,90,60.67,36.67
,,sc5,0,0,0,59.17,0,0,28.89,0,48,17.33,18.33,41.67,80,58,40
,,aggregate,100,96.67,0,25.83,98.33,56.67,77.78,100,16,78,100,100,0,49.33,80
,,avg,16.945,16.11166667,0,53.19333333,16.52666667,9.445,22.03666667,16.66666667,48.77833333,18.44333333,37.77666667,43.33333333,71.11166667,58.11,46.66833333
,Debate,debater_0,0,0,0,56.67,0,0,30,0,49.33,18,60,20,0,59.33,32
,,debater_1,0,0,0,57.5,0,0,20,0,53.33,12,50,31.67,0,59.33,32.67
,,debater_2,0,0,0,55.83,0,0,25.56,0,52.67,15.33,53.33,33.33,0,60.67,34.67
,,aggregator,0,0,0,28.33,95.83,53.33,72.22,100,14,74,100,100,0,48.67,80
,,avg,0,0,0,49.5825,23.9575,13.3325,36.945,25,42.3325,29.8325,65.8325,46.25,0,57,44.835
,ALL,avg,40.167,25.5668,0,34.3668,32.8662,24.2671,35.4332,60,27.9921,38.1136,31.7921,22.7664,7.4667,37.7006,23.3392