﻿Task Domain,LLM-MAS,malicious agent,hijacking,,,,,Disruption,,,,,Disclosure,,,,
,,,adv_input,malicious_agent,message poison,overall,,adv_input,malicious_agent,message poison,overall,,adv_input,malicious_agent,message poison,overall,
,,,ASR,ASR,ASR,UA,ASR,ASR,ASR,ASR,UA,ASR,ASR,ASR,ASR,UA,ASR
Math,CAMEL,assistant,2.56,8.97,0,26.92,5.13,76.92,0,71.79,23.59,30.26,88.46,1.28,2.56,29.74,35.38
,,user_proxy,0,3.85,0,23.72,2.56,71.79,2.56,100,19.49,36.41,0,1.28,2.56,17.44,1.03
,,critic,0,7.69,10.26,39.1,7.05,0,0,2.56,37.44,0,0,0,0,27.69,0
,,task_specifier,7.69,2.56,2.56,26.92,5.13,100,0.85,97.44,23.59,40.51,1.28,0,0,26.67,0.51
,,avg,2.56,7.69,7.69,28.85,5.77,61.54,1.71,66.67,25.64,27.18,20.51,0,0,25.13,8.21
,AutoGen,assistant,33.33,16.67,0,19.87,16.67,87.18,95.73,100,1.03,95.9,100,100,2.56,58.46,79.49
,,user_proxy,12.82,20.51,41.03,58.97,23.72,25.64,1.71,61.54,65.13,19.49,3.85,1.28,2.56,73.33,1.54
,,avg,23.08,16.67,20.51,37.82,19.87,58.97,48.72,79.49,32.82,57.95,52.56,50,0,66.67,40.51
,AgentVerse,role_assigner,5.13,11.54,15.38,71.15,10.26,0,0,2.56,73.33,0.51,0,1.28,2.56,68.72,0.51
,,solver,30.77,16.67,0,48.08,16.67,33.33,64.96,97.44,23.08,65.64,100,98.72,2.56,60,80.51
,,evaluator,0,12.82,17.95,70.51,10.9,2.56,0.85,2.56,69.74,1.03,1.28,0,2.56,68.72,0
,,critic_0,12.82,8.97,64.1,55.77,23.72,0,0,0,71.79,0.51,0,1.28,0,75.9,0
,,avg,15.38,12.82,28.21,62.18,16.03,5.13,15.38,25.64,58.97,16.92,26.92,24.36,2.56,68.21,20
,MAD,affirmative,0,5.13,43.59,57.69,13.46,10.26,11.11,43.59,61.03,17.44,1.28,0,0,60.51,0
,,negative,10.26,7.69,17.95,67.31,10.26,15.38,11.11,10.26,69.74,10.26,1.28,1.28,2.56,64.62,0.51
,,moderator,7.69,5.13,2.56,51.28,4.49,23.08,13.68,100,50.77,31.79,39.74,0,0,38.46,16.92
,,judge,15.38,7.69,12.82,71.15,9.62,5.13,11.11,20.51,68.21,10.77,0,0,0,68.21,0.51
,,avg,5.13,6.41,20.51,61.54,9.62,17.95,10.26,43.59,63.08,18.46,10.26,0,0,58.46,3.59
,SC,sc1,0,11.54,66.67,58.33,22.44,0,0,0,77.95,0,1.28,1.28,2.56,80.51,0
,,sc2,2.56,16.67,66.67,58.33,24.36,0,0,2.56,81.03,0,0,0,0,78.46,0
,,sc3,12.82,11.54,53.85,67.95,21.79,2.56,0.85,0,77.44,0,0,0,0,77.95,0
,,sc4,28.21,10.26,51.28,61.54,25.64,0,0,0,77.95,0,1.28,1.28,2.56,81.03,0.51
,,sc5,23.08,14.1,66.67,64.74,30.13,0,0.85,2.56,66.15,0,11.54,7.69,0,76.92,7.69
,,aggregate,56.41,46.15,2.56,26.92,35.9,15.38,92.31,97.44,17.95,78.97,96.15,100,0,62.05,78.46
,,avg,23.08,16.67,53.85,57.05,26.92,5.13,14.53,17.95,66.67,13.33,19.23,19.23,0,76.41,13.85
,Debate,debater_0,7.69,26.92,20.51,79.49,19.23,0,0,2.56,78.46,0,1.28,0,0,64.62,0.51
,,debater_1,10.26,23.08,17.95,76.28,18.59,0,0.85,0,65.13,0,0,0,0,65.13,0
,,debater_2,7.69,8.97,23.08,80.77,11.54,2.56,0.85,0,66.15,0.51,1.28,1.28,0,64.62,0.51
,,aggregator,20.51,32.05,2.56,29.49,21.79,100,100,100,3.08,100,71.79,66.67,2.56,51.28,55.9
,,avg,12.82,21.79,15.38,66.03,17.31,28.21,24.79,25.64,52.82,24.62,19.23,15.38,2.56,61.54,13.85
,ALL,avg,12.82,11.54,20.51,48.72,12.82,38.46,18.8,51.28,45.13,30.26,25.64,20.51,0,54.36,18.97
Code,CAMEL,assistant,5,0,0,13.33,0.83,90,24.44,96.67,14,52.67,51.67,1.67,0,18,19.33
,,user_proxy,0,1.67,0,15,0.83,93.33,28.89,96.67,10.67,55.33,30,0,0,16.67,12
,,critic,0,1.67,0,23.33,0,36.67,16.67,36.67,18.67,24.67,0,0,0,19.33,0
,,task_specifier,35,0,0,10,16.67,96.67,28.89,96.67,10.67,56.67,1.67,0,0,15.33,1.33
,,avg,11.67,0,0,15.83,5.83,80,23.33,80,14,46.67,23.33,0,0,16.67,8.67
,AutoGen,assistant,96.67,90,0,28.33,93.33,93.33,97.78,100,4,96.67,51.67,83.33,0,32,54.67
,,user_proxy,26.67,0,0,51.67,14.17,40,6.67,46.67,47.33,20,0,0,6.67,46,2
,,avg,61.67,46.67,0,39.17,54.17,63.33,52.22,70,26.67,58,26.67,43.33,3.33,40,28.67
,AgentVerse,role_assigner,0,1.67,0,63.33,0,0,1.11,0,64.67,0.67,0,1.67,3.33,65.33,0
,,solver,76.67,25,0,52.5,50.83,0,0,3.33,26,60.67,55,100,0,40,62.67
,,evaluator,0,1.67,0,64.17,0.83,0,2.22,0,65.33,1.33,1.67,1.67,3.33,64,0
,,critic_0,0,1.67,0,65.83,0,0,0,0,64.67,0,0,1.67,0,63.33,0
,,avg,20,8.33,0,60.83,13.33,3.33,0,0,54.67,14.67,11.67,26.67,3.33,57.33,14.67
,MAD,affirmative,0,0,0,52.5,0.83,20,23.33,86.67,37.33,34.67,0,0,23.33,40,6
,,negative,1.67,1.67,0,59.17,0,0,22.22,3.33,50,12.67,3.33,0,0,44.67,0.67
,,moderator,40,0,0,50.83,20.83,43.33,21.11,100,34.67,40,0,0,0,18,0
,,judge,0,0,0,48.33,0,6.67,13.33,16.67,47.33,12.67,0,1.67,0,36.67,0
,,avg,8.33,0,0,52.5,5,20,20,53.33,41.33,24.67,0,0,10,34.67,0.67
,MetaGPT,product_manager,6.67,43.33,0,27.5,23.33,33.33,76.67,100,12.67,73.33,1.67,1.67,60,28,10.67
,,architect,60,81.67,0,22.5,71.67,66.67,77.78,100,6,81.33,1.67,3.33,90,36,18.67
,,project_manager,48.33,98.33,0,40.83,72.5,0,82.22,100,12,68,0,1.67,0,34,0
,,engineer,93.33,61.67,0,31.67,79.17,80,71.11,100,1.33,79.33,1.67,0,10,36,1.33
,,qa_engineer,51.67,98.33,0,38.33,76.67,0,100,96.67,10,79.33,1.67,98.33,0,34,59.33
,,avg,50,78.33,0,30.83,64.17,33.33,82.22,100,7.33,76.67,0,20,33.33,34,18.67
,SC,sc1,1.67,8.33,0,61.67,5.83,3.33,0,0,61.33,0,1.67,0,0,64,0.67
,,sc2,13.33,5,0,61.67,8.33,0,1.11,3.33,62,0,0,1.67,0,64,0.67
,,sc3,10,11.67,0,59.17,10.83,3.33,0,3.33,60.67,0,1.67,1.67,0,62.67,0
,,sc4,6.67,1.67,0,60,5,3.33,0,3.33,60.67,0,0,1.67,3.33,64,0
,,sc5,31.67,6.67,0,59.17,17.5,0,1.11,3.33,60,0,0,0,0,62.67,0
,,aggregate,100,100,0,29.17,100,43.33,80,96.67,17.33,76.67,76.67,100,0,45.33,71.33
,,avg,26.67,21.67,0,55.83,25,6.67,14.44,13.33,54,12,11.67,15,3.33,60,12
,Debate,debater_0,33.33,30,0,61.67,31.67,3.33,1.11,0,61.33,0,33.33,40,0,61.33,30.67
,,debater_1,11.67,40,0,61.67,27.5,0,0,3.33,60.67,0,35,41.67,3.33,61.33,30
,,debater_2,25,21.67,0,60,22.5,0,0,0,61.33,0,35,38.33,3.33,62,30.67
,,aggregator,98.33,98.33,0,29.17,100,56.67,84.44,100,10,82.67,98.33,100,0,46,78.67
,,avg,40,48.33,0,51.67,45,10,22.22,30,47.33,20.67,48.33,53.33,3.33,57.33,42.67
,ALL,avg,28.33,26.67,0,40,27.5,36.67,36.67,60,28.67,44.67,11.67,18.33,3.33,36,14