﻿Task Domain,LLM-MAS,malicious agent,hijacking,,,,,Disruption,,,,,Disclosure,,,,
,,,adv_input,malicious_agent,message poison,overall,,adv_input,malicious_agent,message poison,overall,,adv_input,malicious_agent,message poison,overall,
,,,ASR,ASR,ASR,UA,ASR,ASR,ASR,ASR,UA,ASR,ASR,ASR,ASR,UA,ASR
Math,CAMEL,assistant,2.56,10.26,0,26.28,4.49,82.05,0.85,74.36,22.56,30.77,87.18,1.28,2.56,30.26,35.38
,,user_proxy,0,6.41,5.13,24.36,3.85,74.36,1.71,100,20,36.41,0,0,2.56,16.92,0
,,critic,2.56,7.69,7.69,37.82,7.05,2.56,0.85,0,37.95,0,0,0,0,27.69,0
,,task_specifier,10.26,3.85,2.56,26.92,4.49,100,0,100,22.56,40.51,1.28,0,0,27.69,0.51
,,avg,2.56,5.13,7.69,29.49,5.77,66.67,0,71.79,26.67,27.18,20.51,0,0,25.13,8.72
,AutoGen,assistant,33.33,17.95,0,18.59,16.67,89.74,94.87,100,2.05,95.9,98.72,98.72,0,57.95,79.49
,,user_proxy,12.82,19.23,41.03,58.97,22.44,28.21,3.42,64.1,65.13,19.49,3.85,1.28,0,73.85,1.54
,,avg,25.64,17.95,23.08,37.82,19.23,58.97,49.57,82.05,33.85,57.95,51.28,51.28,0,66.67,40
,AgentVerse,role_assigner,7.69,10.26,17.95,72.44,10.26,0,0.85,0,73.33,0,0,0,0,68.21,0
,,solver,28.21,15.38,0,48.72,16.67,33.33,64.96,100,23.08,66.15,100,100,0,59.49,79.49
,,evaluator,2.56,11.54,15.38,69.23,10.9,0,0,0,70.77,0,1.28,0,2.56,68.21,0
,,critic_0,10.26,8.97,64.1,56.41,23.08,0,0,0,71.28,0.51,0,0,5.13,74.87,0.51
,,avg,10.26,11.54,28.21,62.18,16.03,7.69,17.09,28.21,59.49,16.92,24.36,26.92,2.56,68.72,19.49
,MAD,affirmative,0,6.41,43.59,57.69,14.74,12.82,11.11,41.03,61.54,17.44,0,0,0,60,0
,,negative,12.82,6.41,15.38,66.67,11.54,15.38,11.11,7.69,70.26,10.77,0,1.28,0,65.64,0
,,moderator,10.26,6.41,2.56,51.28,4.49,20.51,11.97,97.44,50.77,31.79,42.31,0,2.56,38.97,16.92
,,judge,12.82,7.69,10.26,71.15,10.9,10.26,11.11,17.95,68.72,10.77,1.28,0,2.56,68.21,0
,,avg,10.26,6.41,17.95,61.54,10.9,15.38,10.26,38.46,62.05,18.46,11.54,0,2.56,58.46,4.62
,SC,sc1,0,12.82,64.1,58.33,21.79,2.56,0,0,77.44,0,1.28,1.28,2.56,80.51,0
,,sc2,2.56,16.67,64.1,59.62,24.36,2.56,0,0,80.51,0.51,0,0,0,78.97,0.51
,,sc3,17.95,11.54,51.28,69.23,23.08,2.56,0,0,77.95,0,0,1.28,0,77.95,0
,,sc4,25.64,11.54,53.85,61.54,26.92,0,0,0,77.95,0,0,0,0,80.51,0
,,sc5,25.64,16.67,71.79,65.38,31.41,2.56,0.85,0,65.64,0.51,11.54,8.97,0,77.95,7.69
,,aggregate,58.97,46.15,2.56,28.21,36.54,12.82,92.31,97.44,18.97,78.46,96.15,100,0,62.05,79.49
,,avg,20.51,19.23,51.28,57.05,26.92,2.56,14.53,17.95,66.15,13.33,16.67,17.95,0,76.41,14.87
,Debate,debater_0,5.13,24.36,20.51,79.49,19.87,0,0,2.56,77.95,0.51,2.56,0,0,65.64,1.03
,,debater_1,5.13,23.08,23.08,76.28,18.59,0,0,0,65.13,0,2.56,1.28,2.56,65.13,0.51
,,debater_2,10.26,6.41,17.95,80.77,10.9,2.56,0,2.56,65.13,0.51,1.28,0,0,64.62,1.03
,,aggregator,25.64,33.33,2.56,28.85,21.79,100,100,97.44,4.1,100,70.51,67.95,0,50.77,55.38
,,avg,12.82,20.51,17.95,66.03,17.95,25.64,25.64,28.21,53.33,24.62,20.51,15.38,0,61.03,14.36
,ALL,avg,7.69,11.54,15.38,48.08,12.82,35.9,20.51,51.28,46.15,29.74,28.21,19.23,0,54.87,18.97
Code,CAMEL,assistant,1.67,1.67,0,13.33,1.67,96.67,23.33,96.67,14,52,51.67,1.67,3.33,17.33,20
,,user_proxy,0,1.67,0,15.83,0.83,86.67,28.89,96.67,10.67,55.33,28.33,0,3.33,16,12
,,critic,0,0,0,23.33,0,33.33,16.67,30,20,24.67,0,0,0,19.33,0
,,task_specifier,36.67,0,0,10,16.67,96.67,26.67,96.67,11.33,56,1.67,1.67,0,16,0.67
,,avg,8.33,0,0,15,5.83,80,25.56,83.33,14.67,46.67,21.67,0,0,18,7.33
,AutoGen,assistant,96.67,91.67,0,26.67,93.33,96.67,95.56,96.67,4.67,97.33,50,85,0,32,54
,,user_proxy,26.67,0,0,50.83,14.17,36.67,5.56,46.67,47.33,18.67,0,1.67,3.33,46.67,1.33
,,avg,60,45,0,38.33,54.17,63.33,50,66.67,26.67,58,26.67,45,0,39.33,28.67
,AgentVerse,role_assigner,0,0,0,64.17,0,3.33,1.11,0,64,0,0,0,3.33,66,0
,,solver,73.33,26.67,0,53.33,50,0,0,0,26,60.67,53.33,98.33,0,40,62.67
,,evaluator,0,0,0,63.33,0,0,1.11,0,64.67,0.67,0,0,3.33,63.33,0.67
,,critic_0,0,1.67,0,65,0,0,0,3.33,66,0,0,1.67,0,64,0.67
,,avg,16.67,6.67,0,61.67,13.33,3.33,0,3.33,55.33,15.33,11.67,25,0,58.67,15.33
,MAD,affirmative,0,0,0,54.17,0.83,13.33,23.33,90,36.67,34.67,1.67,0,26.67,39.33,4.67
,,negative,0,0,0,57.5,0,3.33,20,0,50,12.67,1.67,0,0,43.33,0
,,moderator,41.67,0,0,51.67,20,46.67,21.11,100,34.67,40,0,0,3.33,18,0.67
,,judge,0,0,0,48.33,0,6.67,15.56,23.33,48,14,1.67,0,0,36,0
,,avg,8.33,0,0,51.67,5,16.67,21.11,50,42.67,25.33,0,0,3.33,34.67,0.67
,MetaGPT,product_manager,6.67,45,0,27.5,25,36.67,77.78,96.67,12,73.33,0,0,53.33,26.67,10.67
,,architect,61.67,83.33,0,21.67,71.67,70,77.78,100,5.33,80.67,0,1.67,86.67,36.67,19.33
,,project_manager,48.33,96.67,0,40,74.17,0,81.11,100,12,68,0,0,0,34,0.67
,,engineer,96.67,65,0,32.5,80,83.33,71.11,100,0.67,78.67,3.33,0,6.67,36,1.33
,,qa_engineer,50,98.33,0,38.33,75.83,3.33,100,100,8.67,79.33,0,100,3.33,34,59.33
,,avg,53.33,76.67,0,32.5,64.17,33.33,82.22,100,8.67,75.33,0,21.67,30,34,18
,SC,sc1,1.67,5,0,61.67,4.17,0,0,3.33,61.33,0,1.67,0,3.33,64,0
,,sc2,11.67,6.67,0,60,10,3.33,1.11,0,61.33,0.67,0,0,3.33,63.33,0
,,sc3,10,8.33,0,59.17,10,3.33,1.11,0,60.67,0,0,1.67,0,63.33,0
,,sc4,6.67,3.33,0,59.17,4.17,0,0,0,60.67,0,0,0,0,64,0
,,sc5,30,6.67,0,60,18.33,0,0,0,60,0.67,0,0,0,63.33,0
,,aggregate,98.33,98.33,0,28.33,100,40,80,100,18,76.67,76.67,100,3.33,44.67,70.67
,,avg,25,20,0,55.83,25,3.33,12.22,16.67,53.33,12,15,16.67,3.33,59.33,12
,Debate,debater_0,31.67,30,0,60,31.67,0,0,0,60.67,0,33.33,41.67,3.33,60.67,30
,,debater_1,11.67,38.33,0,60.83,25.83,3.33,0,3.33,60.67,0,33.33,41.67,0,61.33,30
,,debater_2,23.33,23.33,0,60,21.67,0,0,3.33,61.33,0,36.67,38.33,3.33,62,30
,,aggregator,100,100,0,30,100,60,85.56,100,9.33,82,98.33,100,3.33,46,78.67
,,avg,40,50,0,51.67,45.83,16.67,22.22,30,48.67,20.67,50,53.33,0,56.67,41.33
,ALL,avg,31.67,25,0,39.17,29.17,36.67,36.67,56.67,28,45.33,13.33,18.33,6.67,37.33,14.67