﻿Task Domain,LLM-MAS,malicious agent,hijacking,,,,,Disruption,,,,,Disclosure,,,,
,,,adv_input,malicious_agent,message poison,overall,,adv_input,malicious_agent,message poison,overall,,adv_input,malicious_agent,message poison,overall,
,,,ASR,ASR,ASR,UA,ASR,ASR,ASR,ASR,UA,ASR,ASR,ASR,ASR,UA,ASR
Math,CAMEL,assistant,5.13,8.97,0,23.72,4.49,5.13,7.69,71.79,30.77,18.97,47.44,1.28,2.56,32.82,19.49
,,user_proxy,0,6.41,7.69,25.64,4.49,0,7.69,100,33.33,25.13,55.13,0,2.56,33.33,22.05
,,critic,5.13,3.85,12.82,37.82,7.05,7.69,3.42,0,37.44,2.05,0,0,0,37.95,0
,,task_specifier,2.56,5.13,0,28.21,1.92,79.49,2.56,100,25.64,37.44,2.56,0,0,31.79,1.03
,,avg,2.56,5.13,7.69,29.49,5.13,23.08,4.27,69.23,32.31,21.54,25.64,0,0,33.33,10.26
,AutoGen,assistant,12.82,15.38,0,46.79,10.26,33.33,41.88,100,31.79,52.82,47.44,71.79,0,57.95,48.21
,,user_proxy,23.08,16.67,20.51,67.95,18.59,0,0.85,30.77,70.26,5.64,1.28,1.28,35.9,76.41,7.69
,,avg,17.95,15.38,12.82,57.05,14.1,15.38,22.22,64.1,51.28,29.74,24.36,38.46,15.38,67.69,27.69
,AgentVerse,role_assigner,25.64,12.82,17.95,70.51,16.03,0,0.85,0,69.23,0,0,0,0,72.31,0
,,solver,12.82,6.41,0,49.36,8.33,7.69,54.7,100,34.36,54.87,50,52.56,0,54.87,40
,,evaluator,25.64,6.41,5.13,69.87,11.54,0,11.97,0,66.67,7.18,1.28,0,2.56,76.92,0
,,critic_0,12.82,15.38,61.54,57.69,26.28,0,0,0,69.74,0.51,0,0,2.56,73.33,0
,,avg,15.38,8.97,23.08,62.18,16.03,2.56,17.95,28.21,60,15.9,11.54,14.1,2.56,70.26,9.74
,MAD,affirmative,0,2.56,28.21,46.79,8.97,5.13,10.26,28.21,66.15,12.82,0,0,0,61.54,0
,,negative,2.56,1.28,2.56,62.82,3.21,2.56,14.53,5.13,67.69,9.74,0,1.28,0,63.59,0
,,moderator,10.26,2.56,2.56,47.44,2.56,17.95,19.66,97.44,52.82,35.9,1.28,0,2.56,23.59,0.51
,,judge,2.56,0,2.56,61.54,1.92,5.13,20.51,10.26,62.56,13.85,1.28,0,2.56,62.56,0
,,avg,5.13,1.28,10.26,54.49,4.49,7.69,15.38,33.33,61.54,18.97,1.28,0,2.56,52.82,0.51
,SC,sc1,7.69,20.51,64.1,59.62,27.56,2.56,16.24,0,67.18,9.74,6.41,1.28,71.79,76.41,15.38
,,sc2,5.13,17.95,58.97,61.54,24.36,2.56,5.98,0,72.31,4.1,3.85,2.56,5.13,71.79,5.13
,,sc3,15.38,19.23,46.15,64.1,25,2.56,2.56,0,74.87,1.54,2.56,3.85,7.69,74.36,3.59
,,sc4,12.82,8.97,43.59,66.03,19.87,0,5.98,0,70.77,3.59,2.56,0,10.26,72.31,3.08
,,sc5,7.69,10.26,58.97,58.97,20.51,2.56,19.66,0,65.13,11.79,6.41,7.69,51.28,76.92,15.9
,,aggregate,2.56,26.92,2.56,40.38,12.82,41.03,76.07,97.44,20,74.36,42.31,66.67,0,59.49,44.1
,,avg,7.69,17.95,46.15,58.33,21.15,7.69,20.51,17.95,61.54,17.95,8.97,12.82,25.64,71.79,14.87
,Debate,debater_0,15.38,5.13,17.95,76.28,12.18,0,29.06,2.56,60.51,18.46,8.97,7.69,0,78.97,7.18
,,debater_1,12.82,14.1,15.38,73.08,14.1,0,31.62,0,59.49,18.97,10.26,19.23,2.56,76.92,10.77
,,debater_2,20.51,14.1,15.38,76.92,16.67,2.56,29.91,2.56,58.46,18.97,7.69,12.82,0,78.97,8.72
,,aggregator,2.56,10.26,2.56,37.82,4.49,15.38,70.94,97.44,27.69,64.62,52.56,89.74,0,62.05,56.92
,,avg,17.95,10.26,17.95,75,14.1,0,31.62,2.56,60,17.95,8.97,11.54,0,77.95,8.72
,ALL,avg,7.69,7.69,10.26,50.64,9.62,12.82,16.24,46.15,51.79,21.03,16.67,12.82,5.13,56.41,12.82
Code,CAMEL,assistant,15,1.67,0,15.83,8.33,40,24.44,100,7.33,42,48.33,1.67,3.33,13.33,20
,,user_proxy,48.33,1.67,0,17.5,25.83,43.33,31.11,96.67,10.67,48,63.33,0,3.33,16,26
,,critic,0,0,0,16.67,0,23.33,26.67,6.67,14,24,0,0,0,18,0
,,task_specifier,43.33,0,0,7.5,20,90,33.33,96.67,8.67,58.67,33.33,1.67,0,17.33,13.33
,,avg,25,0,0,14.17,14.17,50,31.11,76.67,10.67,43.33,36.67,0,0,16.67,14
,AutoGen,assistant,96.67,66.67,0,24.17,80.83,83.33,90,96.67,4.67,91.33,100,93.33,0,44.67,77.33
,,user_proxy,13.33,0,0,50,7.5,10,3.33,13.33,48,5.33,3.33,1.67,3.33,52.67,3.33
,,avg,53.33,33.33,0,36.67,44.17,43.33,45.56,50,26.67,48.67,55,48.33,0,48.67,40.67
,AgentVerse,role_assigner,0,0,0,56.67,0,3.33,1.11,0,58,0,0,0,3.33,56.67,0
,,solver,95,0,0,41.67,47.5,0,40,100,35.33,46,98.33,98.33,0,47.33,80.67
,,evaluator,0,0,0,55.83,0,0,27.78,0,46.67,16.67,0,0,3.33,56,0.67
,,critic_0,0,1.67,0,55,0,0,0,3.33,57.33,0,0,1.67,0,58,0.67
,,avg,23.33,0,0,52.5,12.5,3.33,17.78,30,50,15.33,23.33,25,0,54.67,20
,MAD,affirmative,0,0,0,52.5,0.83,0,0,70,45.33,13.33,1.67,0,93.33,30.67,18
,,negative,5,0,0,50,2.5,30,0,0,50,6,0,0,0,34,0
,,moderator,15,0,0,34.17,6.67,3.33,1.11,100,40,19.33,0,0,3.33,19.33,0.67
,,judge,0,0,0,46.67,0,16.67,1.11,3.33,49.33,3.33,1.67,0,0,44.67,0
,,avg,3.33,0,0,45,2.5,10,1.11,40,46.67,10.67,0,0,20,33.33,4
,MetaGPT,product_manager,75,83.33,0,20.83,78.33,3.33,83.33,96.67,8.67,70,6.67,0,50,33.33,12.67
,,architect,81.67,95,0,18.33,87.5,3.33,74.44,100,10,65.33,10,6.67,0,38.67,8.67
,,project_manager,98.33,100,0,21.67,100,0,77.78,100,10,66,48.33,50,0,38,40.67
,,engineer,98.33,100,0,25.83,99.17,30,75.56,100,8.67,70.67,55,51.67,0,32.67,42
,,qa_engineer,98.33,98.33,0,23.33,100,50,100,100,4.67,88.67,100,100,3.33,38.67,79.33
,,avg,91.67,95,0,22.5,91.67,10,82.22,100,9.33,72,43.33,43.33,10,36.67,36
,SC,sc1,0,0,0,59.17,0,0,10,3.33,54.67,6,28.33,30,100,60,42
,,sc2,0,1.67,0,57.5,1.67,3.33,3.33,0,58.67,2,28.33,25,93.33,60,40.67
,,sc3,0,0,0,59.17,0,3.33,4.44,0,57.33,2,28.33,40,66.67,61.33,40
,,sc4,0,0,0,56.67,0,0,8.89,0,57.33,4.67,23.33,21.67,86.67,61.33,36.67
,,sc5,0,1.67,0,60,0.83,0,28.89,0,48,18,18.33,41.67,80,58.67,40
,,aggregate,98.33,95,0,25.83,99.17,53.33,77.78,100,16.67,78,98.33,100,3.33,48.67,79.33
,,avg,15,15,0,54.17,17.5,6.67,21.11,16.67,48.67,18,40,43.33,73.33,57.33,46.67
,Debate,debater_0,0,0,0,55.83,0,0,30,0,48.67,17.33,58.33,21.67,3.33,58.67,32
,,debater_1,0,0,0,57.5,0,3.33,18.89,3.33,54,12,48.33,33.33,0,59.33,32.67
,,debater_2,0,1.67,0,55.83,0,0,25.56,3.33,53.33,15.33,55,31.67,3.33,61.33,34.67
,,aggregator,0,1.67,0,28.33,95.83,56.67,73.33,100,14,74,100,100,3.33,48,80
,,avg,0,1.67,0,48.33,25,16.67,37.78,30,42.67,30,65,45,0,56,44
,ALL,avg,41.67,25,0,33.33,33.33,20,36.67,56.67,27.33,38.67,33.33,23.33,6.67,38.67,24