{
    "energy" : {
        "huggingface/gptj_6b": {
            "value" : 33.5,
            "description": "256 TPU v3 cores (or a single pod slice) were used for 5 weeks of real time in GCP europe-west4-a. We use a PUE of 1.1, 283W TDP per chip (2 cores per chip), and an average carbon intensity of 0.410 kCO2/kWh."
        },
        "gooseai/gpt-j-6b": {
            "value" : 33.5,
            "description": "256 TPU v3 cores (or a single pod slice) were used for 5 weeks of real time in GCP europe-west4-a. We use a PUE of 1.1, 283W TDP per chip (2 cores per chip), and an average carbon intensity of 0.410 kCO2/kWh."
        },
        "together/gpt-j-6b": {
            "value" : 33.5,
            "description": "256 TPU v3 cores (or a single pod slice) were used for 5 weeks of real time in GCP europe-west4-a. We use a PUE of 1.1, 283W TDP per chip (2 cores per chip), and an average carbon intensity of 0.410 kCO2/kWh."
        },
        "gooseai/gpt-neo-20b": {
            "value" : 60.1,
            "description" : "Metrics for GPT-NeoX (20B) were provided by the authors (Black et al., 2022). The authors report 66.2 MWh of energy usage and 35 metric tons of CO2eq. We note that authors suggest that this number includes training, scaling, testing, and evaluation. For the training process the authors estimate 31.7 metric tons of CO2eq and roughly 60.1 MWh. For fairness and since we were only able to identify information from other models on training, we rely on this latter number." 
        },
        "together/gpt-neox-20b": {
            "value" : 60.1,
            "description" : "Metrics for GPT-NeoX (20B) were provided by the authors (Black et al., 2022). The authors report 66.2 MWh of energy usage and 35 metric tons of CO2eq. We note that authors suggest that this number includes training, scaling, testing, and evaluation. For the training process the authors estimate 31.7 metric tons of CO2eq and roughly 60.1 MWh. For fairness and since we were only able to identify information from other models on training, we rely on this latter number." 
        },
        "openai/ada": {
            "value" : 2.6,
            "description" : "We rely on estimates from Patterson et al. (2021) and Brown et al. (2020). In particular, Patterson et al. (2021) explicitly reports estimates for GPT-3 (175B); for other model variants, we scale the number of floating-point operations by the size of the model."
        },
        "openai/babbage": {
            "value" : 9.8,
            "description" : "We rely on estimates from Patterson et al. (2021) and Brown et al. (2020). In particular, Patterson et al. (2021) explicitly reports estimates for GPT-3 (175B); for other model variants, we scale the number of floating-point operations by the size of the model."
        },
        "openai/curie": {
            "value" : 49.2,
            "description" : "We rely on estimates from Patterson et al. (2021) and Brown et al. (2020). In particular, Patterson et al. (2021) explicitly reports estimates for GPT-3 (175B); for other model variants, we scale the number of floating-point operations by the size of the model."
        },
        "openai/davinci": {
            "value" : 1287.1,
            "description" : "We rely on estimates from Patterson et al. (2021) and Brown et al. (2020). In particular, Patterson et al. (2021) explicitly reports estimates for GPT-3 (175B); for other model variants, we scale the number of floating-point operations by the size of the model."
        },
        "microsoft/TNLGv2_530B": {
            "value" : 1703.1,
            "description" : "560 8-A100 servers were used for training for an estimated 36 days in Selene located in California. We use a PUE of 1.1, 400W TDP for A100 GPUs, and an average California carbon intensity of 0.238 kCO2/kWh."
        },
        "together/opt-175b": {
            "value" : 345.7,
	    "description": "992 A100 GPUs were used for training for 33 days in Azure us-east-2. We use a PUE of 1.1, 400W TDP for A100 GPUs, and an average carbon intensity of 0.367 kCO2/kWh."
	},
        "together/opt-66b": {
            "value" : 178.4,
            "description": "512 A100 GPUs were used for training for 33 days in Azure us-east-2. We use a PUE of 1.1, 400W TDP for A100 GPUs, and an average carbon intensity of 0.367 kCO2/kWh."
        },
        "together/yalm": {
            "value" : 549.1,
            "description": "800 A100 GPUs were used for training for 65 days in Yandex ru-central1-a. We use a PUE of 1.1, 400W TDP for A100 GPUs, and an average carbon intensity of 0.357 kCO2/kWh."
        },
        "together/bloom": {
            "value" : 474.4,
            "description": "384 A100 GPUs were used for training for 117 days in France. We use a PUE of 1.1, 400W TDP for A100 GPUs, and an average carbon intensity of 0.064 kCO2/kWh."
        }
    },
    "carbon" : {
        "huggingface/gptj_6b": {
            "value" : 13.8,
            "description": "256 TPU v3 cores (or a single pod slice) were used for 5 weeks of real time in GCP europe-west4-a. We use a PUE of 1.1, 283W TDP per chip (2 cores per chip), and an average carbon intensity of 0.410 kCO2/kWh."
        },
        "gooseai/gpt-j-6b": {
            "value" : 13.8,
            "description": "256 TPU v3 cores (or a single pod slice) were used for 5 weeks of real time in GCP europe-west4-a. We use a PUE of 1.1, 283W TDP per chip (2 cores per chip), and an average carbon intensity of 0.410 kCO2/kWh."
        },
        "together/gpt-j-6b": {
            "value" : 13.8,
            "description": "256 TPU v3 cores (or a single pod slice) were used for 5 weeks of real time in GCP europe-west4-a. We use a PUE of 1.1, 283W TDP per chip (2 cores per chip), and an average carbon intensity of 0.410 kCO2/kWh."
        },
        "gooseai/gpt-neo-20b": {
            "value" : 31.7,
            "description" : "Metrics for GPT-NeoX (20B) were provided by the authors (Black et al., 2022). The authors report 66.2 MWh of energy usage and 35 metric tons of CO2eq. We note that authors suggest that this number includes training, scaling, testing, and evaluation. For the training process the authors estimate 31.7 metric tons of CO2eq and roughly 60.1 MWh. For fairness and since we were only able to identify information from other models on training, we rely on this latter number."
        },
        "together/gpt-neox-20b": {
            "value" : 31.7,
            "description" : "Metrics for GPT-NeoX (20B) were provided by the authors (Black et al., 2022). The authors report 66.2 MWh of energy usage and 35 metric tons of CO2eq. We note that authors suggest that this number includes training, scaling, testing, and evaluation. For the training process the authors estimate 31.7 metric tons of CO2eq and roughly 60.1 MWh. For fairness and since we were only able to identify information from other models on training, we rely on this latter number."
        },
        "openai/ada": {
            "value" : 1.1,
            "description" : "We rely on estimates from Patterson et al. (2021) and Brown et al. (2020). In particular, Patterson et al. (2021) explicitly reports estimates for GPT-3 (175B); for other model variants, we scale the number of floating-point operations by the size of the model."
        },
        "openai/babbage": {
            "value" : 4.2,
            "description" : "We rely on estimates from Patterson et al. (2021) and Brown et al. (2020). In particular, Patterson et al. (2021) explicitly reports estimates for GPT-3 (175B); for other model variants, we scale the number of floating-point operations by the size of the model."
        },
        "openai/curie": {
            "value" : 21.1,
            "description" : "We rely on estimates from Patterson et al. (2021) and Brown et al. (2020). In particular, Patterson et al. (2021) explicitly reports estimates for GPT-3 (175B); for other model variants, we scale the number of floating-point operations by the size of the model."
        },
        "openai/davinci": {
            "value" : 552.2,
            "description" : "We rely on estimates from Patterson et al. (2021) and Brown et al. (2020). In particular, Patterson et al. (2021) explicitly reports estimates for GPT-3 (175B); for other model variants, we scale the number of floating-point operations by the size of the model."
        },
        "microsoft/TNLGv2_530B": {
            "value" : 405.3,
            "description" : "560 8-A100 servers were used for training for an estimated 36 days in Selene located in California. We use a PUE of 1.1, 400W TDP for A100 GPUs, and an average California carbon intensity of 0.238 kCO2/kWh."
        },
        "together/opt-175b": {
            "value" : 127.2,
            "description": "992 A100 GPUs were used for training for 33 days in Azure us-east-2. We use a PUE of 1.1, 400W TDP for A100 GPUs, and an average carbon intensity of 0.367 kCO2/kWh."
        },
        "together/opt-66b": {
            "value" : 65.6,
            "description": "512 A100 GPUs were used for training for 33 days in Azure us-east-2. We use a PUE of 1.1, 400W TDP for A100 GPUs, and an average carbon intensity of 0.367 kCO2/kWh."
        },
        "together/yalm": {
            "value" : 196.0,
            "description": "800 A100 GPUs were used for training for 65 days in Yandex ru-central1-a. We use a PUE of 1.1, 400W TDP for A100 GPUs, and an average carbon intensity of 0.357 kCO2/kWh."
        },
        "together/bloom": {
            "value" : 30.4,
            "description": "384 A100 GPUs were used for training for 117 days in France. We use a PUE of 1.1, 400W TDP for A100 GPUs, and an average carbon intensity of 0.064 kCO2/kWh."
        }
    }
}
