# %%
import json

# %%
# Load the two JSON inputs used by the rest of the script:
#   law      - mapping of dotted keys (e.g. 'EU_AI_ACT.chapter13.article113.It')
#              to entries that carry at least a 'text' field.
#   law_leaf - mapping of keys to entries with 'text', 'recitals' and 'annexes'.
# JSON text is UTF-8 by specification, so the encoding is given explicitly
# instead of relying on the platform/locale default used by open().
with open('data.json', 'r', encoding='utf-8') as f:
    law = json.load(f)
with open('data_leaf.json', 'r', encoding='utf-8') as f:
    law_leaf = json.load(f)

# %%
# Exploratory: show every key available in law_leaf.
law_leaf.keys()

# %%
# Exploratory: list the field names of one entry (the 21st key in order).
print(law_leaf[list(law_leaf)[20]].keys())

# %%


# %%
# Prompt skeleton filled in with str.format(). It has two placeholders:
#   {result} - the kind of sample requested (the script passes 'POSITIVE' or
#              'NEGATIVE')
#   {rules}  - the rule text the scenario must be based on
# The doubled braces ({{ and }}) around the JSON example are str.format()
# escapes and come out as single literal braces in the final prompt.
# NOTE(review): the "factual_background" line reads "Do not directly states";
# it is left untouched here because editing the wording changes every
# generated prompt - confirm before fixing.
prompt_template = '''## Role: 
You are a legal expert specializing in EU regulations, tasked with generating realistic legal case scenarios based on the EU AI Act. The scenarios can represent {result} samples.

## Rules:
{rules}

## Task:
Develop a realistic legal case scenario of the EU AI Act, including:
- Parties Involved: Define the plaintiff(s), defendant(s), and any relevant third parties (e.g., AI developers, users, or regulatory bodies).
- Factual Background: Provide a detailed narrative of events leading to the scenario, ensuring alignment with the EU AI Act and real-world plausibility.
- Legal Issues: Identify specific legal questions or issues, referencing relevant articles of the EU AI Act.
- Arguments: Outline primary arguments for both plaintiff and defendant (if applicable) or stakeholders, grounded in the EU AI Act. 
- Jurisdiction: Specify the EU member state or EU-level jurisdiction and relevant context (e.g., industry, AI system type).

## Constraints:
- Ensure the case aligns with the EU AI Act's provisions.
- Create realistic and complex scenarios; focus on clarity and practical applicability.
- Include sufficient details.

## Output Format:
Return the output in JSON format with only one layer, where the value of the dictionary should be in the format of string text:
{{
"parties_involved": "plaintiff, defendant, and other third parties",
"factual_background": "Describe the facts. Do not directly states whether the system is compliant with the EU AI Act or not. Even do not explicitly mention EU AI Act.",
"legal_issues": "Legal questions or issues.",
"arguments": "Primary arguments for involved parties.",
"jurisdiction": "The official power to make legal decisions."
}}
'''

# %%
# Exploratory: display one complete law_leaf entry (position 120 in key order).
law_leaf[list(law_leaf)[120]]

# %%
# Exploratory: print only the 'text' field of the entry at position 100.
print(law_leaf[list(law_leaf)[100]]['text'])

# %%
# Smoke-test the template by rendering a POSITIVE prompt for that same entry.
prompt_try_1 = prompt_template.format(
    rules=law_leaf[list(law_leaf)[100]]['text'],
    result='POSITIVE',
)
print(prompt_try_1)

# %%
# Exploratory: number of entries in law_leaf.
len(law_leaf)

# %%
# Load the lookup tables used when assembling the rule text for each prompt:
#   annex_dict   - annex id -> entry with a 'content' field
#   recital_dict - recital id -> recital text
# JSON text is UTF-8 by specification, so the encoding is given explicitly
# instead of relying on the platform/locale default used by open().
with open('annex.json', 'r', encoding='utf-8') as file:
    annex_dict = json.load(file)

with open('recital.json', 'r', encoding='utf-8') as file:
    recital_dict = json.load(file)

# %%
# Column-oriented store: one row per generated prompt. Every law_leaf entry
# contributes two rows (a positive and a negative prompt) that share the same
# rule text and the same metadata entry.
ai_act_dict_save = {'prompt': [], 'label': [], 'meta': []}

for leaf in law_leaf.values():
    # Rule text = the entry's own text, a blank line, then one
    # "<id>: <content>" line per referenced recital and per referenced annex.
    rules_text = leaf['text'] + '\n\n'
    recital_ids = leaf['recitals']
    annex_ids = leaf['annexes']

    # A falsy reference list means there is nothing to append.
    for recital_id in recital_ids or ():
        rules_text += recital_id + ': ' + recital_dict[recital_id] + '\n'
    for annex_id in annex_ids or ():
        rules_text += annex_id + ': ' + annex_dict[annex_id]['content'] + '\n'

    # Positive row first, then negative; the template receives the
    # upper-cased label while the stored label stays lower-case.
    for label in ('positive', 'negative'):
        ai_act_dict_save['prompt'].append(
            prompt_template.format(result=label.upper(), rules=rules_text)
        )
        ai_act_dict_save['label'].append(label)
        ai_act_dict_save['meta'].append(leaf)


# %%
import pandas as pd
from datasets import Dataset

# %%
# Convert the column-oriented dict (keys 'prompt', 'label', 'meta') into a
# pandas DataFrame, wrap it as a `datasets.Dataset`, and write it to disk.
data_save_pd = pd.DataFrame(ai_act_dict_save)
data_save_dataset_form = Dataset.from_pandas(data_save_pd)
# The target path is relative to the current working directory.
data_save_dataset_form.save_to_disk('ai_act_prompt_prepared_08_24')

# %%
# Exploratory: look at the first row of the saved dataset.
data_save_dataset_form[0]

# %%
def find_keys_with_pattern(d, pattern):
    """Return the keys of ``d`` for which ``pattern in key`` holds.

    For string keys this is a plain substring test, so ``'article1'`` also
    matches a key containing ``'article10'``. Keys are returned in the
    mapping's iteration order; the result is an empty list when nothing
    matches.
    """
    hits = []
    for candidate in d.keys():
        if pattern in candidate:
            hits.append(candidate)
    return hits

# %%
# Exploratory: list keys mentioning article 10. The helper does a substring
# test, so any key containing 'article100' ... 'article109' is returned too.
find_keys_with_pattern(law, 'article10')

# %%
# Exploratory: fetch a single entry by its full dotted key.
law['EU_AI_ACT.chapter13.article113.It']

# %%
# Build one text string per article (i = 1..113) and store it under
# 'Article <i>'. Each string is the article's own 'text' followed by the
# last line of the 'text' of every key found below it, joined with no
# separator.
art_dict_save = {}
for i in range(1, 114):
    article_tag = f'article{i}'
    art_keys = find_keys_with_pattern(law, article_tag)
    if not art_keys:
        raise IndexError(f'no key in law contains {article_tag!r}')

    # find_keys_with_pattern is a substring test, so 'article1' also matches
    # 'article10', 'article113', ... Taking the first hit is only right
    # when the dict happens to be in document order. Prefer the key whose
    # final dotted segment is exactly this article, and fall back to the
    # first hit if no such key exists.
    exact_keys = [key for key in art_keys if key.split('.')[-1] == article_tag]
    art_key = exact_keys[0] if exact_keys else art_keys[0]

    # The trailing dot keeps 'article1.' from matching 'article10...' keys.
    art_keys_sub_sect = find_keys_with_pattern(law, f'{article_tag}.')
    # print(art_keys_sub_sect)

    pieces = [law[art_key]['text']]
    for k_ in art_keys_sub_sect:
        sub_lines = law[k_]['text'].splitlines()
        # An empty text has no last line; skip it instead of raising.
        if sub_lines:
            pieces.append(sub_lines[-1])
    art_str = ''.join(pieces)

    print(art_str)
    print('====================================')
    art_dict_save[f'Article {i}'] = art_str

# %%
# Exploratory: display the assembled 'Article <n>' -> text mapping.
art_dict_save

# %%
# Write the mapping to a JSON file, pretty-printed with 4-space indentation.
with open('eu_ai_act_law_dictionary_version.json', mode='w') as out_file:
    json.dump(art_dict_save, fp=out_file, indent=4)




