import requests, json, os, sys
from utils.wikidata_types import get_wikidata_types

def query_wikidata(query, *, timeout=120):
    """Run a SPARQL query against the Wikidata query endpoint.

    Args:
        query: SPARQL query text; sent as the ``query`` URL parameter of a
            GET request to ``https://query.wikidata.org/sparql``.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``.
            Without it a stalled connection would block the script forever.

    Returns:
        The response body decoded from JSON (``response.json()``). Callers in
        this file read it as ``result['results']['bindings']``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status
            (raised by ``response.raise_for_status()``).
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    url = 'https://query.wikidata.org/sparql'
    headers = {
        # Ask for the JSON serialization of the SPARQL result set.
        'Accept': 'application/sparql-results+json',
        # NOTE(review): opaque token used as the User-Agent; consider a
        # descriptive value with contact details -- confirm against the
        # endpoint's usage policy.
        'User-Agent': 'fccbc90d5a6aad243121263c4cbddd8c'
    }
    response = requests.get(
        url,
        headers=headers,
        params={'query': query},
        timeout=timeout,
    )
    response.raise_for_status()
    return response.json()



def _is_unresolved_qid(label):
    """Return True when *label* is a bare item ID such as ``Q12345``.

    The label columns presumably contain the raw item ID when no label is
    available in the requested languages (TODO confirm against the query
    service docs). Matching ``Q`` + digits exactly, rather than any string
    starting with ``Q``, keeps genuine names that begin with the letter Q.
    """
    digits = label[1:]
    return (
        label.startswith('Q')
        and digits != ''
        and all(ch in '0123456789' for ch in digits)
    )


def _collect_unique_triples(entries):
    """Return the distinct triples over *entries*, sorted for stable output."""
    return sorted({triple for entry in entries for triple in entry['triples']})


def main():
    """Extend the ontology dataset with museum/street facts from Wikidata.

    Steps:
      1. Load the existing entries from ``onto_data.json``.
      2. Query Wikidata for items that are instances of ``wd:Q33506``
         (museum) in country ``wd:Q232``, together with their ``wdt:P276``
         value (treated here as the street).
      3. Append one entry per result whose labels are resolved, using the
         hand-supplied country label ``'Kazakhstan'``.
      4. Print how many entries were appended.
      5. Write all entries to ``geo_data.json`` and the distinct triples of
         all entries to ``a-box.json``.

    The input file is never modified: results go to ``geo_data.json``.
    """
    # Read as UTF-8 to match the encoding used for the output files below.
    with open('onto_data.json', 'r', encoding='utf-8') as f:
        data = json.load(f)

    # ------------------------------------------------------------------
    # Archived recipes. The commented-out sections in this function are
    # earlier query/axiom variants, apparently swapped in by hand to build
    # other entry families. They are kept for reference and never executed.
    # ------------------------------------------------------------------

    # sparql query for deserts located in a country desert: Q8514 , forest: Q4421, mountain: Q8502, river: Q4022, museum: Q33506
    # sparql_desert_country = """
    #     SELECT ?desertLabel WHERE {
    #     ?desert wdt:P31 wd:Q33506;   # instance of museum (variable name left over from the desert query)
    #             wdt:P17 wd:Q1025.   # located in country
    #     SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
    #     }
    #     LIMIT 100
    # """
    # axiom = """LocatedIn(A, B)"""
    #results = query_wikidata(sparql_desert_country)
    # for result in results['results']['bindings']:
    #     desert = result['desertLabel']['value']
    #     #body = result['bodyLabel']['value']
    #     country = 'Mauritania'
    #     if desert.startswith('Q') or country.startswith('Q'):
    #         continue
    #
    #     data.append(
    #         {
    #             'query_id': f'{(len(data))}',
    #             'axiom': axiom,
    #             'triples': [ f'LocatedIn({desert}, {country})'],
    #             'axiomatic_format': f"""LocatedIn({desert}, {country})""",
    #             'types': {desert: 'museum', country: 'country'} })

    # Archived: re-check the recorded type of early entries against
    # get_wikidata_types and collect the ones that could not be classified.
    # problematic_queries = []
    # for entry in data:
    #     if entry['axiom'] == axiom and int(entry['query_id']) < 1564:
    #         print(entry['query_id'])
    #         ent = list(entry['types'].keys())[0]
    #         if 'forest' not in entry['types'][ent]:
    #             continue
    #         types = get_wikidata_types(ent)
    #
    #         if types:
    #             if isinstance(types, dict):
    #                 types = types[ent]
    #             if 'forest' in types:
    #                 entry['types'][ent] = 'forest'
    #
    #             elif 'desert' in types:
    #                 entry['types'][ent] = 'desert'
    #             elif 'mountain' in types:
    #                 entry['types'][ent] = 'mountain'
    #             elif 'river' in types:
    #                 entry['types'][ent] = 'river'
    #             elif 'museum' in types:
    #                 entry['types'][ent] = 'museum'
    #             else:
    #                 problematic_queries.append(entry['query_id'])
    #
    #         else:
    #             problematic_queries.append(entry['query_id'])
    #
    # print(problematic_queries)

    # sparql query for deserts located in Asia

    # sparql_desert_country = """
    #     SELECT ?desertLabel ?countryLabel WHERE {
    #     ?desert wdt:P31 wd:Q8514;   # instance of desert
    #             wdt:P17 ?country.   # located in country
    #     ?country wdt:P30 wd:Q48.    # country located in Asia
    #     SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
    #     }
    #     LIMIT 50
    # """

    # sparql query for deserts located in a particular country

    # sparql_desert_country = """
    #     SELECT ?desertLabel WHERE {
    #     ?desert wdt:P31 wd:Q8514;   # instance of desert
    #             wdt:P17 wd:Q1028.   # located in country
    #     SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
    #     }
    #     LIMIT 50
    # """

    # results = query_wikidata(sparql_desert_country)
    # for result in results['results']['bindings']:
    #     desert = result['desertLabel']['value']
    #     #country = result['countryLabel']['value']
    #     country = 'Morocco'
    #     if desert.startswith('Q') or country.startswith('Q'):
    #         continue
    #
    #     data.append(
    #         {
    #             'query_id': f'{(len(data))}',
    #             'axiom': axiom,
    #             'triples': [ f'PartOf({desert}, {country})', f'PartOf({country}, Africa)'],
    #             'axiomatic_format': f'PartOf({desert}, {country}) && PartOf({country}, Africa) => PartOf({desert}, Africa)',
    #             'types': {desert: 'desert', country: 'country', 'Africa': 'continent'} })

    # ------------------------------------------------------------------
    # Active recipe: museums at a street in a country.
    # NOTE(review): P276 looks like Wikidata's generic "location" property
    # rather than a street-specific one, so ?street may hold non-street
    # places -- confirm before relying on the 'street' type below.
    # ------------------------------------------------------------------
    sparql_attraction_country = """
        SELECT ?attractionLabel ?streetLabel WHERE {
        ?attraction wdt:P31 wd:Q33506;   # instance of museum
                wdt:P276 ?street;   # at street
                wdt:P17 wd:Q232.   # located in country
        SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
        }
        LIMIT 100
    """

    axiom = """FOR_ALL A,B,C At(A, B) && PartOf(B, C) => At(A, C)"""
    # The query pins the country by item ID (wd:Q232) and does not select its
    # label, so the label is supplied by hand and must be kept in sync.
    country = 'Kazakhstan'

    results = query_wikidata(sparql_attraction_country)
    added = 0
    for result in results['results']['bindings']:
        attraction = result['attractionLabel']['value']
        street = result['streetLabel']['value']
        # Skip rows whose "label" is just an item ID: they would produce
        # unreadable triples.
        if _is_unresolved_qid(attraction) or _is_unresolved_qid(street):
            continue

        data.append(
            {
                # IDs continue from the current length of the dataset.
                'query_id': f'{len(data)}',
                'axiom': axiom,
                'triples': [f'At({attraction}, {street})', f'PartOf({street}, {country})'],
                'axiomatic_format': f'At({attraction}, {street}) && PartOf({street}, {country}) => At({attraction}, {country})',
                'types': {attraction: 'museum', street: 'street', country: 'country'},
            }
        )
        added += 1

    # Report how many entries this run contributed.
    print(added)

    # sparql query for finding lakes contained in a county which is contained in a country

    # sparql_border_country = """
    #     SELECT ?lakeLabel ?provinceLabel WHERE {
    #       ?lake wdt:P31 wd:Q23397;
    #             wdt:P131 ?province.
    #       ?province wdt:P17 wd:Q79.
    #     SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
    #     }
    #     LIMIT 100
    # """
    # axiom = """FOR_ALL A,B,C ContainedIn(A, B) && ContainedIn(B, C) => ContainedIn(A, C)"""
    # results = query_wikidata(sparql_border_country )
    # for result in results['results']['bindings']:
    #     lake = result['lakeLabel']['value']
    #     province = result['provinceLabel']['value']
    #     #country = result['countryLabel']['value']
    #     country = 'Egypt'
    #     if lake.startswith('Q') or province.startswith('Q'):
    #         continue
    #
    #     NOTE(review): the first triple below originally read 'CotainedIn(...)';
    #     if that version was ever run, existing data may carry the misspelled
    #     predicate -- verify.
    #     data.append(
    #         {
    #             'query_id': f'{(len(data))}',
    #             'axiom': axiom,
    #             'triples': [ f'ContainedIn({lake}, {province})', f'ContainedIn({province}, {country})'],
    #             'axiomatic_format': f'ContainedIn({lake}, {province}) && ContainedIn({province}, {country}) => ContainedIn({lake}, {country})',
    #             'types': {lake: 'lake', province: 'territory', country: 'country'} })

    # sparql query for finding islands contained in a body of water
    # sparql_island_water = """
    #     SELECT ?islandLabel WHERE {
    #       ?island wdt:P31 wd:Q23442;
    #             wdt:P206 wd:Q45341.
    #     SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
    #     }
    #     LIMIT 100
    # """
    # axiom = """FOR_ALL A,B,C ContainedIn(A, B) | A: land, B: body of water => Island(A)"""
    # results = query_wikidata(sparql_island_water)
    # for result in results['results']['bindings']:
    #     island = result['islandLabel']['value']
    #     #body = result['bodyLabel']['value']
    #     body = 'East China Sea'
    #     if island.startswith('Q') or body.startswith('Q'):
    #         continue
    #
    #     data.append(
    #         {
    #             'query_id': f'{(len(data))}',
    #             'axiom': axiom,
    #             'triples': [ f'ContainedIn({island}, {body})'],
    #             'axiomatic_format': f'ContainedIn({island}, {body}) | A: land, B: body of water => Island({island})',
    #             'types': {island: 'land', body: 'body of water'} })

    # Persist the full dataset (loaded entries plus the ones added above).
    with open('geo_data.json', 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=4, ensure_ascii=False)

    # Form the a-box: every distinct triple across all entries. Only the
    # a-box is written here; no t-box file is produced by this function.
    # Sorting keeps the file identical between runs (set order is not stable).
    a_box = _collect_unique_triples(data)
    with open('a-box.json', 'w', encoding='utf-8') as f:
        json.dump(a_box, f, indent=4, ensure_ascii=False)

# Run the pipeline only when this file is executed as a script, so that
# importing the module does not trigger the network query or file writes.
if __name__ == '__main__':
    main()

