
import gym
import re
import time
import json
from typing import List, Dict, Optional, Tuple
from datetime import datetime
import random
import sys


# Put the vendored WebShop sources on the import path.
# NOTE(review): the path is relative, so it presumably resolves against the
# current working directory -- confirm the script is always launched from the
# directory that contains Noisy_Webshop/.
sys.path.append('Noisy_Webshop/agentenv-webshop/webshop')

try:
    # The imported name is not referenced again in this file.
    # NOTE(review): presumably the import is needed for its side effect of
    # registering 'WebAgentTextEnv-v0' with gym -- confirm.
    from web_agent_site.envs import WebAgentTextEnv
except ImportError as e:
    # Nothing below can work without the environment package: report and stop.
    print(f"Import webshop environment failed: {e}")
    sys.exit(1)


class WebShopAnalyzer:
    
    def __init__(self):
        """Initialise empty analysis state and create the environment.

        Raises:
            Exception: whatever ``_init_environment`` re-raises when the
                environment cannot be created.
        """
        # Environment handle; filled in by _init_environment() below.
        self.env = None
        # Per-episode bookkeeping: recorded steps and the product being analysed.
        self.trajectory = []
        self.current_product = None
        # Outcome of the most recent purchase attempt.
        self.purchase_successful, self.final_reward = False, 0.0
        self._init_environment()
    
    def _init_environment(self):
        """Create the ``WebAgentTextEnv-v0`` gym environment in text mode.

        The environment is stored on ``self.env``.

        Raises:
            Exception: any error raised while creating the environment is
                reported on stdout and then re-raised unchanged.
        """
        try:
            # Keyword passed directly instead of through an intermediate dict.
            self.env = gym.make('WebAgentTextEnv-v0', observation_mode='text')
            print("Environment initialization successful")
        except Exception as exc:
            print(f"Environment initialization failed: {exc}")
            raise
    
    def extract_price_from_observation(self, obs: str) -> Optional[float]:
        """Return the largest price mentioned in *obs*, if any.

        Every pattern below is applied case-insensitively to the whole text;
        thousands separators are stripped before conversion.

        Args:
            obs: observation text to scan; may be empty or ``None``.

        Returns:
            The highest positive amount found, or ``None`` when the text is
            empty or contains no positive amount.
        """
        if not obs:
            return None

        price_patterns = [
            r'\$(\d+(?:,\d{3})*(?:\.\d{1,2})?)',  # $1,234.56, $100.0
            r'(\d+(?:,\d{3})*(?:\.\d{1,2})?)\s*dollars?',  # 1234.56 dollars
            r'price[:\s]*\$?(\d+(?:,\d{3})*(?:\.\d{1,2})?)',  # price: $1234.56
            r'cost[:\s]*\$?(\d+(?:,\d{3})*(?:\.\d{1,2})?)',  # cost: $1234.56
            r'(\d+(?:,\d{3})*(?:\.\d{1,2})?)\s*\$',  # 1234.56 $
        ]

        # Each captured group is digits, commas and an optional decimal part,
        # so it always converts to float once the commas are removed.
        amounts = [
            float(hit.replace(',', ''))
            for pattern in price_patterns
            for hit in re.findall(pattern, obs, re.IGNORECASE)
        ]

        highest = max(amounts, default=0.0)
        if highest > 0:
            return highest
        return None
    
    def find_most_expensive_product(self) -> Optional[Dict]:

        price_conditions = ["price > 2000", "price > 1500", "price > 1000"]
        
        for condition in price_conditions:

            
            try:
          
                obs, _ = self.env.reset()
                
               
                search_action = f"search[{condition}]"
                obs, reward, done, info = self.env.step(search_action)
                
                
                products = self._parse_search_results(obs)
                
                if products:
                    
                    most_expensive = max(products, key=lambda x: x.get('price', 0))
                    print(f"The most expensive product is: {most_expensive['name']} - ${most_expensive['price']:.2f}")
                    if most_expensive.get('asin'):
                        print(f" Product ASIN: {most_expensive['asin']}")
                    else:
                        print(f" Product ASIN: not found")

                        asin_match = re.search(r'B[A-Z0-9]{9}', most_expensive['name'])
                        if asin_match:
                            most_expensive['asin'] = asin_match.group()
                    return most_expensive
                
            except Exception as e:
                print(f"Search failed: {e}")
                continue
        
        print("No expensive product found")
        return None
    
    def _parse_search_results(self, obs: str) -> List[Dict]:
        """Parse a ``[SEP]``-separated results page into product dicts.

        A segment that is exactly an ASIN (``B`` followed by nine upper-case
        letters or digits) starts a new product. The next segment that is
        neither a price nor a navigation/option label becomes its name, and
        the first later segment containing a price becomes its price.

        Args:
            obs: observation text of a search-results page; may be empty or
                ``None``.

        Returns:
            One dict per product that has a name, with keys ``name``,
            ``asin``, ``price`` (``None`` when no price was seen) and
            ``description`` (always ``''``).
        """
        # Labels that may follow an ASIN but are never product names.
        skip_labels = ('Next >', '< prev', 'Back to Search', 'size', 'color')

        products = []
        current = None

        for raw in (obs or '').split('[SEP]'):
            part = raw.strip()
            if not part:
                continue

            # Match the full ASIN shape: a length-only check would also accept
            # ordinary 10-character names such as "Blue Shirt".
            if re.fullmatch(r'B[A-Z0-9]{9}', part):
                if current and current['name']:
                    products.append(current)
                current = {
                    'name': None,
                    'asin': part,
                    'price': None,
                    'description': ''
                }
            elif current is None:
                # Page chrome before the first ASIN.
                continue
            elif not current['name']:
                if not part.startswith('$') and part not in skip_labels:
                    current['name'] = part
                    print(f" Product name: {part}")
            elif not current['price']:
                price = self.extract_price_from_observation(part)
                if price:
                    current['price'] = price
                    print(f" Product price: ${price}")

        if current and current['name']:
            products.append(current)

        print(f" Parsed {len(products)} products")
        return products
    
    def generate_keyword_subsets(self, text: str) -> List[str]:


        

        cleaned_text = re.sub(r'[^\w\s-]', ' ', text)
        words = [word.strip() for word in cleaned_text.split() if word.strip()]
        
        print(f" Original word count: {len(words)}")
        print(f" Original words: {words}")
        
        subsets = []
        

        for word in words:
            if len(word) > 2:
                subsets.append(word)
        

        for i in range(len(words) - 1):
            if len(words[i]) > 2 and len(words[i+1]) > 2:
                subsets.append(f"{words[i]} {words[i+1]}")
        

        for i in range(len(words) - 2):
            if len(words[i]) > 2 and len(words[i+1]) > 2 and len(words[i+2]) > 2:
                subsets.append(f"{words[i]} {words[i+1]} {words[i+2]}")
        

        for i in range(len(words) - 3):
            if all(len(words[i+j]) > 2 for j in range(4)):
                subsets.append(f"{words[i]} {words[i+1]} {words[i+2]} {words[i+3]}")
        

        for i in range(len(words) - 4):
            if all(len(words[i+j]) > 2 for j in range(5)):
                subsets.append(f"{words[i]} {words[i+1]} {words[i+2]} {words[i+3]} {words[i+4]}")
        

        for i in range(len(words)):
            for j in range(i + 2, len(words)):  
                if len(words[i]) > 2 and len(words[j]) > 2:
                    subsets.append(f"{words[i]} {words[j]}")
        

        for i in range(len(words)):
            for j in range(i + 2, len(words)):
                for k in range(j + 2, len(words)):
                    if len(words[i]) > 2 and len(words[j]) > 2 and len(words[k]) > 2:
                        subsets.append(f"{words[i]} {words[j]} {words[k]}")
        
        subsets = list(set(subsets))
        subsets.sort(key=len)  
        
        print(f" Subset count: {len(subsets)}")
        return subsets
    
    def search_with_keyword(self, keyword: str) -> Dict:
        
        try:
            
            obs, _ = self.env.reset()
            
          
            search_action = f"search[{keyword}]"
            obs, reward, done, info = self.env.step(search_action)
            
           
            products = self._parse_search_results(obs)
            
            return {
                'keyword': keyword,
                'products': products,
                'total_products': len(products),
                'found_target': self._check_target_found(products),
                'target_position': self._get_target_position(products)
            }
            
        except Exception as e:
            print(f"Search failed '{keyword}': {e}")
            return {
                'keyword': keyword,
                'products': [],
                'total_products': 0,
                'found_target': False,
                'target_position': -1
            }
    
    def _check_target_found(self, products: List[Dict]) -> bool:
        """Tell whether the current target product appears in *products*.

        A product matches when it shares the target's ASIN or its name. A
        field the target lacks (missing key or ``None``) is never used for
        matching, so two absent ASINs do not count as equal.

        Args:
            products: parsed product dicts to search.

        Returns:
            ``True`` if any product matches; ``False`` otherwise, including
            when no target product is set.
        """
        target = self.current_product
        if not target:
            return False

        target_asin = target.get('asin')
        target_name = target.get('name')

        return any(
            (target_asin is not None and product.get('asin') == target_asin)
            or (target_name is not None and product.get('name') == target_name)
            for product in products
        )
    
    def _get_target_position(self, products: List[Dict]) -> int:
        """Return the index of the current target product in *products*.

        A product matches when it shares the target's ASIN or its name. A
        field the target lacks (missing key or ``None``) is never used for
        matching, so two absent ASINs do not count as equal.

        Args:
            products: parsed product dicts, in result order.

        Returns:
            Zero-based index of the first match, or ``-1`` when there is no
            match or no target product is set.
        """
        target = self.current_product
        if not target:
            return -1

        target_asin = target.get('asin')
        target_name = target.get('name')

        for index, product in enumerate(products):
            same_asin = target_asin is not None and product.get('asin') == target_asin
            same_name = target_name is not None and product.get('name') == target_name
            if same_asin or same_name:
                return index
        return -1
    
    def find_unique_keywords(self, product: Dict) -> Tuple[List[Dict], List[Dict]]:
    


        self.current_product = product
       
        keywords = self.generate_keyword_subsets(product['name'])
        
        print(f"Testing {len(keywords)} keyword subsets")
        
        results = []
        unique_keywords = []
        
        for i, keyword in enumerate(keywords, 1):
            print(f"\nTesting {i}/{len(keywords)}: '{keyword}'")
            
            result = self.search_with_keyword(keyword)
            results.append(result)
            
            if result['found_target']:
                
                if result['total_products'] == 1:
                    print(f"Unique keyword: '{keyword}'")
                    unique_keywords.append({
                        'keyword': keyword,
                        'total_products': result['total_products'],
                        'target_position': result['target_position']
                    })
                else:
                    print(f" Not unique (total {result['total_products']} products)")
            else:
                print(f" Not found")
        
        return unique_keywords, results
    
    def get_available_actions(self) -> Dict:
       
        try:
            return self.env.get_available_actions()
        except:
            return {'clickables': [], 'has_search_bar': False}
    
    def execute_action(self, action: str) -> Tuple[str, float, bool, Dict]:

        
        try:
            obs, reward, done, info = self.env.step(action)
            

            trajectory_step = {
                'action': action,
                'observation': obs[:200] + "..." if len(obs) > 200 else obs,
                'reward': reward,
                'done': done,
                'info': info,
                'timestamp': datetime.now().isoformat()
            }
            self.trajectory.append(trajectory_step)
            
            print(f" Reward: {reward}, Done: {done}")
            
            return obs, reward, done, info
            
        except Exception as e:
            print(f"Action execution failed: {e}")
            return "", 0.0, False, {}
    
    def simulate_purchase_trajectory(self, product: Dict) -> bool:

        print(f"\nSimulating purchase trajectory: {product['name']}")
        print("=" * 60)
        
        self.current_product = product
        self.trajectory = []
        
        try:

            search_condition = "price > 2000"  
            obs, reward, done, info = self.execute_action(f"search[{search_condition}]")
            
            if done:
                print("Search completed")
                return False
            

            click_target = product.get('asin') or product['name']
            obs, reward, done, info = self.execute_action(f"click[{click_target}]")
            
            if done:
                print("Click product link completed")
                return False
            
            print("\nAnalyzing product page")
            obs = self._analyze_product_page(obs)
            
            if done:
                print("Analyzing product page completed")
                return False
            
            print("\nSelecting product options")
            self._select_product_options(obs)
            
            print("\nExploring product subpages")
            self._explore_product_subpages()
            
            print("\nFinalizing purchase")
            success = self._finalize_purchase()
            
            return success
            
        except Exception as e:
            print(f"Purchase trajectory simulation failed: {e}")
            return False
    
    def _analyze_product_page(self, obs: str):
        
        price = self.extract_price_from_observation(obs)
        if price:
            print(f" Product price: ${price:.2f}")
        

        available_actions = self.get_available_actions()
        clickables = available_actions.get('clickables', [])
        
        print(f" Clickable elements: {len(clickables)}")
        for clickable in clickables[:10]:  
            print(f"      - {clickable}")
        
        asin_codes = [c for c in clickables if c.startswith(('B', 'b')) and len(c) == 10]
        

        if not asin_codes:
            asin_pattern = r'B[A-Z0-9]{9}'
            text_asins = re.findall(asin_pattern, obs)
            if text_asins:
                asin_codes = text_asins
                print(f" Extracted ASIN from observation text: {asin_codes}")
            else:
                asin_pattern_loose = r'B[A-Z0-9]{9}(?=\s|\[SEP\]|$)'
                text_asins_loose = re.findall(asin_pattern_loose, obs)
                if text_asins_loose:
                    asin_codes = text_asins_loose
                    print(f" Extracted ASIN from observation text (loose matching): {asin_codes}")
        
        if asin_codes:
            print(f" Found ASIN codes: {asin_codes}")
            
            
            target_asin = None
            if hasattr(self, 'current_product') and self.current_product:
                target_asin = self.current_product.get('asin')
                if target_asin:
                   
                    for asin in asin_codes:
                        if asin.lower() == target_asin.lower():
                            target_asin = asin
                            break
                    else:
                        target_asin = None  
            
            
            if not target_asin:
                target_asin = asin_codes[0]
                print(f" Click ASIN code: {target_asin} (first available)")
            else:
                print(f" Click target product ASIN: {target_asin}")
            
            obs, reward, done, info = self.execute_action(f"click[{target_asin}]")
            return obs  
        else:
            print(" No ASIN codes found")
            print(f" All clickable elements: {clickables}")
            print(f" Observation text fragment: {obs[:500]}...")
        

        buy_buttons = [c for c in clickables if 'buy' in c.lower() or 'purchase' in c.lower() or 'add to cart' in c.lower()]
        if buy_buttons:
            buy_button = buy_buttons[0]
            obs, reward, done, info = self.execute_action(f"click[{buy_button}]")
            return obs  
        
        subpage_links = [c for c in clickables if any(keyword in c.lower() for keyword in ['description', 'features', 'reviews', 'specifications', 'details'])]
        if subpage_links:
            print(f" Found subpage links: {subpage_links}")
    
    def _select_product_options(self, obs: str):
        
        available_actions = self.get_available_actions()
        clickables = available_actions.get('clickables', [])
        
        option_keywords = ['color', 'size', 'style', 'material', 'option', 'variant']
        options = [c for c in clickables if any(keyword in c.lower() for keyword in option_keywords)]
        
        if options:

            selected_options = random.sample(options, min(2, len(options)))
            for option in selected_options:
                print(f" Select option: {option}")
                obs, reward, done, info = self.execute_action(f"click[{option}]")
                
                if done:
                    print(" Select option completed")
                    break
                
                time.sleep(0.5)  
            print(" No options found")
    
    def _explore_product_subpages(self):

        print(" Exploring product subpages")
        
        available_actions = self.get_available_actions()
        clickables = available_actions.get('clickables', [])
        

        subpage_keywords = ['description', 'features', 'reviews', 'specifications', 'details']
        subpages = [c for c in clickables if any(keyword in c.lower() for keyword in subpage_keywords)]
        
        if subpages:

            selected_subpages = random.sample(subpages, min(2, len(subpages)))
            for subpage in selected_subpages:
                obs, reward, done, info = self.execute_action(f"click[{subpage}]")
                
                if done:
                    print(" Explore subpage completed")
                    break
                

                time.sleep(0.5)
                obs, reward, done, info = self.execute_action("click[Back to Search]")
                if done:
                    break
                
                time.sleep(0.5)
        else:
            print(" No subpages found")
    
    def _finalize_purchase(self) -> bool:
        
        available_actions = self.get_available_actions()
        clickables = available_actions.get('clickables', [])
        

        buy_buttons = [c for c in clickables if 'buy' in c.lower() or 'purchase' in c.lower() or 'add to cart' in c.lower()]
        
        if buy_buttons:
            buy_button = buy_buttons[0]
            obs, reward, done, info = self.execute_action(f"click[{buy_button}]")
            
            if done:
                self.purchase_successful = True
                self.final_reward = reward
                print(f" Purchase successful! Final reward: {reward}")
                return True
            else:
                print(" Purchase not completed")
                return False
        else:
            print(" No buy button found")
            return False
    
    def display_trajectory_summary(self):

        if not self.trajectory:
            print(" No trajectory recorded")
            return
        
        print("\n" + "=" * 80)
        print(" Purchase trajectory summary")
        print("=" * 80)
        
        print(f" Target product: {self.current_product['name'] if self.current_product else 'N/A'}")
        print(f" Product price: ${self.current_product['price']:.2f}" if self.current_product else "N/A")
        print(f" Purchase successful: {'yes' if self.purchase_successful else 'no'}")
        print(f" Final reward: {self.final_reward}")
        print(f" Total steps: {len(self.trajectory)}")
        
        print(f"\n Detailed trajectory:")
        for i, step in enumerate(self.trajectory, 1):
            print(f"  {i}. {step['action']} (reward: {step['reward']}, done: {step['done']})")
        
        print("=" * 80)
    
    def save_results(self, unique_keywords: List[Dict], all_results: List[Dict], filename: str = None):

        if filename is None:
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            filename = f"webshop_analysis_{timestamp}.json"
        
        output_data = {
            'timestamp': datetime.now().isoformat(),
            'target_product': self.current_product,
            'purchase_successful': self.purchase_successful,
            'final_reward': self.final_reward,
            'trajectory': self.trajectory,
            'unique_keywords': unique_keywords,
            'keyword_search_results': all_results
        }
        
        try:
            with open(filename, 'w', encoding='utf-8') as f:
                json.dump(output_data, f, ensure_ascii=False, indent=2)
            print(f" Analysis results saved to: {filename}")
        except Exception as e:
            print(f" Save failed: {e}")
    
    def close(self):

        try:
            if self.env:
                self.env.close()
            print(" Environment closed")
        except:
            pass


def main():
    """Run the full analysis: find a product, simulate buying it, test keywords.

    Steps: locate the most expensive product, simulate a purchase
    trajectory, search every keyword subset of the product name, print the
    summaries and save everything as JSON. The environment is closed on
    every exit path, including errors and Ctrl-C.
    """
    print(" WebShop analyzer")
    print("=" * 50)

    analyzer = None
    try:
        analyzer = WebShopAnalyzer()

        # Step 1: find the most expensive product
        product = analyzer.find_most_expensive_product()

        if not product:
            print(" No purchasable product found")
            return

        # Step 2: simulate purchase trajectory (the outcome is kept on the
        # analyzer and shown by the summary below)
        print("\n" + "=" * 80)
        print(" Purchase trajectory simulation")
        print("=" * 80)
        analyzer.simulate_purchase_trajectory(product)

        # Step 3: find unique keywords
        print("\n" + "=" * 80)
        print(" Keyword subset analysis")
        print("=" * 80)
        unique_keywords, all_results = analyzer.find_unique_keywords(product)

        # Step 4: display results
        analyzer.display_trajectory_summary()

        print("\n" + "=" * 80)
        print(" Keyword analysis results")
        print("=" * 80)

        if unique_keywords:
            print(f" Found {len(unique_keywords)} unique keywords:")
            for i, kw in enumerate(unique_keywords, 1):
                print(f"   {i}. '{kw['keyword']}' (unique)")
        else:
            print(" No unique keywords found")

        # Step 5: save results
        analyzer.save_results(unique_keywords, all_results)

    except KeyboardInterrupt:
        print("\n User interrupted program")
    except Exception as e:
        print(f" Program execution failed: {e}")
        import traceback
        traceback.print_exc()
    finally:
        # Step 6: close environment -- also after an error or interruption
        if analyzer is not None:
            analyzer.close()


# Run the analysis only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
