package eu.youssefsan.dom.countWords;

import eu.youssefsan.dom.countWords.DOMDocument ;

import java.io.*;
import org.w3c.dom.*;
import javax.xml.parsers.*;
import java.util.*;


/* To be used with nl Monodix

*/
/**
 * Counts and lists paradigms and lemmas found in the DOM tree of an Apertium
 * monolingual dictionary (the {@code main} method loads
 * {@code apertium-fr-nl.nl.dix.xml}).
 *
 * <p>Paradigms are {@code <pardef n="...">} elements; the part of speech is
 * recognised from a marker inside the {@code n} attribute ({@code __n},
 * {@code __adj}, {@code __vblex}, ...).
 *
 * <p>All lookups are null-safe: a missing element or attribute yields an empty
 * value or a zero count instead of a {@link NullPointerException}. Every
 * counting method reports the count of the document it was just given; the
 * counter fields only mirror the result of the most recent call.
 */
public class CountWords {
    /** Marker found in the {@code n} attribute of noun paradigms. */
    private static final String NOUN_MARKER = "__n";
    /** Marker of numeral paradigms; it contains {@link #NOUN_MARKER} and must be excluded. */
    private static final String NUMERAL_MARKER = "__num";

    /** Document read by {@link #getNumberOfParadef(String)}; nothing in this class assigns it. */
    Document myDocument;
    Node myNode;
    String tag; //= "paradefs"
    /** Plural ending returned by the most recent {@link #searchNounPluralEnding(Element)} call. */
    String pluralEnding;
    /** Results of the most recent counting calls (not running totals). */
    private int __adjCounter, __nCounter, __numCounter, __advCounter, __vblexCounter;

    /** Number of {@code <pardef>} children found by the most recent {@link #countChildren} call. */
    int paradigmCounter;
    int tagMatches; //Examples matches for <pardef>

    /**
     * Counts the elements named {@code elementName} anywhere in the document,
     * whatever their depth in the tree.
     *
     * @param doc         document to search
     * @param elementName tag name to look for
     * @return number of matching elements
     */
    public int countMatches(Document doc, String elementName) {
        return doc.getElementsByTagName(elementName).getLength();
    }

    /**
     * Lists every noun paradigm of the document in the form
     * {@code gender/root/pluralEnding/__n}.
     *
     * <p>{@code root} is the part of the paradigm name that precedes its first
     * underscore, so a paradigm named {@code l/id__n} with gender {@code nt} and
     * plural ending {@code eden} is reported as {@code nt/l/id/eden/__n}.
     * Paradigms without an {@code n} attribute are skipped.
     *
     * @param doc document to search
     * @return one entry per noun paradigm, in document order
     */
    public ArrayList<String> getAllNounParadigms(Document doc) {
        NodeList pardefs = doc.getElementsByTagName("pardef");
        int pardefCount = pardefs.getLength();
        ArrayList<String> nounList = new ArrayList<String>();

        for (int i = 0; i < pardefCount; i++) {
            Element pardef = (Element) pardefs.item(i);
            String name = pardef.getAttribute("n");
            if (!isANoun(name)) {
                continue;
            }
            // A noun name always contains "__n", so an underscore is present.
            String root = name.substring(0, name.indexOf('_'));
            String ending = searchNounPluralEnding(pardef);
            String gender = getNounGender(pardef);
            nounList.add(gender + "/" + root + "/" + ending + "/__n");
        }
        __nCounter = nounList.size();
        return nounList;
    }

    /**
     * Lists the noun entries of the first {@code <section>} in the form
     * {@code lemma/paradigm}.
     *
     * <p>The lemma is the {@code lm} attribute of the element that directly
     * contains the {@code <par>} reference. References whose parent has no
     * {@code lm} attribute are skipped because there is no lemma to report.
     *
     * @param doc document to search
     * @return one entry per noun {@code <par>} reference, in document order;
     *         empty when the document has no {@code <section>}
     */
    public ArrayList<String> getAllNounForms(Document doc) {
        ArrayList<String> nounParadigmList = new ArrayList<String>();
        Node firstSection = doc.getElementsByTagName("section").item(0);
        if (!(firstSection instanceof Element)) {
            return nounParadigmList;
        }

        NodeList parRefs = ((Element) firstSection).getElementsByTagName("par");
        int parCount = parRefs.getLength();
        for (int i = 0; i < parCount; i++) {
            Element parRef = (Element) parRefs.item(i);
            String paradigm = parRef.getAttribute("n");
            if (!isANoun(paradigm)) {
                continue;
            }
            Node entry = parRef.getParentNode();
            if (entry instanceof Element && ((Element) entry).hasAttribute("lm")) {
                nounParadigmList.add(((Element) entry).getAttribute("lm") + "/" + paradigm);
            }
        }
        return nounParadigmList;
    }

    /**
     * Finds the plural ending of a noun paradigm, i.e. what has to be appended
     * to the root to obtain the plural.
     *
     * <p>The ending is read from the text of the first {@code <l>} inside the
     * second {@code <e>} of the paradigm (the first {@code <e>} is taken to be
     * the singular, the second the plural). The result is also stored in
     * {@link #pluralEnding}.
     *
     * @param parElement the {@code <pardef>} element to inspect; may be null
     * @return the plural ending, or a single space when the paradigm has no
     *         second {@code <e>}, no {@code <l>}, or an empty {@code <l>}
     */
    public String searchNounPluralEnding(Element parElement) {
        // Work on a local so that a paradigm without an ending can never
        // inherit the ending found for a previously analysed paradigm.
        String ending = null;
        if (parElement != null) {
            Node pluralEntry = parElement.getElementsByTagName("e").item(1);
            if (pluralEntry instanceof Element) {
                Node left = ((Element) pluralEntry).getElementsByTagName("l").item(0);
                Node text = (left == null) ? null : left.getFirstChild();
                if (text instanceof CharacterData) {
                    ending = ((CharacterData) text).getData();
                }
            }
        }
        if (ending == null) {
            ending = " ";
        }
        pluralEnding = ending;
        return ending;
    }

    /**
     * Reads the gender of a noun paradigm. The gender is assumed to be the
     * {@code n} attribute of the second {@code <s>} found under the first
     * {@code <r>} of the paradigm.
     *
     * @param parElement the {@code <pardef>} element to inspect; may be null
     * @return the gender symbol, or the empty string when it cannot be found
     */
    public String getNounGender(Element parElement) {
        if (parElement == null) {
            return "";
        }
        Node right = parElement.getElementsByTagName("r").item(0);
        if (!(right instanceof Element)) {
            return "";
        }
        Node genderSymbol = ((Element) right).getElementsByTagName("s").item(1);
        return attributeOrEmpty(genderSymbol, "n");
    }

    /**
     * Tells whether a paradigm name denotes a noun: it contains {@code __n}
     * but not {@code __num}.
     *
     * @param attValue value of an {@code n} attribute; may be null
     * @return true for noun paradigm names
     */
    boolean isANoun(String attValue) {
        return attValue != null
                && attValue.contains(NOUN_MARKER)
                && !attValue.contains(NUMERAL_MARKER);
    }

    /**
     * Counts the {@code <pardef>} elements whose name contains {@code __adj}.
     *
     * @param doc document to search
     * @return number of adjective paradigms in {@code doc}
     */
    public int countAdjectiveParadigms(Document doc) {
        NodeList pardefs = doc.getElementsByTagName("pardef");
        int pardefCount = pardefs.getLength();
        int matches = 0;
        for (int i = 0; i < pardefCount; i++) {
            if (attributeOrEmpty(pardefs.item(i), "n").contains("__adj")) {
                matches++;
            }
        }
        __adjCounter = matches;
        return matches;
    }

    /**
     * Counts the {@code <pardef>} elements whose name contains {@code __vblex}
     * and prints one line per match on standard output.
     *
     * @param doc document to search
     * @return number of lexical-verb paradigms in {@code doc}
     */
    public int countLexVerbParadigms(Document doc) {
        NodeList pardefs = doc.getElementsByTagName("pardef");
        int pardefCount = pardefs.getLength();
        int matches = 0;
        for (int i = 0; i < pardefCount; i++) {
            Node pardef = pardefs.item(i);
            String iValue = attributeOrEmpty(pardef, "n");
            if (iValue.contains("__vblex")) {
                matches++;
                Attr iAttr = ((Element) pardef).getAttributeNode("n");
                System.out.println("__vblex = " + matches + " ; iAttr = " + iAttr + " iValue = " + iValue);
            }
        }
        __vblexCounter = matches;
        return matches;
    }

    /**
     * Counts the elements named {@code parentName} in the document. Despite
     * its name this does not look at children; it is equivalent to
     * {@link #countMatches(Document, String)}.
     *
     * @param doc        document to search
     * @param parentName tag name to look for
     * @return number of matching elements
     */
    public int countChildren2(Document doc, String parentName) {
        return countMatches(doc, parentName);
    }

    /**
     * Counts the {@code <pardef>} elements that are direct children of the
     * first element named {@code parentName}, printing two progress lines on
     * standard output. The result is also stored in {@link #paradigmCounter}.
     *
     * @param doc        document to search
     * @param parentName tag name of the container (e.g. {@code pardefs})
     * @return number of direct {@code <pardef>} children; 0 when the document
     *         has no element named {@code parentName}
     */
    public int countChildren(Document doc, String parentName) {
        NodeList parents = doc.getElementsByTagName(parentName);
        System.out.println("In countChildren, myLength = " + parents.getLength());

        int paradigms = 0;
        int nouns = 0;
        Node parent = parents.item(0);
        if (parent != null) {
            System.out.println("myParent(0).getNodeName() = " + parent.getNodeName());
            NodeList children = parent.getChildNodes();
            int childCount = children.getLength();
            for (int i = 0; i < childCount; i++) {
                Node child = children.item(i);
                if (!"pardef".equals(child.getNodeName())) {
                    continue; // whitespace text nodes, comments, other tags
                }
                if (isANoun(attributeOrEmpty(child, "n"))) {
                    nouns++;
                }
                paradigms++;
            }
        }
        __nCounter = nouns;
        paradigmCounter = paradigms;
        return paradigms;
    }

    /**
     * Counts the elements named {@code tag} in {@link #myDocument}.
     *
     * @param tag tag name to look for
     * @return number of matching elements, or 0 when no document has been set
     */
    public int getNumberOfParadef(String tag) {
        if (myDocument == null) {
            return 0;
        }
        return myDocument.getElementsByTagName(tag).getLength();
    }

    /**
     * Returns the value of an attribute, or the empty string when the node is
     * null, is not an element, or does not carry the attribute.
     */
    private static String attributeOrEmpty(Node node, String attributeName) {
        if (!(node instanceof Element)) {
            return "";
        }
        return ((Element) node).getAttribute(attributeName);
    }

    /**
     * Loads the dictionary {@code apertium-fr-nl.nl.dix.xml} through
     * {@code DOMDocument}. No report is printed; call the counting and listing
     * methods on {@code cw} with {@code myDocument} to produce one.
     */
    public static void main(String[] Args) throws IOException {
        CountWords cw = new CountWords();
        DOMDocument dd = new DOMDocument();
        String sourceFile = "apertium-fr-nl.nl.dix.xml";
        Document myDocument = dd.createDOMDocument(sourceFile);
    }
}

