# -*- coding: utf-8 -*-
"""**Context Free Grammars Manipulation.**
Basic context-free grammar manipulation for building uniform random generators.
.. *Authors:* Rogério Reis & Nelma Moreira
.. *This is part of FAdo project* http://fado.dcc.fc.up.pt
.. *Copyright:* 1999-2014 Rogério Reis & Nelma Moreira {rvr,nam}@dcc.fc.up.pt
.. This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as published
by the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
675 Mass Ave, Cambridge, MA 02139, USA."""
#__package__ = "FAdo"
import re
import string
from types import StringType
from random import randint
import common
class CFGrammar(object):
""" Class for context-free grammars
:var Rules: grammar rules
:var Terminals: terminal symbols
:var Nonterminals: nonterminal symbols
:var Start: start symbol
:type Start: string
:var ntr: dictionary of rules for each nonterminal"""
def __init__(self, gram):
"""Initialization
:param gram: a list of productions; each production is a tuple (LeftHandside, RightHandside) with
LeftHandside a nonterminal and RightHandside a list of symbols; the first production defines the start symbol
"""
self.Rules = gram
self.Nonterminals = {r[0] for r in self.Rules}
self.Terminals = set()
for r in self.Rules:
if type(r[1]) is StringType:
if r[1] not in self.Nonterminals:
self.Terminals.add(r[1])
else:
for s in r[1]:
if s not in self.Nonterminals:
self.Terminals.add(s)
self.Start = self.Rules[0][0]
self.Nullable = {}
self.tr = {}
self.ntr = {}
for i in xrange(len(self.Rules)):
if self.Rules[i][0] not in self.ntr:
self.ntr[self.Rules[i][0]] = {i}
else:
self.ntr[self.Rules[i][0]].add(i)
def __str__(self):
"""Grammar rules
:return: a string representing the grammar rules"""
s = ""
for n in xrange(len(self.Rules)):
lhs = self.Rules[n][0]
rhs = self.Rules[n][1]
if type(rhs) is not StringType:
rhs = string.join(rhs)
s += "{0:d} | {1:s} -> {2:s} \n".format(n, lhs, rhs)
return "Grammar Rules:\n\n%s" % s
def maketerminals(self):
"""Extracts ``Terminals`` from the rules. ``Nonterminals`` must already exist."""
self.Terminals = set()
for r in self.Rules:
if type(r[1]) is StringType:
if r[1] not in self.Nonterminals:
self.Terminals.add(r[1])
else:
for s in r[1]:
if s not in self.Nonterminals:
self.Terminals.add(s)
def makenonterminals(self):
"""Extracts ``Nonterminals`` from the grammar rules."""
for r in self.Rules:
self.Nonterminals.add(r[0])
def terminalrules(self):
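"""Builds ``tr``: a dictionary mapping each terminal to the set of indices of the rules whose
right-hand side is exactly that terminal."""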
self.tr = {}
for a in self.Terminals:
for i in xrange(len(self.Rules)):
if self.Rules[i][1] == a:
if a not in self.tr:
self.tr[a] = {i}
else:
self.tr[a].add(i)
def nonterminalrules(self):
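"""Builds ``ntr``: a dictionary mapping each nonterminal to the set of indices of the rules
having it as left-hand side."""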
self.ntr = {}
for i in xrange(len(self.Rules)):
if self.Rules[i][0] not in self.ntr:
self.ntr[self.Rules[i][0]] = {i}
else:
self.ntr[self.Rules[i][0]].add(i)
def NULLABLE(self):
"""Determines which nonterminals are nullable, i.e. X ->* []"""
self.Nullable = {}
for s in self.Terminals:
self.Nullable[s] = 0
for s in self.Nonterminals:
self.Nullable[s] = 0
if s in self.ntr:
for i in self.ntr[s]:
if not self.Rules[i][1]:
self.Nullable[s] = 1
break
k = 1
while k == 1:
k = 0
for r in self.Rules:
e = 0
for i in r[1]:
if not self.Nullable[i]:
e = 1
break
if e == 0 and not self.Nullable[r[0]]:
self.Nullable[r[0]] = 1
k = 1
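# Usage sketch (the grammar below is only an illustration, not part of FAdo):
# build a grammar for S -> a S b | [] and compute its nullable nonterminals.
#
#   g = CFGrammar([("S", ("a", "S", "b")), ("S", [])])
#   g.Terminals        # set(['a', 'b'])
#   g.NULLABLE()
#   g.Nullable["S"]    # 1, since S ->* []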
class CNF(CFGrammar):
"""No useless nonterminals or epsilon rules are allowed. Given a CFG grammar description, generates one in CNF.
It is then possible to randomly generate words of a given size, after some needed pre-calculations."""
def __init__(self, gram, mark="A@"):
# super(CNF, self).__init__(gram)
CFGrammar.__init__(self, gram)
self.mark = mark
self.newnt = 0
self.nttr = {}
self.unitary = self.get_unitary()
self.Chomsky()
def get_unitary(self):
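"""Returns the set of unitary rules, i.e. rules whose right-hand side is a single nonterminal."""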
return set([r for r in self.Rules if
(type(r[1]) is StringType and
r[1] in self.Nonterminals) or
(len(r[1]) == 1 and r[1][0] in self.Nonterminals)])
def elim_unitary(self):
"""Elimination of unitary rules """
f = 1
while f:
f = 0
self.unitary = self.get_unitary()
for u in self.unitary:
if type(u[1]) is StringType:
ui = u[1]
else:
ui = u[1][0]
if ui in self.ntr:
for i in self.ntr[ui]:
if (u[0], self.Rules[i][1]) not in self.Rules:
f = 1
self.Rules.append((u[0], self.Rules[i][1]))
self.ntr[u[0]].add(len(self.Rules) - 1)
for u in self.unitary:
self.Rules.remove(u)
def get_ntr_tr(self, a):
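"""Adds a new nonterminal ``mark + a`` deriving only the terminal ``a`` and returns it."""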
nta = self.mark + a
self.Nonterminals.add(nta)
self.Rules.append((nta, a))
return nta
def iter_rule(self, lhs, rhs, i):
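"""Recursively binarises a right-hand side with more than two symbols, introducing fresh
nonterminals; rule ``i`` is overwritten by the last binary rule produced."""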
if type(rhs) is not StringType and len(rhs) == 2:
self.Rules[i] = ((lhs, rhs))
return
nta = self.mark + "_" + str(self.newnt)
self.Nonterminals.add(nta)
self.newnt += 1
self.Rules.append((lhs, (rhs[0], nta)))
self.iter_rule(nta, rhs[1:], i)
def Chomsky(self):
""" Transform to CNF """
self.elim_unitary()
self.nttr = {}
# terminal a is replaced by A@_a in all rules > 2
for a in self.Terminals:
for i in xrange(len(self.Rules)):
if type(self.Rules[i][1]) is not StringType and len(self.Rules[i][1]) >= 2 and a in self.Rules[i][1]:
if a not in self.nttr:
self.nttr[a] = self.get_ntr_tr(a)
rr = list(self.Rules[i][1])
for k in xrange(len(rr)):
if rr[k] == a:
rr[k] = self.nttr[a]
self.Rules[i] = (self.Rules[i][0], tuple(rr))
n = len(self.Rules)
for i in xrange(n):
if type(self.Rules[i][1]) is not StringType and len(self.Rules[i][1]) > 2:
self.iter_rule(self.Rules[i][0], self.Rules[i][1], i)
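# Usage sketch (illustrative grammar, with no epsilon rules or useless symbols, as
# this class requires); the constructor already applies Chomsky():
#
#   cnf = CNF([("S", ("a", "S", "b")), ("S", ("a", "b"))])
#   # every rule in cnf.Rules is now either (A, a) with a terminal,
#   # or (A, (B, C)) with B and C nonterminals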
class cfgGenerator(object):
"""CFG uniform generator"""
def __init__(self, cfgr, size):
"""Object initialization
:param cfgr: grammar for the random objects
:type cfgr: CNF
:param size: size of objects
:type size: integer"""
self.grammar = cfgr
self.size = size
# self.density = {}
# self.density_r = {}
self._eval_densities(size)
def generate(self):
"""Generates a new random object derived from the start symbol
:returns: object
:rtype: string"""
return self._gen(self.grammar.Start, self.size)
def _gen(self, nt, n):
"""Generates a new random object generated from the nonterminal
:param nt: nonterminal
:type nt: string
:param n: object size
:type n: integer
:returns: object
:rtype: string"""
g = self.grammar
if n in self.density[nt] and self.density[nt][n] > 0:
u = randint(1, self.density[nt][n])
r = 1
if n == 1:
for i in g.ntr[nt]:
if g.Rules[i][1] in g.Terminals:
r += 1
if r > u:
ic = i
break
return g.Rules[ic][1]
for i in g.ntr[nt]:
if len(g.Rules[i][1]) == 2:
if n in self.density_r[i]:
r += self.density_r[i][n]
if r > u:
ic = i
break
uk = randint(1, self.density_r[ic][n])
rk = 1
for k in xrange(1, n):
if (k in self.density[g.Rules[ic][1][0]] and self.density[g.Rules[ic][1][0]][k] > 0 and
n - k in self.density[g.Rules[ic][1][1]] and self.density[g.Rules[ic][1][1]][n - k] > 0):
rk += self.density[g.Rules[ic][1][0]][k] * self.density[g.Rules[ic][1][1]][n - k]
if rk > uk:
kk = k
break
return self._gen(g.Rules[ic][1][0], kk) + self._gen(g.Rules[ic][1][1], n - kk)
def _eval_densities(self, n):
"""Evaluates densities
:param n: object size
:type n: integer"""
g = self.grammar
self.density = {}
self.density_r = {}
for nt in g.Nonterminals:
self.density[nt] = {}
self.density[nt][1] = 0
g.terminalrules()
g.nonterminalrules()
for t in g.tr:
for i in g.tr[t]:
self.density[g.Rules[i][0]][1] += 1
for l in xrange(2, n + 1):
for nt in g.ntr:
r = 0
for i in g.ntr[nt]:
if len(g.Rules[i][1]) == 2:
if i not in self.density_r:
self.density_r[i] = {}
self.density_r[i][l] = sum([self.density[g.Rules[i][1][0]][k] * self.density[g.Rules[i][1][1]][l - k]
for k in xrange(1, l)
if k in self.density[g.Rules[i][1][0]] and l - k in self.density[g.Rules[i][1][1]]])
r += self.density_r[i][l]
if r:
self.density[nt][l] = r
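# Usage sketch (illustrative grammar): uniform generation of a word of size 6 from
# the CNF grammar of {a^n b^n : n >= 1}:
#
#   cnf = CNF([("S", ("a", "S", "b")), ("S", ("a", "b"))])
#   gen = cfgGenerator(cnf, 6)
#   gen.generate()     # "aaabbb", the only word of size 6 in this language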
class reStringRGenerator(cfgGenerator):
"""Uniform random generator for re strings"""
def __init__(self, Sigma=["a", "b"], size=10, cfgr=None, eps=None, empty=None, ident="Ti"):
""" Uniform random generator for regular expressions. Used without arguments generates an uncollapsible re
over {a,b} with size 10. For generate an arbitary re over an alphabet of 10 symbols of size 100:
reStringRGenerator (small_alphabet(10),100,reStringRGenerator.g_regular_base)
:param Sigma: re alphabet (that will be the set of grammar terminals)
:type Sigma: list or set
:param size: word size
:type size: integer
:param cfgr: base grammar
:param epsilon: if not None is added to a grammar terminals
:param empty: if not None is added to a grammar terminals
.. note::
the grammar can have already this symbols"""
if cfgr is None:
self.base = gRules(reGrammar["g_regular_uncollaps"])
else:
self.base = gRules(cfgr)
self.Sigma = Sigma
for i in self.Sigma:
self.base.append((ident, i))
if eps is not None:
self.base.append((ident, common.Epsilon))
if empty is not None:
self.base.append((ident, common.EmptySet))
self.gen = cfgGenerator(CNF(self.base), size)
def generate(self):
"""Generates a new random RE string"""
return self.gen.generate()
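# Usage sketch: the default call draws uncollapsible re strings of size 10 over
# {a, b}; an arbitrary re over a 4-symbol alphabet can be drawn from g_regular_base:
#
#   rgen = reStringRGenerator()
#   rgen.generate()    # a random uncollapsible re string of size 10
#   rgen2 = reStringRGenerator(smallAlphabet(4), 50, reGrammar["g_regular_base"])
#   rgen2.generate()   # a random re string of size 50 over {a, b, c, d}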
#noinspection PyUnusedLocal
def CYKParserTable(gramm, word):
"""Evaluates CYK parser table
:param gramm: grammar
:type gramm: CNF
:param word: word to be parsed
:type word: string
:returns: the CYK table
:rtype: list of lists of symbols"""
pass
def gRules(rules_list, rulesym="->", rhssep=None, rulesep='|'):
"""Transforms a list of rules into a grammar description.
:param rules_list: a list of rules, where each rule is a string of the form: Word rulesym Word1 ... Wordn, or
Word rulesym [] for an epsilon rule
:param rulesym: LHS and RHS rule separator
:param rhssep: RHS symbols separator (None for white chars)
:param rulesep: separator between alternative right-hand sides
:return: a grammar description """
gr = []
sep = re.compile(rulesym)
#rsep = re.compile(rulesep)
for r in rules_list:
if type(r) is StringType:
rule = r
else:
rule = r[0]
m = sep.search(rule)
if not m:
continue
else:
if m.start() == 0:
raise common.CFGgrammarError(rule)
else:
lhs = rule[0:m.start()].strip()
if m.end() == len(rule):
raise common.CFGgrammarError(rule)
else:
rest = string.strip(rule[m.end():])
if rest == "[]":
rhs = []
gr.append((lhs, rhs))
else:
multi = string.split(rest, rulesep)
rhs = []
for i in multi:
l = string.split(i, rhssep)
if len(l) > 1:
l = tuple(string.split(i, rhssep))
else:
l = l[0]
gr.append((lhs, l))
return gr
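# Small illustration of gRules: each "|" alternative becomes one production, and a
# right-hand side of [] denotes an epsilon rule.
#
#   gRules(["S -> a S b | a b"])
#   # [('S', ('a', 'S', 'b')), ('S', ('a', 'b'))]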
def smallAlphabet(k, sigma_base="a"):
"""Easy way to have small alphabets
:param k: alphabet size (must be less than 52)
:param sigma_base: initial symbol
:returns: alphabet
:rtype: list"""
Sigma = []
if k >= 52:
raise common.CFGterminalError(k)
lim = min(26, k)
for i in xrange(lim):
Sigma.append(chr(ord(sigma_base) + i))
if k >= 26:
sigma_base = 'A'
for i in xrange(k - lim):
Sigma.append(chr(ord(sigma_base) + i))
return Sigma
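# For instance, smallAlphabet(4) gives ['a', 'b', 'c', 'd'] and smallAlphabet(28)
# gives the 26 lowercase letters followed by ['A', 'B']; k >= 52 raises CFGterminalError.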
reGrammar = {'g_regular_base': ["Tr -> Tr + Tc | Tc", "Tc -> Tc Ts | Ts",
"Ts -> Ts * | Ti | ( Tr ) "],
'g_regular_wredund': ["Tr -> Trs | Tf ", "Trs -> Trs + Tf | Tf + Tf",
"Tf -> Tc | Te | Ti",
"Tc -> Tc Ts | Ts Ts",
"Ts -> Te | Ti | ( Trs ) ",
"Te -> ( Trs ) * | ( Tc ) * | Ti *"],
'g_regular_uncollaps': ["Ts -> Trs | Tcc | Tee | Ti | %s | %s" % (common.Epsilon, common.EmptySet),
"Tcc -> Tcc Tr | Tr Tr",
"Tr -> ( Trs ) | Tee | Ti ",
"Tee -> ( Trs ) * | ( Tcc ) * | Ti *",
"Trs -> %s + Tx | Ty + Tz" % common.Epsilon,
"Tx -> Tt | Tt + Tx",
"Tt -> Tcc | Ti ",
"Ty -> Tz | Ty + Tz",
"Tz -> Tcc | Tee | Ti "],
'g_rpn': ["Tx -> %s | Ti | + Tx Tx | . Tx Tx | * Tx" % common.Epsilon],
'g_sha': ["Ts -> Ted | Tec | Tes | Ti |%s" % common.Epsilon,
"Tec -> . Tec Tr | . Tr Tr ",
"Tr -> Ted | Tes | Ti",
"Tes -> * Ted | * Tec | * Ti",
"Ted -> + %s Tx | + Ty Tz" % common.Epsilon,
"Tx -> Tt | + Tt Tx",
"Tt -> Tec | Ti",
"Ty -> Tz | + Ty Tz",
"Tz -> Tec | Tes | Ti"],
'g_rpn_pi': [ "Tp -> Ti | + Tp Tx | + Tnp Tp | . Tx Tp | . Tp %s" % common.Epsilon,
"Tx -> %s | Ti | + Tx Tx | . Tx Tx | * Tx" % common.Epsilon,
"Tnp -> %s | + Tnp Tnp | . Tnp Tnp | . Tp Tnp" % common.Epsilon]}