Tuesday, April 15, 2008

wordparser.py

I was pretty bored yesterday morning, so I decided to implement a CSE143 homework assignment I heard was given here: a string generator. Given a list file written in a simple syntax, it generates pseudorandom sentences:

wordparser.py

#!/usr/bin/python
import os, sys, random

class SentenceGenerator(dict):
    """Grammar table that produces pseudorandom sentences.

    The instance itself is a dict mapping each rule name to a tuple of
    alternatives.  An alternative is either a plain string or a tuple (built
    from a ``<...>`` group) whose members are expanded in order.
    """

    def __init__(self, lines):
        """Build the grammar from an iterable of ``name:value`` lines.

        Empty lines are skipped.  Rules are parsed in the order given, so a
        ``:name`` reference only resolves to a rule from an earlier line.
        """
        for line in lines:
            if line:
                name, _sep, value = line.partition(':')
                self[name] = self.addparts(value)

    def addparts(self, s):
        """Parse the right-hand side of a rule into a tuple of alternatives.

        Syntax handled here:
          * tokens are separated by spaces;
          * ``<a b c>`` is a group, parsed recursively into a nested tuple;
          * ``:name`` splices in the alternatives of the rule ``name`` if it
            is already defined, otherwise the literal text ``name``;
          * a lone ``'`` stands for the empty string.

        Returns an empty tuple when the angle brackets are unbalanced or the
        text ends in a dangling ``:``.
        """
        if s.count('<') != s.count('>'):
            return ()
        parts = ()
        level = 0       # current bracket nesting depth
        accum = ''      # text collected for the token/group in progress
        ref = False     # True once a leading ':' marked the token as a reference
        for ch in s:
            if ch == '<':
                level += 1
            elif ch == '>':
                level -= 1
                if level == 0:
                    # Outermost bracket closed: the collected text is one
                    # group.  Bracket characters are never accumulated, so
                    # brackets nested inside a group are flattened away.
                    parts += (self.addparts(accum),)
                    accum = ''
            elif ch == ':' and level == 0 and not accum:
                ref = True
            elif ch == ' ' and level == 0:
                if accum:
                    parts += self._token(accum, ref)
                    ref = False
                accum = ''
            else:
                accum += ch
        if level > 0:
            return ()
        if ref and not accum:
            return ()
        if accum:
            parts += self._token(accum, ref)
        return parts

    def _token(self, text, ref):
        """Return the tuple of alternatives contributed by a single token."""
        if ref:
            if text in self:
                return self[text]
            return (text,)
        if text == '\'':
            return ('',)
        return (text,)

    def randomelement(self, key):
        """Return one randomly chosen alternative of rule *key*.

        Returns None when the rule is unknown or has no alternatives (for
        example because its definition had unbalanced brackets).
        """
        options = self.get(key)
        if not options:
            return None
        return random.choice(options)

    def generate(self, level='sentence'):
        """Generate one random expansion of the rule named *level*.

        Returns a flat tuple of words when the chosen alternative is a group,
        the alternative itself when it is a plain string, and ``''`` when the
        grammar is empty or nothing could be chosen.
        """
        if not len(self):
            return ''
        ret = self.randomelement(level)
        if not ret:
            return ''
        if isinstance(ret, tuple):
            return self._expand(ret)
        return ret

    def generate_ref(self, toget):
        """Expand *toget*, which is a rule name or a tuple of rule names.

        A tuple is expanded member by member into one flat tuple.  A name is
        replaced by a random alternative of that rule; if the alternative is
        a group it is expanded recursively.  Returns None for an unknown name.
        """
        if isinstance(toget, tuple):
            # The original iterated over an unassigned local here, so any
            # tuple argument raised UnboundLocalError.
            return self._expand(toget)
        out = self.randomelement(toget)
        if isinstance(out, tuple):
            return self._expand(out)
        return out

    def _expand(self, parts):
        """Expand every member of *parts* and flatten the results."""
        out = ()
        for part in parts:
            piece = self.generate_ref(part)
            if isinstance(piece, tuple):
                out += piece
            else:
                out += (piece,)
        return out



if __name__ == '__main__':
    # Command-line entry point: read the grammar file named by the first
    # argument, generate one sentence and print it.
    if len(sys.argv) >= 2 and os.path.isfile(sys.argv[1]):
        with open(sys.argv[1], 'r') as fp:
            # Strip only the line terminator; slicing off the last character
            # would truncate a final line that has no trailing newline.
            lines = [raw.rstrip('\r\n') for raw in fp]
        sg = SentenceGenerator(lines)
        st = ' '.join(sg.generate())
        if st:
            # Capitalise the first letter of the sentence.
            st = st[0].upper() + st[1:]
            # Attach the closing punctuation to the preceding word.
            if len(st) >= 2 and st[-1] in sg.get('stop', ()) and st[-2] == ' ':
                st = st[:-2] + st[-1]
            # An empty-string token (written as ' in the list file) leaves a
            # doubled space after joining; collapse it to a single space.
            st = st.replace('  ', ' ')
        print(st)
    else:
        sys.stderr.write("Usage: %s words.lst\n" % sys.argv[0])

Example words.lst

adverb:amazingly brilliantly deadly creatively
adjective:red green blue spherical
name:Neil John David rms Torvalds
conjunction:and or
join:' :conjunction
title:the a
noun:tree ball doom cow :name
verb:kills destroys decimated eats ate assumes
action:<verb> <adverb verb>
descriptor:<adjective> <adverb adjective>
subject:<descriptor noun> <descriptor conjunction descriptor noun> <adverb adjective conjunction adjective noun>
object:<title descriptor noun> <title descriptor conjunction descriptor noun> <adverb adjective conjunction adjective noun>
stop:. ! ?
sentence:<subject stop> <subject action stop> <noun action stop> <subject action object stop> <noun action noun stop> <name verb adjective name stop> <verb object stop>

Of course, the list file is very simply put together, so the sentences it generates don't always make grammatical sense. However, that is easy to improve by editing the file. Here is a sample run:

% for i in {1..10}; python wordparser.py words.lst
Creatively blue and red rms?
Assumes a amazingly red or blue cow?
Deadly red or red Neil?
Decimated a spherical ball.
Doom destroys cow!
Neil eats green David.
John eats?
Torvalds ate spherical John!
Kills creatively red or blue Torvalds.
Eats the amazingly green or red John?

No comments: