1 | # from web2py |
---|
2 | |
---|
3 | from __future__ import print_function |
---|
4 | from pydal._compat import pickle, unicodeT |
---|
5 | import sys |
---|
6 | import re |
---|
7 | import random |
---|
8 | import datetime |
---|
9 | |
---|
10 | IUP = {'ad': {'minim': 1}, 'irure': {'dolor': 1}, 'in': {'voluptate': 1, 'reprehenderit': 1, 'culpa': 1}, 'ea': {'commodo': 1}, 'excepteur': {'sint': 1}, 'sunt': {'in': 1}, 'elit': {'sed': 1}, 'duis': {'aute': 1}, 'sed': {'do': 1}, 'eiusmod': {'tempor': 1}, 'enim': {'ad': 1}, 'eu': {'fugiat': 1}, 'et': {'dolore': 1}, 'labore': {'et': 1}, 'incididunt': {'ut': 1}, 'reprehenderit': {'in': 1}, 'est': {'laborum': 1}, 'quis': {'nostrud': 1}, 'sit': {'amet': 1}, 'deserunt': {'mollit': 1}, 'nostrud': {'exercitation': 1}, 'qui': {'officia': 1}, '.': {'excepteur': 1, 'ut': 1, 'duis': 1}, 'consectetur': {'adipiscing': 1}, 'aute': {'irure': 1}, 'dolore': {'eu': 1, 'magna': 1}, 'mollit': {'anim': 1}, 'aliquip': {'ex': 1}, 'nulla': {'pariatur': 1}, 'laborum': {'': 1}, 'do': {'eiusmod': 1}, 'non': {'proident': 1}, 'commodo': {'consequat': 1}, 'aliqua': {'.': 1}, 'cillum': {'dolore': 1}, 'sint': {'occaecat': 1}, 'velit': {'esse': 1}, 'officia': {'deserunt': 1}, 'veniam': {'quis': 1}, 'consequat': {'.': 1}, 'magna': {'aliqua': 1}, 'cupidatat': {'non': 1}, 'ullamco': {'laboris': 1}, 'lorem': {'ipsum': 1}, 'amet': {'consectetur': 1}, 'ipsum': {'dolor': 1}, 'nisi': {'ut': 1}, 'fugiat': {'nulla': 1}, 'occaecat': {'cupidatat': 1}, 'proident': {'sunt': 1}, 'ut': {'aliquip': 1, 'labore': 1, 'enim': 1}, 'minim': {'veniam': 1}, 'culpa': {'qui': 1}, 'tempor': {'incididunt': 1}, 'pariatur': {'.': 1}, 'laboris': {'nisi': 1}, 'anim': {'id': 1}, 'adipiscing': {'elit': 1}, 'id': {'est': 1}, 'dolor': {'in': 1, 'sit': 1}, 'voluptate': {'velit': 1}, 'esse': {'cillum': 1}, 'exercitation': {'ullamco': 1}, 'ex': {'ea': 1}} |
---|
11 | |
---|
12 | FIRST_NAMES = "James,John,Robert,Michael,William,David,Richard,Charles,Joseph,Thomas,Christopher,Daniel,Paul,Mark,Donald,George,Kenneth,Steven,Edward,Brian,Ronald,Anthony,Kevin,Jason,Matthew,Gary,Timothy,Jose,Larry,Jeffrey,Frank,Scott,Eric,Stephen,Andrew,Raymond,Gregory,Joshua,Jerry,Dennis,Walter,Patrick,Peter,Harold,Douglas,Henry,Carl,Arthur,Ryan,Roger,Joe,Juan,Jack,Albert,Jonathan,Justin,Terry,Gerald,Keith,Samuel,Willie,Ralph,Lawrence,Nicholas,Roy,Benjamin,Bruce,Brandon,Adam,Harry,Fred,Wayne,Billy,Steve,Louis,Jeremy,Aaron,Randy,Howard,Eugene,Carlos,Russell,Bobby,Victor,Martin,Ernest,Phillip,Todd,Jesse,Craig,Alan,Shawn,Clarence,Sean,Philip,Chris,Johnny,Earl,Jimmy,Antonio,Danny,Bryan,Tony,Luis,Mike,Stanley,Leonard,Nathan,Dale,Manuel,Rodney,Curtis,Norman,Allen,Marvin,Vincent,Glenn,Jeffery,Travis,Jeff,Chad,Jacob,Lee,Melvin,Alfred,Kyle,Francis,Bradley,Jesus,Herbert,Frederick,Ray,Joel,Edwin,Don,Eddie,Ricky,Troy,Randall,Barry,Alexander,Bernard,Mario,Leroy,Francisco,Marcus,Micheal,Theodore,Clifford,Miguel,Oscar,Jay,Jim,Tom,Calvin,Alex,Jon,Ronnie,Bill,Lloyd,Tommy,Leon,Derek,Warren,Darrell,Jerome,Floyd,Leo,Alvin,Tim,Wesley,Gordon,Dean,Greg,Jorge,Dustin,Pedro,Derrick,Dan,Lewis,Zachary,Corey,Herman,Maurice,Vernon,Roberto,Clyde,Glen,Hector,Shane,Ricardo,Sam,Rick,Lester,Brent,Ramon,Charlie,Tyler,Gilbert,Gene,Marc,Reginald,Ruben,Brett,Angel,Nathaniel,Rafael,Leslie,Edgar,Milton,Raul,Ben,Chester,Cecil,Duane,Franklin,Andre,Elmer,Brad,Gabriel,Ron,Mitchell,Roland,Arnold,Harvey,Jared,Adrian,Karl,Cory,Claude,Erik,Darryl,Jamie,Neil,Jessie,Christian,Javier,Fernando,Clinton,Ted,Mathew,Tyrone,Darren,Lonnie,Lance,Cody,Julio,Kelly,Kurt,Allan,Nelson,Guy,Clayton,Hugh,Max,Dwayne,Dwight,Armando,Felix,Jimmie,Everett,Jordan,Ian,Wallace,Ken,Bob,Jaime,Casey,Alfredo,Alberto,Dave,Ivan,Johnnie,Sidney,Byron,Julian,Isaac,Morris,Clifton,Willard,Daryl,Ross,Virgil,Andy,Marshall,Salvador,Perry,Kirk,Sergio,Marion,Tracy,Seth,Kent,Terrance,Rene,Eduardo,Terrence,Enrique,Freddie,Wade,Austin,Stuart,Fredrick,A
rturo,Alejandro,Jackie,Joey,Nick,Luther,Wendell,Jeremiah,Evan,Julius,Dana,Donnie,Otis,Shannon,Trevor,Oliver,Luke,Homer,Gerard,Doug,Kenny,Hubert,Angelo" |
---|
13 | |
---|
14 | LAST_NAMES="Smith,Johnson,Williams,Jones,Brown,Davis,Miller,Wilson,Moore,Taylor,Anderson,Thomas,Jackson,White,Harris,Martin,Thompson,Garcia,Martinez,Robinson,Clark,Rodriguez,Lewis,Lee,Walker,Hall,Allen,Young,Hernandez,King,Wright,Lopez,Hill,Scott,Green,Adams,Baker,Gonzalez,Nelson,Carter,Mitchell,Perez,Roberts,Turner,Phillips,Campbell,Parker,Evans,Edwards,Collins,Stewart,Sanchez,Morris,Rogers,Reed,Cook,Morgan,Bell,Murphy,Bailey,Rivera,Cooper,Richardson,Cox,Howard,Ward,Torres,Peterson,Gray,Ramirez,James,Watson,Brooks,Kelly,Sanders,Price,Bennett,Wood,Barnes,Ross,Henderson,Coleman,Jenkins,Perry,Powell,Long,Patterson,Hughes,Flores,Washington,Butler,Simmons,Foster,Gonzales,Bryant,Alexander,Russell,Griffin,Diaz,Hayes,Myers,Ford,Hamilton,Graham,Sullivan,Wallace,Woods,Cole,West,Jordan,Owens,Reynolds,Fisher,Ellis,Harrison,Gibson,Mcdonald,Cruz,Marshall,Ortiz,Gomez,Murray,Freeman,Wells,Webb,Simpson,Stevens,Tucker,Porter,Hunter,Hicks,Crawford,Henry,Boyd,Mason,Morales,Kennedy,Warren,Dixon,Ramos,Reyes,Burns,Gordon,Shaw,Holmes,Rice,Robertson,Hunt,Black,Daniels,Palmer,Mills,Nichols,Grant,Knight,Ferguson,Rose,Stone,Hawkins,Dunn,Perkins,Hudson,Spencer,Gardner,Stephens,Payne,Pierce,Berry,Matthews,Arnold,Wagner,Willis,Ray,Watkins,Olson,Carroll,Duncan,Snyder,Hart,Cunningham,Bradley,Lane,Andrews,Ruiz,Harper,Fox,Riley,Armstrong,Carpenter,Weaver,Greene,Lawrence,Elliott,Chavez,Sims,Austin,Peters,Kelley,Franklin,Lawson,Fields,Gutierrez,Ryan,Schmidt,Carr,Vasquez,Castillo,Wheeler,Chapman,Oliver,Montgomery,Richards,Williamson,Johnston,Banks,Meyer,Bishop,Mccoy,Howell,Alvarez,Morrison,Hansen,Fernandez,Garza,Harvey,Little,Burton,Stanley,Nguyen,George,Jacobs,Reid,Kim,Fuller,Lynch,Dean,Gilbert,Garrett,Romero,Welch,Larson,Frazier,Burke,Hanson,Day,Mendoza,Moreno,Bowman,Medina,Fowler,Brewer,Hoffman,Carlson,Silva,Pearson,Holland,Douglas,Fleming,Jensen,Vargas,Byrd,Davidson,Hopkins,May,Terry,Herrera,Wade,Soto,Walters" |
---|
15 | |
---|
class Learner(object):
    """First-order Markov chain over lower-cased words and punctuation.

    ``db`` maps every token to a dict of the tokens that were seen right
    after it, each with the number of times that succession occurred.
    """

    # (pattern, replacement) pairs applied in this order before tokenising.
    # Raw strings keep the regex escapes valid on current Python versions.
    _LEARN_SUBS = (
        (r'[^a-zA-Z0-9\.;:\-]', ' '),
        (r'\s+', ' '),
        # The first rule already turns commas into blanks, so this one can
        # no longer match; it is kept to mirror the original rule set.
        (', ', ' , '),
        (r'\. ', ' . '),
        (': ', ' : '),
        ('; ', ' ; '),
    )

    # (pattern, replacement) pairs that glue punctuation back onto words and
    # start a new line after every full stop.
    _GENERATE_SUBS = (
        (' ,', ','),
        (r' \.', '.\n'),
        (' :', ':'),
        (' ;', ';'),
        (r'\n\s+', '\n'),
    )

    # Only tokens containing lower-case letters may be wrapped in a link.
    _WORD = re.compile('[a-z]+')

    def __init__(self):
        self.db = {}

    def learn(self, text):
        """Add the word successions found in ``text`` to the model.

        The text is normalised with ``_LEARN_SUBS``, split on single blanks
        and lower-cased; every adjacent pair of tokens increments one
        counter in ``self.db``.
        """
        for pattern, replacement in self._LEARN_SUBS:
            text = re.sub(pattern, replacement, text)
        items = [item.lower() for item in text.split(' ')]
        for item, nextitem in zip(items, items[1:]):
            followers = self.db.setdefault(item, {})
            followers[nextitem] = followers.get(nextitem, 0) + 1

    def save(self, filename):
        """Pickle the model to ``filename``."""
        with open(filename, 'wb') as fp:
            pickle.dump(self.db, fp)

    def load(self, filename):
        """Replace the model with the one pickled in ``filename``.

        NOTE: unpickling can execute arbitrary code; only load files that
        come from a trusted source.
        """
        with open(filename, 'rb') as fp:
            self.loadd(pickle.load(fp))

    def loadd(self, db):
        """Use ``db`` (a token -> {next token: count} dict) as the model."""
        self.db = db

    def generate(self, length=10000, prefix=False):
        """Return random text made of at most ``length`` + 1 tokens.

        The walk starts at a random token and repeatedly picks a follower
        with probability proportional to its count.  It stops early when the
        current token has no recorded followers.  When ``prefix`` is truthy,
        roughly 1% of the tokens containing lower-case letters are wrapped
        in an ``<a href="prefix+token">`` link.  The result always ends with
        ``'.\\n'``.

        Raises:
            ValueError: if the model is empty.
        """
        if not self.db:
            raise ValueError('cannot generate text from an empty model')
        key = random.choice(list(self.db))
        words = [key.capitalize()]
        for _ in range(length):
            previous = key
            transitions = self.db.get(key)
            if not transitions:
                break  # dead end: this token was never followed by anything
            # Weighted choice: walk the followers until the drawn number
            # falls inside the current follower's share of the total count.
            pick = random.randint(0, sum(transitions.values()) - 1)
            for key, weight in transitions.items():
                if pick < weight:
                    break
                pick -= weight
            # The token after a full stop starts a new sentence.
            token = key.capitalize() if previous == '.' else key
            if prefix and self._WORD.search(token) and \
                    random.random() < 0.01:
                token = '<a href="%s%s">%s</a>' % (prefix, token, token)
            words.append(token)
        text = ' '.join(words)
        for pattern, replacement in self._GENERATE_SUBS:
            text = re.sub(pattern, replacement, text)
        return text + '.\n'
---|
80 | |
---|
81 | |
---|
def da_du_ma(n=4):
    """Return a nonsense word made of ``n`` randomly chosen syllables."""
    syllables = ('da', 'du', 'ma', 'mo', 'ce', 'co',
                 'pa', 'po', 'sa', 'so', 'ta', 'to')
    chosen = []
    for _ in range(n):
        chosen.append(syllables[random.randint(0, 11)])
    return ''.join(chosen)
---|
86 | |
---|
87 | |
---|
def populate(table, n=None, default=True, compute=False, contents=None, ell=None):
    """Populate table with n records.

    if n is None, it does not populate the database but returns a generator
    if default=True use default values to fields.
    if compute=False doesn't load values into computed fields.
    if contents has data, use these values to populate related fields.
    if ell is given, it is the Learner used to generate random text.

    can be used in two ways:

    >>> populate(db.tablename, n=100)

    or

    >>> for k, row in enumerate(populate(db.tablename)): print(row)

    The record generator is returned in both cases; when ``n`` is given the
    first ``n`` records have already been consumed, inserted and committed.
    """

    contents = contents or {}

    # Forward the caller's learner instead of discarding it.
    generator = populate_generator(table, default=default,
                                   compute=compute, contents=contents,
                                   ell=ell)
    if n is not None:
        # Pull exactly n records so that no extra one is generated and
        # thrown away.
        for _ in range(n):
            table.insert(**next(generator))
        table._db.commit()

    return generator
---|
116 | |
---|
def _name_list(names):
    """Return ``names`` as a list, splitting a comma-separated string."""
    if isinstance(names, (str, unicodeT)):
        return names.split(',')
    return list(names)


def _referenced_ids(table, tablename, cache):
    """Return the ids of ``tablename``, querying once and caching in ``cache``."""
    if tablename not in cache:
        db = table._db
        cache[tablename] = [row.id for row in db(db[tablename].id > 0).select()]
    return cache[tablename]


def _sample_up_to_half(population):
    """Return a random sample of a non-empty list, at most about half of it."""
    # Floor division keeps the size an int; on Python 3 "/" gives a float,
    # which random.sample() rejects.
    size = random.randint(0, len(population) - 1) // 2
    return random.sample(population, size)


def populate_generator(table, default=True, compute=False, contents=None, ell=None):
    """Yield an endless stream of random record dicts for ``table``.

    if default=True use default values to fields.
    if contents has data, use these values to populate related fields.
    if ell is given, it is the Learner used for free text; otherwise a
    Learner loaded with the built-in IUP model is used.

    Fields of type ``id`` and ``upload`` and fields with a ``compute``
    attribute are never filled in, whatever the value of ``compute``; the
    parameter is kept for backward compatibility.
    """
    import decimal

    contents = contents or {}

    if not ell:
        ell = Learner()
        ell.loadd(IUP)

    # The name constants are comma-separated strings; choosing from the raw
    # string would return a single character.
    first_names = _name_list(FIRST_NAMES)
    last_names = _name_list(LAST_NAMES)

    ids = {}  # cache: referenced table name -> list of its ids

    while True:
        record = contents.copy()  # load user supplied contents.

        for fieldname in table.fields:
            if fieldname in record:
                continue  # if user supplied it, let it be.

            field = table[fieldname]
            ftype = field.type
            if not isinstance(ftype, (str, unicodeT)):
                continue
            if ftype in ('id', 'upload') or field.compute is not None:
                continue

            if default and field.default not in (None, ''):
                record[fieldname] = field.default
            elif ftype == 'boolean':
                record[fieldname] = random.random() > 0.5
            elif ftype == 'date':
                record[fieldname] = \
                    datetime.date(2009, 1, 1) - \
                    datetime.timedelta(days=random.randint(0, 365))
            elif ftype == 'datetime':
                record[fieldname] = \
                    datetime.datetime(2009, 1, 1) - \
                    datetime.timedelta(days=random.randint(0, 365))
            elif ftype == 'time':
                hour = random.randint(0, 23)
                minute = 15 * random.randint(0, 3)
                record[fieldname] = datetime.time(hour, minute, 0)
            elif ftype == 'password':
                record[fieldname] = ''
            elif ftype == 'integer' and \
                    hasattr(field.requires, 'options'):
                options = field.requires.options(zero=False)
                record[fieldname] = \
                    random.choice(options)[0] if options else None
            elif ftype == 'list:integer' and \
                    hasattr(field.requires, 'options'):
                options = field.requires.options(zero=False)
                if options:
                    record[fieldname] = [
                        item[0] for item in _sample_up_to_half(options)]
            elif ftype == 'integer':
                try:
                    record[fieldname] = random.randint(
                        field.requires.minimum, field.requires.maximum - 1)
                except (AttributeError, TypeError, ValueError):
                    # No usable range validator: guess from the field name.
                    if 'day' in fieldname:
                        record[fieldname] = random.randint(1, 28)
                    elif 'month' in fieldname:
                        record[fieldname] = random.randint(1, 12)
                    elif 'year' in fieldname:
                        record[fieldname] = random.randint(2000, 2013)
                    else:
                        record[fieldname] = random.randint(0, 1000)
            elif ftype == 'double' \
                    or str(ftype).startswith('decimal'):
                if hasattr(field.requires, 'minimum'):
                    rand = random.random()
                    if str(ftype).startswith('decimal'):
                        rand = decimal.Decimal(rand)
                    record[fieldname] = field.requires.minimum + \
                        rand * (field.requires.maximum -
                                field.requires.minimum)
                else:
                    record[fieldname] = random.random() * 1000
            elif ftype[:10] == 'reference ':
                candidates = _referenced_ids(table, ftype[10:], ids)
                record[fieldname] = \
                    random.choice(candidates) if candidates else 0
            elif ftype[:15] == 'list:reference ':
                candidates = _referenced_ids(table, ftype[15:], ids)
                # A list field gets an empty list, not 0, when the
                # referenced table has no rows.
                record[fieldname] = \
                    _sample_up_to_half(candidates) if candidates else []
            elif ftype == 'list:string' \
                    and hasattr(field.requires, 'options'):
                options = field.requires.options(zero=False)
                if options:
                    record[fieldname] = [
                        item[0] for item in _sample_up_to_half(options)]
            elif ftype == 'string':
                if hasattr(field.requires, 'options'):
                    options = field.requires.options(zero=False)
                    record[fieldname] = \
                        random.choice(options)[0] if options else None
                elif fieldname.find('url') >= 0:
                    record[fieldname] = 'http://%s.example.com' % \
                        da_du_ma(4)
                elif fieldname.find('email') >= 0:
                    record[fieldname] = '%s@example.com' % da_du_ma(4)
                elif fieldname.find('name') >= 0:
                    if fieldname.find('first') >= 0:
                        record[fieldname] = random.choice(first_names)
                    elif fieldname.find('last') >= 0:
                        record[fieldname] = random.choice(last_names)
                    elif fieldname.find('username') >= 0:
                        record[fieldname] = \
                            random.choice(first_names).lower() + \
                            str(random.randint(1000, 9999))
                    else:
                        record[fieldname] = \
                            random.choice(first_names) + ' ' + \
                            random.choice(last_names)
                elif fieldname.find('phone') >= 0:
                    digits = tuple(
                        random.choice('1234567890') for _ in range(10))
                    record[fieldname] = '(%s%s%s) %s%s%s-%s%s%s%s' % digits
                elif fieldname.find('address') >= 0:
                    record[fieldname] = '%s %s %s Street' % (
                        random.randint(1000, 9000),
                        random.choice(first_names),
                        random.choice(last_names))
                else:
                    z = ell.generate(10, prefix=False)
                    record[fieldname] = \
                        z[:min(60, field.length)].replace('\n', ' ')
            elif ftype == 'text':
                if fieldname.find('address') >= 0:
                    record[fieldname] = \
                        '%s %s %s Street\nChicago, IL\nUSA' % (
                            random.randint(1000, 9000),
                            random.choice(first_names),
                            random.choice(last_names))
                else:
                    record[fieldname] = ell.generate(
                        random.randint(10, 100), prefix=None)
        yield record
---|
276 | |
---|
if __name__ == '__main__':
    # Command-line use: optionally learn from the text file given as the
    # first argument, then print 100 tokens of generated text.
    ell = Learner()
    ell.db = IUP
    if len(sys.argv) > 1:
        # Close the input file deterministically instead of leaking it.
        with open(sys.argv[1]) as source:
            ell.learn(source.read())
        # NOTE(review): the original indentation was lost; dumping the model
        # is assumed to belong to the learning branch - confirm.
        print(ell.db)
    print(ell.generate(100))
---|