1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
"""
To perform Schema Validation.

Copyright (C) 2004-2009 Tiny SPRL (<http://tiny.be>).
GNU General Public License.
"""
import datetime
import time
import types

from etl.component import component
# NOTE(review): the `class` line and the `def` lines of the copy and processing
# methods were missing from the reviewed text. The class name comes from the
# super() call below; the base class `component` and the method names
# `__copy__` / `process` are reconstructed - confirm against etl.component.
class schema_validator(component):
    """
    This is an ETL Component that performs Schema Validation.

    Type                  : Data Component.
    Computing Performance : Semi-Streamline.
    Input Flows           : 1.
    * .*                  : The main data flow with input data.
    Output Flows          : 0-x.
    * .*                  : Returns the main flow with Schema Validation Result.
    * invalid_field       : Returns data which has more or less fields.
    * invalid_name        : Returns data which has wrong field name.
    * invalid_key         : Returns data which does not match unique constraint.
    * invalid_null        : Returns data which does not match null constraint.
    * invalid_type        : Returns data that fields have invalid type.
    * invalid_size        : Returns data which has more size.
    * invalid_format      : Returns data which does not match with format.
    """

    def __init__(self, schema, name='component.transform.schema_validator'):
        """
        Required Parameters
        schema : Dictionary mapping each field name to a dictionary of
                 constraints. The constraint keys read by this component are
                 'key', 'Is_NULL', 'type', 'format' and 'size'.

        Extra Parameters
        name   : Name of Component.
        """
        super(schema_validator, self).__init__(name=name)
        # NOTE(review): 'transfer' here differs from 'transform' in the default
        # name above - confirm which spelling is intended.
        self._type = 'component.transfer.schema_validator'
        self.schema = schema

    def __copy__(self):
        """
        Return a new schema_validator built from this component's schema and
        name.
        """
        res = schema_validator(self.schema, self.name)
        return res

    def process(self):
        """
        Validate every incoming row against the schema.

        Yields (row, channel) pairs where channel is 'main' for a valid row or
        the name of the first failed check ('invalid_field', 'invalid_name',
        'invalid_key', 'invalid_null', 'invalid_type', 'invalid_format',
        'invalid_size').
        """
        for trans in self.input_get().values():
            for iterator in trans:
                # Key values already seen in this iterator, tracked per field so
                # that equal values in two different key columns do not collide.
                seen_keys = {}
                for row in iterator:
                    yield row, self._row_channel(row, seen_keys)

    def _row_channel(self, row, seen_keys):
        """
        Return the output channel for one row.

        row       : Dictionary of field name -> value.
        seen_keys : Dictionary of field name -> set of key values already
                    accepted; updated in place.
        """
        schema = self.schema
        if len(row) != len(schema):
            return 'invalid_field'

        for field in row:
            if field not in schema:
                return 'invalid_name'
            rules = schema[field]
            value = row[field]

            if rules.get('key', False):
                # Any falsy value (None, '', 0, ...) is rejected as a key.
                if not value:
                    return 'invalid_key'
                seen = seen_keys.setdefault(field, set())
                try:
                    duplicate = value in seen
                except TypeError:
                    # An unhashable value cannot be tracked as a unique key.
                    return 'invalid_key'
                if duplicate:
                    return 'invalid_key'
                seen.add(value)

            # NOTE(review): a truthy 'Is_NULL' makes falsy values invalid, which
            # reads inverted relative to the flag's name - confirm the intent.
            if rules.get('Is_NULL', False) and not value:
                return 'invalid_null'

            if rules.get('type', False):
                # SECURITY NOTE: eval() executes the schema's 'type' string as
                # Python code; only use schemas from a trusted source.
                # The comparison is on the exact type, subclasses do not match.
                if type(value) is not eval(rules['type']):
                    return 'invalid_type'

            # The format is only checked for date fields; .get() avoids a
            # KeyError when 'format' is given without 'type'.
            if rules.get('format', False) and rules.get('type') == "datetime.date":
                try:
                    time.strptime(str(value), rules['format'])
                except ValueError:
                    return "invalid_format"

            if rules.get('size', False):
                if len(value) > int(rules['size']):
                    return 'invalid_size'

        return 'main'
114
115
117 from etl_test import etl_test
118 from etl import transformer
119 input_part = [
120 {'id': 1L, 'name': 'Fabien', 'active': True, 'birth_date': '2009-02-01', 'amount': 209.58},
121 {'id': 2L, 'name': 'Luc', 'active': True, 'birth_date': '2007-02-01', 'amount': 211.25},
122 {'id': 3L, 'name': 'Henry', 'active': True, 'birth_date': '2006-02-01', 'amount': 219.20},
123 ]
124 schema= {
125 'id': {'type': 'long', 'key': True, 'Is_Null': True},
126 'name': {'type': 'str', 'size': '10', 'Is_NULL': False},
127 'active': {'type': 'bool', 'Is_NULL': False},
128 'birth_date': {'type': 'datetime.date', 'Is_NULL': False, 'format': '%y-%m-%d'},
129 'amount': {'type': 'float', 'Is_NULL': True}
130 }
131 test = etl_test.etl_component_test(schema_validator(schema))
132 test.check_input({'main': input_part})
133 print test.output()
134
# Run the component self-test when this module is executed as a script.
if __name__ == '__main__':
    test()
137