1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 """
23 To perform unique operation.
24
25 Copyright (C) 2004-2009 Tiny SPRL (<http://tiny.be>).
26 GNU General Public License.
27 """
28
29 from etl.component import component
class unique(component):
    """
    ETL component that separates distinct records from repeated ones.

    Type                  : Data Component.
    Computing Performance : Semi-Streamline.
    Input Flows           : 1.
    * .*                  : The main data flow with input data.
    Output Flows          : 0-x.
    * main                : Each distinct record, in first-seen order.
    * duplicate           : Every further occurrence of an already-seen record.
    """

    def __init__(self, name='component.transform.unique'):
        """
        Set up the component.

        :param name: name of this component instance; forwarded unchanged
                     to the base-class constructor.
        """
        super(unique, self).__init__(name=name)
        # NOTE(review): the default name says 'transform' while the type
        # says 'transfer' -- confirm which spelling is intended.
        self._type = 'component.transfer.unique'

    def __copy__(self):
        """
        Return a new ``unique`` component that carries the same name.

        :return: a fresh ``unique`` instance built from ``self.name``.
        """
        # This used to call ``sort(...)``, a name that this module neither
        # imports nor defines, so copying could not produce a ``unique``.
        res = unique(self.name)
        return res

    def process(self):
        """
        Generate ``(record, channel)`` pairs from every input flow.

        A record equal to one seen earlier is yielded immediately on the
        ``"duplicate"`` channel.  After all inputs are exhausted, the
        distinct records are yielded on the ``"main"`` channel in the order
        they were first seen.
        """
        unique_datas = []
        for _channel, trans in self.input_get().items():
            for iterator in trans:
                for d in iterator:
                    # Membership is an equality scan over the list; the
                    # sample records in this module's test are dicts, which
                    # cannot be stored in a set.
                    if d in unique_datas:
                        yield d, "duplicate"
                    else:
                        unique_datas.append(d)
        # The distinct records are only released once the input is finished.
        for d in unique_datas:
            yield d, "main"
63
65
66 from etl_test import etl_test
67 from etl import transformer
68 input_part = [
69 {'id': 1L, 'name': 'Fabien', 'active': True, 'birth_date': '2009-02-01', 'amount': 209.58},
70 {'id': 1L, 'name': 'Fabien', 'active': True, 'birth_date': '2009-02-01', 'amount': 209.58},
71 {'id': 3L, 'name': 'Henry', 'active': True, 'birth_date': '2006-02-01', 'amount': 219.20},
72 ]
73 unique_part = [
74 {'id': 1L, 'name': 'Fabien', 'active': True, 'birth_date': '2009-02-01', 'amount': 209.58},
75 {'id': 3L, 'name': 'Henry', 'active': True, 'birth_date': '2006-02-01', 'amount': 219.20},
76 ]
77 duplicate_part = [
78 {'id': 1L, 'name': 'Fabien', 'active': True, 'birth_date': '2009-02-01', 'amount': 209.58},
79 ]
80 test = etl_test.etl_component_test(unique())
81 test.check_input({'main': input_part})
82 test.check_output(unique_part, 'main')
83 print test.output()
84
# Run the self-test only when this file is executed as a script.
if __name__ == "__main__":
    test()
87