Package etl :: Package component :: Package transform :: Module unique
[hide private]
[frames | no frames]

Source Code for Module etl.component.transform.unique

 1  # -*- encoding: utf-8 -*- 
 2  ############################################################################## 
 3  # 
 4  #    ETL system- Extract Transfer Load system 
 5  #    Copyright (C) 2004-2009 Tiny SPRL (<http://tiny.be>). All Rights Reserved 
 6  #    $Id$ 
 7  # 
 8  #    This program is free software: you can redistribute it and/or modify 
 9  #    it under the terms of the GNU General Public License as published by 
10  #    the Free Software Foundation, either version 3 of the License, or 
11  #    (at your option) any later version. 
12  # 
13  #    This program is distributed in the hope that it will be useful, 
14  #    but WITHOUT ANY WARRANTY; without even the implied warranty of 
15  #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
16  #    GNU General Public License for more details. 
17  # 
18  #    You should have received a copy of the GNU General Public License 
19  #    along with this program.  If not, see <http://www.gnu.org/licenses/>. 
20  # 
21  ############################################################################## 
22  """ 
23   To perform unique operation. 
24   
25   Copyright (C) 2004-2009 Tiny SPRL (<http://tiny.be>). 
26   GNU General Public License. 
27  """ 
28   
29  from etl.component import component 
class unique(component):
    """
    ETL component that separates distinct rows from repeated ones.

    Type                  : Data Component.
    Computing Performance : Semi-Streamline.
    Input Flows           : 1.
    * .*                  : The main data flow with input data.
    Output Flows          : 0-x.
    * main                : Every distinct row, in first-seen order.
    * duplicate           : Every further occurrence of a row already seen.
    """

    def __init__(self, name='component.transform.unique'):
        """Create the component.

        :param name: name handed to the base ``component`` constructor.
        """
        super(unique, self).__init__(name=name)
        # NOTE(review): the type string says "transfer" while the default
        # name and the module path say "transform".  Left untouched because
        # other code may match on this exact value -- confirm before changing.
        self._type = 'component.transfer.unique'

    def __copy__(self):
        """Return a new ``unique`` component carrying the same name.

        The previous implementation called ``sort(...)``, a name that is not
        defined in this module, so copying raised ``NameError``.
        """
        return unique(self.name)

    def process(self):
        """Yield ``(row, channel)`` pairs for every incoming row.

        A row equal to one already seen is yielded immediately on the
        ``"duplicate"`` channel.  First occurrences are held back and yielded
        on the ``"main"`` channel only after all input has been consumed.
        """
        # Membership is tested by equality against a list, which costs one
        # scan per row.  The bundled self-test feeds dict rows, which are
        # unhashable, so a set is not a drop-in replacement here.
        unique_datas = []
        for _channel, trans in self.input_get().items():
            for iterator in trans:
                for d in iterator:
                    if d in unique_datas:
                        yield d, "duplicate"
                    else:
                        unique_datas.append(d)
        for d in unique_datas:
            yield d, "main"
63
def test():
    """Self-test: run ``unique`` on three rows, two of which are identical.

    Declares the expected content of both output channels ("main" and
    "duplicate") to the ``etl_test`` harness and prints whatever
    ``output()`` returns.
    """
    from etl_test import etl_test
    # NOTE(review): ``transformer`` is not referenced below; the import is
    # kept in case loading it has side effects the harness needs -- confirm.
    from etl import transformer

    # Plain int ids instead of ``1L``: they compare equal to longs on
    # Python 2 and, unlike the ``L`` suffix, also parse on Python 3.
    input_part = [
        {'id': 1, 'name': 'Fabien', 'active': True, 'birth_date': '2009-02-01', 'amount': 209.58},
        {'id': 1, 'name': 'Fabien', 'active': True, 'birth_date': '2009-02-01', 'amount': 209.58},
        {'id': 3, 'name': 'Henry', 'active': True, 'birth_date': '2006-02-01', 'amount': 219.20},
    ]
    unique_part = [
        {'id': 1, 'name': 'Fabien', 'active': True, 'birth_date': '2009-02-01', 'amount': 209.58},
        {'id': 3, 'name': 'Henry', 'active': True, 'birth_date': '2006-02-01', 'amount': 219.20},
    ]
    duplicate_part = [
        {'id': 1, 'name': 'Fabien', 'active': True, 'birth_date': '2009-02-01', 'amount': 209.58},
    ]

    component_test = etl_test.etl_component_test(unique())
    component_test.check_input({'main': input_part})
    component_test.check_output(unique_part, 'main')
    # The expected duplicates were previously built but never checked, so
    # the "duplicate" flow of the component went untested.
    component_test.check_output(duplicate_part, 'duplicate')
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print(component_test.output())
# Run the bundled self-test when this module is executed as a script.
if __name__ == "__main__":
    test()