In Pig: write multiple output files based on a condition, and prepare the data for easy access through a Hive view
$PIG_HOME/bin> ./pig -x local /home/tradernyse/pigscripts/nysesplitout.pig
Contents of the nysesplitout.pig script:
-- nysesplitout.pig
-- Loads the processed NYSE CSV, partitions its rows into six relations by the
-- value of the ninth column (f9), and stores each relation as comma-delimited
-- text in its own output directory.
x = LOAD '/home/tradernyse/clientTraderDatasets_12216/nyseflumeprocessed.csv'
    USING PigStorage(',')
    AS (f1:chararray, f2:chararray, f3:chararray, f4:chararray,
        f5:chararray, f6:chararray, f7:chararray, f8:chararray,
        f9:float);

-- The buckets must be mutually exclusive: SPLIT places a row in EVERY relation
-- whose condition it satisfies. The last bucket uses a strict '>' so that rows
-- with f9 == 90 land only in x45 (previously they were written to both x45
-- and x5).
-- Rows whose f9 is null satisfy no condition and are not written anywhere.
SPLIT x INTO
    x1  IF f9 < 50,
    x12 IF f9 >= 50 AND f9 < 60,
    x23 IF f9 >= 60 AND f9 < 70,
    x34 IF f9 >= 70 AND f9 < 80,
    x45 IF f9 >= 80 AND f9 <= 90,
    x5  IF f9 > 90;

STORE x1  INTO '/home/tradernyse/clientTraderready_12216/nyselessthan50' USING PigStorage(',');
STORE x12 INTO '/home/tradernyse/clientTraderready_12216/nysebtw50_60'   USING PigStorage(',');
STORE x23 INTO '/home/tradernyse/clientTraderready_12216/nysebtw60_70'   USING PigStorage(',');

-- NOTE(review): the remaining three outputs go under clientTraderDatasets_12216
-- rather than clientTraderready_12216 like the three above -- confirm this is
-- intentional before pointing Hive at these directories.
STORE x34 INTO '/home/tradernyse/clientTraderDatasets_12216/nysebtw70_80' USING PigStorage(',');
STORE x45 INTO '/home/tradernyse/clientTraderDatasets_12216/nysebtw80_90' USING PigStorage(',');
-- Absolute path: the leading '/' was missing, which made this location
-- relative to the working directory instead of /home/tradernyse/...
STORE x5  INTO '/home/tradernyse/clientTraderDatasets_12216/nysegreater90' USING PigStorage(',');
No comments:
Post a Comment