最近常常需要查看LZO文件里面的内容,这些文件通常很大,放在hdfs上。我没有好的方法,我以前偶尔查看其中内容都是直接get到本地然后用lzop解压缩然后再more的。这样做当你偶尔使用的时候即使文件稍微大点,也许也是可以接受的。但现在我需要常常grep里面的内容,就不那么欢乐了。
所以写了个shell脚本lzoc(即 lzo cat),专门用来查看HDFS里LZO文件的内容。正常情况下它不输出任何多余的东西,这样就可以和more、head、tail等工具结合使用了。
代码如下:
它有三个选项:
-c 指示先删除当前目录中已经存在的同名文件,这通常是为了清除旧的副本而设置的;
-d 指示在最后阶段删除当前目录里的中间文件,因为脚本会把文件从HDFS中get到当前目录再解压;
-i 指示输出一些交互信息;如果cat出来的内容还要另作它用(例如通过管道交给其他工具),就不要使用这个选项。
使用示例:
$./lzoc /user/Hadoop/output/filename.lzo | more
#!/bin/sh
# lzoc: cat an LZO-compressed file that is stored on Hadoop HDFS.
#
# Usage: lzoc [-c] [-d] [-i] <hdfs path of the .lzo file>
#   -c  delete local files with the same names before doing anything else
#   -d  delete the local intermediate files in the final stage
#   -i  print informational messages in addition to the file content
#
# The HDFS path is taken from the last command-line argument.
filePath=""            # full path of the hadoop lzo file
lzoFileName=""         # local file name (with .lzo) after `hadoop fs -get`
fileName=""            # local file name with the .lzo extension stripped
deleteBeforeExecute=N  # Y when -c is given: old local files are deleted first
deleteAfterExecute=N   # Y when -d is given: local files are deleted at the end
interactiveMsg=N       # Y when -i is given; N means only the file text should print

# Parse the option flags. The leading ':' in the option string switches
# getopts to silent mode, so an unknown flag is delivered in OPTARG and the
# message below can name it.
while getopts :cdi OPTION; do
  case "$OPTION" in
    c) deleteBeforeExecute=Y ;;
    d) deleteAfterExecute=Y ;;
    i) interactiveMsg=Y ;;
    \?)
      echo "illegal option:$OPTARG" >&2
      # 254 is the status bash reported for the former `exit -2`; negative
      # numbers are rejected by other /bin/sh implementations.
      exit 254
      ;;
  esac
done
shift $((OPTIND - 1))

# At least one operand (the file path) must remain once the flags are gone.
if [ $# -lt 1 ]; then
  echo "must has aleast one parameter, which is the fileName." >&2
  exit 255  # status bash reported for the former `exit -1`
fi

# Walk the remaining operands so that filePath ends up holding the last one.
for filePath in "$@"; do :; done
lzoFileName=${filePath##*/}    # strip everything up to the last '/'
fileName=${lzoFileName%.lzo*}  # strip the trailing .lzo suffix
# Delete old local copies when -c was given.
#
# lzoc_remove_stale PATH MESSAGE
#   Removes PATH if it exists. MESSAGE is printed only when interactive
#   messages were requested (-i), so the plain output stays limited to the
#   file content. A PATH that does not exist is skipped silently.
lzoc_remove_stale() {
  if [ -e "$1" ]; then
    if [ "$interactiveMsg" = "Y" ]; then
      echo "$2"
    fi
    rm -- "$1"
  fi
}

# '=' instead of '==': the latter is not understood by every /bin/sh.
if [ "$deleteBeforeExecute" = "Y" ]; then
  lzoc_remove_stale "$fileName" "delete old file"
  lzoc_remove_stale "$lzoFileName" "delete old lzo file"
fi
# Make sure the hadoop command is available.
# `command -v` is the POSIX lookup; any non-zero status (not only 1) means
# the command could not be found.
if ! command -v hadoop > /dev/null 2>&1; then
  echo "Command not exist,hadoop may not have been started." >&2
  exit 253  # status bash reported for the former `exit -3`
fi
# The path must exist on HDFS ...
if ! hadoop fs -test -e "$filePath" > /dev/null 2>&1; then
  echo "No such file for directory:$filePath" >&2
  exit 252  # status bash reported for the former `exit -4`
fi
# ... and it must not be a directory, because a directory can not be cat'ed.
if hadoop fs -test -d "$filePath" > /dev/null 2>&1; then
  echo "Can not cat a directory:$filePath" >&2
  exit 252
fi
# Make sure lzop is installed.
# `command -v` is the POSIX lookup; any non-zero status (not only 1) means
# the tool could not be found.
if ! command -v lzop > /dev/null 2>&1; then
  echo "Tool missed:lzop is not installed." >&2
  exit 251  # status bash reported for the former `exit -5`
fi
# Fetch the lzo file from hadoop unless a local copy is already present in
# the current directory.
if [ -e "$lzoFileName" ]; then
  if [ "$interactiveMsg" = "Y" ]; then
    echo "LZO file already exist."
  fi
else
  if [ "$interactiveMsg" = "Y" ]; then
    echo "LZO file not exist."
  fi
  # Get the file from hadoop into the current directory. Stop right here when
  # the download fails instead of carrying on without the file.
  if ! hadoop fs -get "$filePath" .; then
    echo "Failed to get file from hadoop:$filePath" >&2
    exit 250
  fi
fi
# Decompress the lzo file unless the decompressed file is already present in
# the current directory.
if [ -e "$fileName" ]; then
  if [ "$interactiveMsg" = "Y" ]; then
    echo "File already exist."
  fi
else
  if [ "$interactiveMsg" = "Y" ]; then
    echo "File not exist."
  fi
  # Decompress the lzo file; lzop's own output is discarded so that it does
  # not mix with the file content printed afterwards.
  lzop -dv "$lzoFileName" > /dev/null 2>&1
  # Judge by the result rather than by lzop's chatter: without the
  # decompressed file there is nothing to print.
  if [ ! -e "$fileName" ]; then
    echo "Failed to decompress lzo file:$lzoFileName" >&2
    exit 249
  fi
fi
# Print the decompressed file on stdout. The name is quoted so that an empty
# or space-containing name can never turn into "read stdin" or several
# arguments; `--` keeps a leading '-' from being taken as an option.
cat -- "$fileName"

# lzoc_discard PATH
#   Removes PATH if it exists; a PATH that does not exist is skipped.
lzoc_discard() {
  if [ -e "$1" ]; then
    rm -- "$1"
  fi
}

# Delete the local files in the final stage when -d was given.
# '=' instead of '==': the latter is not understood by every /bin/sh.
if [ "$deleteAfterExecute" = "Y" ]; then
  lzoc_discard "$fileName"
  lzoc_discard "$lzoFileName"
  if [ "$interactiveMsg" = "Y" ]; then
    echo "files has been deleted"
  fi
fi