Shell脚本模板
#!/bin/bash
#------------------------------------------------------
# Usage:       ./start-comm-job_pro.sh
# Filename:    start-comm-job_pro.sh
# Revision:    1.0
# Date:        2017/10/30
# Author:
# Description:
# Notes:
#------------------------------------------------------

# The job was designed to take two arguments, an industry ID and a date, and
# to fail with the usage message when fewer than two are supplied.
# Documented invocation (run it with bash; the script uses bash-only syntax):
#   sh /home/shutong/crawl/script/start-comm-job_pro.sh ${industry_id} ${date_id}
# NOTE(review): the argument-count check is disabled below and the date is
# read from $1, not $2 -- confirm which calling convention is intended.
Usage="Usage: $0 industry_id date_id "
#[ $# -lt 2 ] && echo "${Usage}" && exit -1

# Timestamp helper for log lines, e.g. "2017-12-08 10:15:30".
# Defined as a function rather than an alias: aliases are only expanded in
# scripts when expand_aliases is set before the line using them is parsed,
# while a function works unconditionally.
dt() {
  date '+%Y-%m-%d %H:%M:%S'
}

#script_dir=/home/shutong/crawl/script/
#mysql_dir='/home/shutong/mariadb-5.5.57-linux-x86_64/bin'
#date_hour_id=`date -d "1 days ago" +%Y%m%d%H`
#date_id=`date -d "0 days ago" +%Y%m%d`

# Business date: an explicit first argument takes precedence, otherwise
# default to today (YYYYMMDD).
if [[ -n "${1:-}" ]]; then
  date_id=$1
  #echo "date argument given: ${date_id}"
else
  date_id=$(date +%Y%m%d)
  #echo "no date argument, defaulting to today: ${date_id}"
fi

# Database name
db='label'
# MySQL server address
host=192.168.0.112
# MySQL user name
user=root
# MySQL password
# NOTE(review): credentials are hard-coded and passed on the mysql command
# line (visible in `ps`); consider an option file or environment variable.
passwd=root

# Data file, named ${industry_id}_${date_id}.csv and stored under
# /home/shutong/crawl/data/${industry_id}
# e.g. automotive industry data for 2017-12-08: 004004_20171208.csv
# NOTE(review): industry_id is never assigned in this script, so unless it is
# inherited from the environment this path has empty components.
filename="/home/shutong/crawl/data/${industry_id}/${industry_id}_${date_id}.csv"

#######################################
# Reload one day's worth of rows of a table from a '^'-delimited file:
# deletes the rows whose date_id equals the current ${date_id}, then bulk
# loads the file with LOAD DATA LOCAL INFILE.
# Globals:   host, user, passwd, db, date_id (read)
# Arguments: $1 - path of the data file to load
#            $2 - name of the target table
# Returns:   1 if an argument is missing, otherwise the exit status of mysql
#######################################
load_to_src_url() {
  if [[ $# -lt 2 || -z "$1" || -z "$2" ]]; then
    echo "load_to_src_url: expected <data_file> <table_name>" >&2
    return 1
  fi

  # Locals keep the caller's global ${filename} from being overwritten.
  local data_file=$1
  local tab_nm=$2

  #cd ${mysql_dir}
  # The statement is a single double-quoted string; '\n' stays a literal
  # backslash-n inside double quotes, so MySQL receives it as the line
  # terminator escape.
  mysql -h "${host}" -u"${user}" -p"${passwd}" -e "use ${db};
delete from ${tab_nm} where date_id = '${date_id}';
LOAD DATA LOCAL INFILE '${data_file}' INTO TABLE ${tab_nm} character set utf8
FIELDS TERMINATED BY '^' LINES TERMINATED BY '\n';
"
}

# Start crawling the data for ${date_id}
python /home/shutong/crawl/36bigdata/crawl_bigdata_article.py "${date_id}"

# Location of the article title data
#filename="/home/shutong/crawl/36bigdata/input/${date_id}/article_title_info_${date_id}.csv"
#load_to_src_url ${filename} "src_article_title"

# Location of the article content data
#filename="/home/shutong/crawl/36bigdata/input/${date_id}/article_context_info_${date_id}.csv"
#load_to_src_url ${filename} "src_article_context"

#hdfs dfs -mkdir -p /input/36bigdata/src_article_title/"${date_id}"
#hdfs dfs -put ${filename} /input/36bigdata/src_article_title/${date_id}
#hive -e "use label; alter table src_article_title add partition(date_id=${date_id}) location 'hdfs://hadoop:9000/input/36bigdata/src_article_title/${date_id}';"
Shell日期参数
# Resolve the business date: an explicit first argument takes precedence,
# otherwise default to today (YYYYMMDD). The test and the assignment must
# look at the same positional parameter, otherwise a date passed as the only
# argument is silently ignored.
if [[ -n "${1:-}" ]]; then
  date_id=$1
  #echo "date argument given: ${date_id}"
else
  date_id=$(date +%Y%m%d)
  #echo "no date argument, defaulting to today: ${date_id}"
fi

# The date three days before date_id (relies on GNU date's -d option).
# An unparsable date_id would otherwise leave this empty, so stop early.
before_3_days=$(date -d "${date_id} 3 days ago" +%Y%m%d) || {
  echo "invalid date_id: ${date_id}" >&2
  exit 1
}

# Log lines are prefixed with the current time as "YYYY-MM-DD HH:MM:SS";
# the timestamp is produced inline so this snippet does not depend on a
# helper defined elsewhere.
echo "$(date '+%Y-%m-%d %H:%M:%S'):date_id is--${date_id}"
echo "$(date '+%Y-%m-%d %H:%M:%S'):before_3_days--${before_3_days}"