'use strict';

/*
 * Downloads the articles a Toutiao user has favourited and stores them in MySQL.
 *
 * The list endpoint is walked page by page; for every list entry the article page
 * is fetched, its content extracted and both the list row and the content row are
 * inserted. The walk stops at the entry whose repin_time equals the marker
 * persisted by the previous run.
 *
 * NOTE(review): Config, LOGGER, ROOTPATH, FILE, FinalRepinTime and FinalRepinFile
 * are never declared in this file; presumably ./config.js installs them as
 * globals — confirm there.
 */
require('./config.js');
global.Request = require('request');

// Cursor (max_repin_time) for the next list page; 0 means "first page".
let lastPinTime = 0;
// repin_time of the newest favourite seen in this run; persisted by finish().
let firstRepinTime = 0;
// Holds the Task that is currently walking a list page.
let currentTask = null;
// Number of articles whose list row and content row were both inserted.
let articleCount = 0;

const mysqlConfig = {
    pool: {
        name: 'TouTiaoFav',
        maxconn: 5
    },
    db: Config.mysqlConfig
};

// NOTE(review): a logged-in session cookie is hard-coded here; consider moving it
// into configuration so credentials do not live in source control.
const SESSION_COOKIE = 'install_id=12936422119; ttreq=1$1176d4ec1d8c4b340741b2ba75742f13f3581fa0; alert_coverage=56; _ga=GA1.2.1364790727.1494317820; _gid=GA1.2.1734828197.1502001335; qh[360]=1; login_flag=0f67bb380c54dc63fb41864f7f44160a; sessionid=7499554ce9453e80a602a56388e65d7b; sid_guard="7499554ce9453e80a602a56388e65d7b|1501589523|2591997|Thu\\054 31-Aug-2017 12:12:00 GMT"; sid_tt=7499554ce9453e80a602a56388e65d7b; uid_tt=bf0e5ab2d584acefc18547a78fada6b0';

// Both cookie headers carry the same value.
const headers = {
    'Host': 'www.toutiao.com',
    'User-Agent': 'Paw/3.1 (Macintosh; OS X/10.12.6) GCDHTTPRequest',
    'Cookie': SESSION_COOKIE,
    'X-SS-Cookie': SESSION_COOKIE
};

global.MysqlPool = require(`${ROOTPATH}/lib/mysql-pool.js`).instance(mysqlConfig);
testMysql();

/**
 * Runs a trivial query to verify the MySQL configuration. On success it loads
 * the MysqlDB helper and starts the download; on failure it only logs.
 */
function testMysql() {
    const poolName = mysqlConfig.pool.name;
    const con = MysqlPool.getConnection(poolName);
    con.query('SELECT VERSION() as version', function (err, result) {
        MysqlPool.freeConnection(poolName, con);
        if (err) {
            LOGGER.error('Mysql Connect error,please recheck your config');
            LOGGER.error(err);
            return;
        }
        LOGGER.info('Mysql Connect success');
        LOGGER.info(`Mysql Version: ${result[0].version} | User: ${Config.mysqlConfig.user} | Database: ${Config.mysqlConfig.database}`);
        global.MysqlDB = require(`${ROOTPATH}/lib/mysqldb.js`);
        init();
    });
}

/** Logs the marker left by the previous run and starts walking the first list page. */
function init() {
    LOGGER.info(`Last Pin Time: ${FinalRepinTime}|${new Date(FinalRepinTime * 1000)}`);
    currentTask = new Task();
}

/**
 * Ends a successful run: persists the newest repin_time seen as the new marker,
 * logs a summary and closes the MySQL pool.
 */
function finish() {
    FinalRepinTime = firstRepinTime;
    // Node's fs.writeFileSync no longer coerces non-string data (v14+), so the
    // numeric marker is converted explicitly.
    FILE.writeFileSync(FinalRepinFile, String(FinalRepinTime), 'UTF-8');
    LOGGER.info('--------------------------------------------------------------');
    LOGGER.info('All fav article download finished!!!');
    LOGGER.info(`Article analysis count [ ${articleCount} ]`);
    LOGGER.info('--------------------------------------------------------------');
    MysqlPool.end();
}

/**
 * Ends a failed run. No further request is scheduled after a failure, so the
 * pool is released; the persisted marker is left untouched so that the next
 * run starts from the same point. Callers log the reason themselves.
 */
function abort() {
    MysqlPool.end();
}

/**
 * Renders a value as a double-quoted SQL string literal. Backslashes are
 * escaped before quotes so that neither can terminate or alter the literal.
 * null/undefined become the empty string.
 *
 * NOTE(review): if MysqlDB.query supports placeholders, parameterised queries
 * would be preferable to building SQL text from remote data.
 *
 * @param {*} value - value to embed.
 * @returns {string} the quoted, escaped literal.
 */
function sqlString(value) {
    const text = value == null ? '' : String(value);
    return `"${text.replace(/\\/g, '\\\\').replace(/"/g, '\\"')}"`;
}

/**
 * Coerces a value to a finite number for direct use in SQL text.
 *
 * @param {*} value - value to embed.
 * @returns {number} the numeric value, or 0 when it is not a finite number.
 */
function sqlNumber(value) {
    const num = Number(value);
    return Number.isFinite(num) ? num : 0;
}

/**
 * Extracts the storable content of an article from its page source.
 * Galleries yield a comma-separated list of image URLs, videos yield the share
 * URL and everything else yields the raw `content` string found in the page.
 *
 * @param {Object} article - list entry; has_gallery and has_video select the extraction.
 * @param {*} body - response body of the article page.
 * @returns {string} the extracted content, or '' when nothing usable was found.
 */
function extractContent(article, body) {
    if (typeof body !== 'string') {
        return '';
    }

    if (article.has_gallery) {
        const galleryMatch = body.match(/gallery: (.*),/);
        if (!galleryMatch) {
            return '';
        }
        let gallery;
        try {
            // The capture is greedy and may not be valid JSON; a parse failure
            // must not crash the request callback.
            gallery = JSON.parse(galleryMatch[1]);
        } catch (parseErr) {
            LOGGER.error(`Gallery parse error,article id:${article.item_id}`);
            return '';
        }
        const images = gallery && gallery.sub_images;
        if (!Array.isArray(images)) {
            return '';
        }
        return images.map((image) => image.url).join(',');
    }

    if (article.has_video) {
        const videoMatch = body.match(/shareUrl: '(.*)'/);
        return videoMatch ? videoMatch[1] : '';
    }

    const textMatch = body.match(/content: '(.*)'\.replace/);
    return textMatch ? textMatch[1] : '';
}

/**
 * Walks one page of the favourites list: fetches the page, then fetches and
 * stores its articles one after another. When the page is exhausted it either
 * starts a new Task for the next page or finishes the run.
 */
function Task() {
    let articleList = [];
    let current = -1;
    let hasMore = false;

    getList();

    /** Fetches the list page addressed by lastPinTime and starts processing it. */
    function getList() {
        const args = [
            'page_type=2',
            `user_id=${Config.uid}`,
            'max_behot_time=0',
            'count=20'
        ];
        if (lastPinTime > 0) {
            args.push(`max_repin_time=${lastPinTime}`);
        }
        const url = Config.listUrl + args.join('&');

        Request({ url: url, encoding: 'utf-8', json: true, headers: headers }, function (err, response, body) {
            if (err) {
                LOGGER.error(`List get error,max repin time:${lastPinTime}`);
                LOGGER.error(`URL:${url}`);
                abort();
                return;
            }
            // A non-JSON or error response has no data array to iterate.
            if (!body || !Array.isArray(body.data)) {
                LOGGER.error(`List response malformed,max repin time:${lastPinTime}`);
                LOGGER.error(`URL:${url}`);
                abort();
                return;
            }
            if (lastPinTime == 0) {
                if (body.data.length === 0) {
                    LOGGER.error('List is empty on first page,nothing to download');
                    LOGGER.error(`URL:${url}`);
                    abort();
                    return;
                }
                firstRepinTime = body.data[0].repin_time;
                // Loose comparison on purpose: the persisted marker's type is
                // not visible here and may be a string.
                if (FinalRepinTime == firstRepinTime) {
                    finish();
                    return;
                }
            }
            hasMore = body.has_more;
            lastPinTime = body.max_repin_time;
            articleList = body.data;
            getArticle();
        });
    }

    /** Advances to the next list entry, downloads its page and stores it. */
    function getArticle() {
        current++;
        if (current === articleList.length) {
            taskFinish();
            return;
        }

        const article = articleList[current];
        // Reached the newest entry of the previous run (loose comparison, see getList).
        if (FinalRepinTime == article.repin_time) {
            finish();
            return;
        }
        article.content = '';
        const url = Config.articleUrl + article.item_id;

        Request({ url: url, encoding: 'utf-8', headers: headers }, function (err, response, body) {
            if (err) {
                LOGGER.error(`Article get error,max repin time:${lastPinTime},article id:${article.item_id}`);
                LOGGER.error(`URL:${url}`);
                abort();
                return;
            }
            article.content = extractContent(article, body);
            if (article.content === '') {
                // Nothing extractable: skip this entry.
                getArticle();
                return;
            }
            insertArticle(article);
        });
    }

    /**
     * Inserts the list row and then the content row for an article, and moves
     * on to the next entry once both succeeded.
     *
     * @param {Object} article - list entry with its extracted content.
     */
    function insertArticle(article) {
        const type = article.has_video ? 2 : article.has_gallery ? 1 : 0;
        const listValues = [
            sqlString(article.title),
            type,
            sqlString(article.chinese_tag),
            sqlString(article.image_url),
            sqlString(article.abstract),
            sqlNumber(article.gallary_image_count),
            sqlString(article.display_url),
            sqlString(article.item_id)
        ];
        const listSql = `insert IGNORE into list (title,type,tag,cover,abstract,gallaryCount,source,item_id) values(${listValues.join(',')})`;

        MysqlDB.query(listSql, function (listErr) {
            if (listErr) {
                LOGGER.error(`Article insert error,article id:${article.item_id}`);
                LOGGER.error(`SQL:${listSql}`);
                abort();
                return;
            }
            const articleSql = `insert ignore into article (articleID,content) values(${sqlString(article.item_id)},${sqlString(article.content)})`;
            MysqlDB.query(articleSql, function (articleErr) {
                if (articleErr) {
                    LOGGER.error(`Article insert error,article id:${article.item_id}`);
                    LOGGER.error(`SQL:${articleSql}`);
                    abort();
                    return;
                }
                articleCount++;
                getArticle();
            });
        });
    }

    /** Called when the current page is exhausted: continue with the next page or finish. */
    function taskFinish() {
        LOGGER.info('List Finished!');
        if (hasMore) {
            LOGGER.info('More List read!!!');
            currentTask = null;
            currentTask = new Task();
        } else {
            finish();
        }
    }
}