123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174 |
'use strict';
require('./config.js');
global.Request = require('request');

// Crawl progress shared by every Task instance:
//   lastPinTime    - paging cursor sent as max_repin_time (0 means first page)
//   firstRepinTime - repin_time of the first entry on the first page of this run
//   currentTask    - the Task currently walking a list page
//   articleCount   - number of articles stored so far
let lastPinTime = 0;
let firstRepinTime = 0;
let currentTask = null;
let articleCount = 0;

// Settings handed to the project's MySQL pool wrapper.
const mysqlConfig = {
    pool: {
        name: 'TouTiaoFav',
        maxconn: 5
    },
    db: Config.mysqlConfig
};

// NOTE(review): this is a hard-coded login session committed to source;
// consider loading it from configuration instead.
const sessionCookie = 'install_id=12936422119; ttreq=1$1176d4ec1d8c4b340741b2ba75742f13f3581fa0; alert_coverage=56; _ga=GA1.2.1364790727.1494317820; _gid=GA1.2.1734828197.1502001335; qh[360]=1; login_flag=0f67bb380c54dc63fb41864f7f44160a; sessionid=7499554ce9453e80a602a56388e65d7b; sid_guard="7499554ce9453e80a602a56388e65d7b|1501589523|2591997|Thu\\054 31-Aug-2017 12:12:00 GMT"; sid_tt=7499554ce9453e80a602a56388e65d7b; uid_tt=bf0e5ab2d584acefc18547a78fada6b0';

// Headers sent with every list and article request; the same cookie value
// is supplied under both cookie header names.
const headers = {
    'Host': 'www.toutiao.com',
    'User-Agent': 'Paw/3.1 (Macintosh; OS X/10.12.6) GCDHTTPRequest',
    'Cookie': sessionCookie,
    'X-SS-Cookie': sessionCookie
};

global.MysqlPool = require(ROOTPATH + '/lib/mysql-pool.js').instance(mysqlConfig);

// Kick off: the crawl only starts once the database answers.
testMysql();
/**
 * Checks that MySQL is reachable by asking the server for its version.
 *
 * The borrowed connection is handed back to the pool as soon as the query
 * returns. On success the query helper is loaded into global.MysqlDB and
 * init() starts the crawl; on failure the error is logged and nothing else
 * happens.
 */
function testMysql(){
    const poolName = mysqlConfig.pool.name;
    const connection = MysqlPool.getConnection(poolName);
    connection.query('SELECT VERSION() as version',function(err,rows,fields){
        MysqlPool.freeConnection(poolName,connection);
        if(!err){
            LOGGER.info('Mysql Connect success');
            LOGGER.info(`Mysql Version: ${rows[0].version} | User: ${Config.mysqlConfig.user} | Database: ${Config.mysqlConfig.database}`);
            global.MysqlDB = require(ROOTPATH + '/lib/mysqldb.js');
            init();
            return;
        }
        LOGGER.error('Mysql Connect error,please recheck your config');
        LOGGER.error(err);
    });
}
/**
 * Logs the resume marker (raw value and as a Date, treating it as seconds)
 * and starts the first list Task.
 */
function init(){
    const lastPinDate = new Date(FinalRepinTime*1000);
    LOGGER.info(`Last Pin Time: ${FinalRepinTime}|${lastPinDate}`);
    currentTask = new Task();
}
/**
 * Ends the run: stores the repin time of the newest favourite seen in this
 * run as the resume marker, logs a summary and closes the MySQL pool.
 *
 * The marker is written to FinalRepinFile so a later run can stop when it
 * reaches an article with that repin time.
 */
function finish(){
    FinalRepinTime = firstRepinTime;
    // The marker is a number; current Node releases reject non-string,
    // non-buffer data in writeFileSync, so stringify it explicitly.
    // NOTE(review): FILE is presumably Node's fs module - confirm in config.js.
    FILE.writeFileSync(FinalRepinFile,String(FinalRepinTime),'UTF-8');
    LOGGER.info('--------------------------------------------------------------');
    LOGGER.info('All fav article download finished!!!');
    LOGGER.info(`Article analysis count [ ${articleCount} ]`);
    LOGGER.info('--------------------------------------------------------------');
    MysqlPool.end();
}
/**
 * Processes one page of the favourites list.
 *
 * Constructing a Task immediately requests a list page (continuing from the
 * shared lastPinTime cursor), then fetches and stores the page's articles
 * one after another. When the page is exhausted it either starts a new Task
 * for the next page or calls finish(). The run also ends early as soon as an
 * article whose repin_time equals FinalRepinTime is reached.
 *
 * A failed request or failed insert is logged and stops the chain; an
 * article whose content cannot be extracted is skipped.
 */
function Task(){
    // Escape sequences for characters that must not appear raw inside a
    // double-quoted MySQL string literal.
    const SQL_ESCAPES = {
        '\0': '\\0',
        '\n': '\\n',
        '\r': '\\r',
        '\x1a': '\\Z',
        '"': '\\"',
        '\\': '\\\\'
    };
    let articleList = [];   // articles of the page being processed
    let current = -1;       // index of the article being processed
    let hasMore = false;    // whether the server reported further pages
    getList();

    /**
     * Renders a value as a double-quoted, escaped SQL string literal.
     * null/undefined become the empty string.
     * NOTE(review): if MysqlDB.query accepts placeholder values, prefer that
     * over building SQL text - its signature is not visible here.
     */
    function sqlString(value){
        const text = value == null ? '' : String(value);
        const escaped = text.replace(/[\0\n\r\x1a"\\]/g,function(ch){
            return SQL_ESCAPES[ch];
        });
        return `"${escaped}"`;
    }

    /** Renders a value as a numeric SQL literal, falling back to 0. */
    function sqlNumber(value){
        const number = Number(value);
        return Number.isFinite(number) ? number : 0;
    }

    /** Requests the list page and starts walking its articles. */
    function getList(){
        const args = [
            'page_type=2',
            `user_id=${Config.uid}`,
            'max_behot_time=0',
            'count=20'
        ];
        if(lastPinTime>0) args.push(`max_repin_time=${lastPinTime}`);
        const url = Config.listUrl + args.join('&');
        Request({url:url,encoding:'utf-8',json:true,headers:headers},function(err,response,body){
            if(err){
                LOGGER.error(`List get error,max repin time:${lastPinTime}`);
                LOGGER.error(`URL:${url}`);
                LOGGER.error(err);
                return;
            }
            // Loose == on purpose: the cursor and marker may arrive as strings.
            const isFirstPage = lastPinTime==0;
            // A non-JSON body, a missing data array, or an empty first page
            // would otherwise throw below; stop like any other list failure.
            if(!body || !Array.isArray(body.data) || (isFirstPage && body.data.length===0)){
                LOGGER.error(`List response has no usable data,max repin time:${lastPinTime}`);
                LOGGER.error(`URL:${url}`);
                return;
            }
            if(isFirstPage){
                firstRepinTime = body.data[0].repin_time;
                // Newest favourite is the one already stored: nothing new.
                if(FinalRepinTime==firstRepinTime){
                    finish();
                    return;
                }
            }
            hasMore = body.has_more;
            lastPinTime = body.max_repin_time;
            articleList = body.data;
            getArticle();
        });
    }

    /**
     * Pulls the storable content out of an article page.
     * Galleries yield a comma-separated list of image URLs, videos the share
     * URL, everything else the raw text captured from the page script.
     * Returns '' when nothing usable is found.
     */
    function extractContent(article,body){
        if(typeof body !== 'string') return '';
        let match;
        if(article.has_gallery){
            match = body.match(/gallery: (.*),/);
            if(!match) return '';
            try{
                const images = JSON.parse(match[1]).sub_images;
                if(!Array.isArray(images)) return '';
                return images.map(function(image){ return image.url; }).join(',');
            }catch(parseError){
                // Scraped data: a page layout change must not crash the run.
                LOGGER.error(`Gallery parse error,article id:${article.item_id}`);
                LOGGER.error(parseError);
                return '';
            }
        }
        if(article.has_video){
            match = body.match(/shareUrl: '(.*)'/);
            return match ? match[1] : '';
        }
        match = body.match(/content: '(.*)'\.replace/);
        return match ? match[1] : '';
    }

    /** Advances to the next article on the page, fetches and stores it. */
    function getArticle(){
        current++;
        if(current>=articleList.length){
            return taskFinish();
        }
        const article = articleList[current];
        // Reached the article recorded by the previous run (loose == on
        // purpose, the marker may be a string): everything older is stored.
        if(FinalRepinTime == article.repin_time){
            finish();
            return;
        }
        article.content = '';
        const url = Config.articleUrl + article.item_id;
        Request({url:url,encoding:'utf-8',headers:headers},function(err,response,body){
            if(err){
                LOGGER.error(`Article get error,max repin time:${lastPinTime},article id:${article.item_id}`);
                LOGGER.error(`URL:${url}`);
                LOGGER.error(err);
                return;
            }
            article.content = extractContent(article,body);
            if(article.content===''){
                // Nothing to store for this one; move on.
                getArticle();
                return;
            }
            insertArticle(article);
        });
    }

    /**
     * Stores the article's list row, then its content row, then continues
     * with the next article. Values are escaped here, at the point where the
     * SQL text is built.
     */
    function insertArticle(article){
        const type = article.has_video ? 2 : (article.has_gallery ? 1 : 0);
        const values = [
            sqlString(article.title),
            type,
            sqlString(article.chinese_tag),
            sqlString(article.image_url),
            sqlString(article.abstract),
            sqlNumber(article.gallary_image_count),
            sqlString(article.display_url),
            sqlString(article.item_id)
        ];
        const listSql = `insert IGNORE into list (title,type,tag,cover,abstract,gallaryCount,source,item_id) values(${values.join(',')})`;
        MysqlDB.query(listSql,function(err,result){
            if(err){
                LOGGER.error(`Article insert error,article id:${article.item_id}`);
                LOGGER.error(`SQL:${listSql}`);
                LOGGER.error(err);
                return;
            }
            const articleSql = `insert ignore into article (articleID,content) values(${sqlString(article.item_id)},${sqlString(article.content)})`;
            MysqlDB.query(articleSql,function(err,result){
                if(err){
                    LOGGER.error(`Article insert error,article id:${article.item_id}`);
                    LOGGER.error(`SQL:${articleSql}`);
                    LOGGER.error(err);
                    return;
                }
                articleCount++;
                getArticle();
            });
        });
    }

    /** Page exhausted: continue with the next page or end the run. */
    function taskFinish(){
        LOGGER.info('List Finished!');
        if(hasMore){
            LOGGER.info('More List read!!!');
            currentTask = new Task();
        }else{
            finish();
        }
    }
}
|