# 知轩-小工具 (small scraper utility for zxcs.me)

#!/usr/bin/env python
# -*- coding:utf-8 -*-
# date  2019/8/25

import requests
from bs4 import BeautifulSoup
import random, sqlite3

# Category name -> URL of that category's listing on zxcs.me.
zxcs_urls = {
    "奇幻科幻": "http://www.zxcs.me/sort/26",
    "都市娱乐": "http://www.zxcs.me/sort/23",
    "武侠仙侠": "http://www.zxcs.me/sort/25",
    "科幻灵异": "http://www.zxcs.me/sort/27",
    "历史军事": "http://www.zxcs.me/sort/28",
    "竞技游戏": "http://www.zxcs.me/sort/29",
    "二次元": "http://www.zxcs.me/sort/55",
}

# HTTP headers sent with every request (a mobile Safari style User-Agent).
headers = {
    "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit",
}

# URL template for a book's vote counts; filled with (book id, random number).
get_flower_tep = (
    "http://www.zxcs.me/content/plugins/cgz_xinqing/cgz_xinqing_action.php"
    "?action=show&id=%s&m=%s"
)


class Sql_Utils:
    """Small SQLite helper that stores scraped book records in ``./book.data``.

    Every public operation opens its own connection through ``get_conn()``
    and closes it when done, so an instance can be reused for many calls.
    Database errors are reported with ``print`` instead of being raised, so
    a long scraping run keeps going after a single failed row.
    """

    # Keyword names accepted by insert(), in the same order as the
    # NAME..TYPES columns of the bookinfos table.
    _FIELDS = ("bookname", "infourl", "downloadurl", "flowers",
               "description", "size", "types")

    def __init__(self):
        self.filepath = "./book.data"
        self.get_conn()
        self.check_init()

    def get_conn(self):
        """Open a new connection, store it on ``self.conn`` and return it.

        Returns None (after printing the error) when the database cannot
        be opened.
        """
        try:
            self.conn = sqlite3.connect(self.filepath)
        except Exception as e:
            self.conn = None
            print(e)
        return self.conn

    def check_init(self):
        """Make sure the ``bookinfos`` table exists, then close the connection."""
        conn = self.conn
        if conn is None:
            print("Opened database failed")
            return
        try:
            self.__init_database()
        finally:
            conn.close()

    def __init_database(self):
        """Create the ``bookinfos`` table on ``self.conn`` if it is missing."""
        conn = self.conn
        # IF NOT EXISTS makes this safe to run against an existing database,
        # so no failing probe query is needed to detect a missing table.
        conn.execute('''CREATE TABLE IF NOT EXISTS bookinfos
               (ID INTEGER  PRIMARY KEY    NOT NULL,
               BOOKID            INT     NOT NULL UNIQUE,
               NAME           CHAR(200)    NOT NULL UNIQUE,
               INFOURL        CHAR(300) NOT NULL,
               DOWNLOADURL        CHAR(300) NOT NULL,
               FLOWERS        CHAR(300) NOT NULL,
               DESCRIPTION    CHAR(1000) NOT NULL,
               SIZE CHAR(30) NOT NULL,
               TYPES  CHAR(30) NOT NULL);''')
        conn.commit()

    def _fetch_record(self, bookid):
        """Return all stored fields of one book keyed by ``_FIELDS``, or None.

        Unlike ``query()`` this includes ``downloadurl`` and ``description``,
        which ``insert()`` needs when it merges new values into an existing row.
        """
        conn = self.get_conn()
        if conn is None:
            return None
        try:
            row = conn.execute(
                """select NAME,INFOURL,DOWNLOADURL,FLOWERS,DESCRIPTION,SIZE,TYPES
                   from bookinfos where BOOKID =?""", (bookid,)).fetchone()
        except Exception as e:
            print("查询异常 %s" % e)
            return None
        finally:
            conn.close()
        return dict(zip(self._FIELDS, row)) if row else None

    def insert(self, *args, **kwargs):
        """Insert a new book, or update the stored one with the same id.

        Args:
            *args: ``args[0]`` is the book id.
            **kwargs: any of ``bookname``, ``infourl``, ``downloadurl``,
                ``flowers``, ``description``, ``size``, ``types``.  All seven
                are required for a book that is not stored yet; for an
                existing book, omitted fields keep their stored value.

        Raises:
            KeyError: the book is new and at least one field is missing.
        """
        bookid = args[0]
        existing = self._fetch_record(bookid)
        if existing is None:
            missing = [name for name in self._FIELDS if name not in kwargs]
            if missing:
                raise KeyError("missing fields for new book %s: %s"
                               % (bookid, ", ".join(missing)))
            values = [kwargs[name] for name in self._FIELDS]
            action = "插入"
        else:
            values = [kwargs.get(name, existing[name]) for name in self._FIELDS]
            action = "更新"
        bookname = values[0]

        conn = self.get_conn()
        if conn is None:
            print("%s  %s失败 %s" % (bookname, action, "Opened database failed"))
            return
        try:
            if existing is None:
                conn.execute(
                    """INSERT INTO bookinfos(BOOKID,NAME,INFOURL,DOWNLOADURL,FLOWERS,DESCRIPTION,SIZE,TYPES)
                    VALUES(?,?,?,?,?,?,?,?)""", [bookid] + values)
            else:
                # Re-running INSERT for a stored BOOKID can only hit the
                # UNIQUE constraint, so existing rows are updated in place.
                conn.execute(
                    """UPDATE bookinfos
                    SET NAME=?,INFOURL=?,DOWNLOADURL=?,FLOWERS=?,DESCRIPTION=?,SIZE=?,TYPES=?
                    WHERE BOOKID=?""", values + [bookid])
            conn.commit()
            print("%s  %s成功" % (bookname, action))
        except Exception as e:
            print("%s  %s失败 %s" % (bookname, action, e))
        finally:
            conn.close()

    def query(self, bookid):
        """Return stored books as a list of dicts.

        Args:
            bookid: id of the book to look up, or ``-1`` to return every row.

        Returns:
            A list of dicts with the keys ``bookname``, ``infourl``,
            ``flowers``, ``size`` and ``types``; empty when nothing matches
            or when the query fails (the error is printed).
        """
        books = []
        conn = self.get_conn()
        if conn is None:
            return books
        try:
            if bookid == -1:
                cursor = conn.execute("""select * from bookinfos""")
            else:
                cursor = conn.execute("""select * from bookinfos where BOOKID =?""", (bookid,))

            # Column order: ID, BOOKID, NAME, INFOURL, DOWNLOADURL, FLOWERS,
            # DESCRIPTION, SIZE, TYPES.
            for row in cursor:
                books.append({
                    "bookname": row[2],
                    "infourl": row[3],
                    "flowers": row[5],
                    "size": row[7],
                    "types": row[8],
                })
        except Exception as e:
            print("查询异常 %s" % e)
        finally:
            conn.close()
        return books


def _page_number(url):
    """Return the trailing path segment of *url* as an int, or None."""
    try:
        return int(url.split("/")[-1])
    except ValueError:
        return None


def get_this_end_url(baseurl, headers):
    """Return the URL of the last listing page reachable from *baseurl*.

    The link whose ``title`` contains "尾页" (last page) wins.  Without such
    a link, the pagination link with the numerically largest trailing page
    number is returned.

    Args:
        baseurl: URL of a category listing page.
        headers: HTTP headers passed to ``requests.get``.

    Raises:
        ValueError: the request did not return 200, or the page has no
            usable pagination link.
    """
    req = requests.get(baseurl, headers=headers)
    if req.status_code != 200:
        print("url: %s ERROR" % baseurl)
        raise ValueError("cannot fetch listing page %s (HTTP %s)"
                         % (baseurl, req.status_code))

    soup = BeautifulSoup(req.content, "html.parser")
    pagelist = []
    for a in soup.find_all('a'):
        href = a.get("href")
        if not href:
            # Anchors without a usable href cannot be pagination links.
            continue
        if "尾页" in a.get("title", ""):
            return href
        if "page" in href and _page_number(href) is not None:
            pagelist.append(href)

    if not pagelist:
        raise ValueError("no pagination links found on %s" % baseurl)
    # Compare page numbers as integers: as strings, "9" sorts above "10".
    return max(pagelist, key=_page_number)


def get_this_page_url(baseurl, headers, types):
    """Scrape one listing page and store every book that is not saved yet.

    Book links are the ``<a>`` tags whose direct parent is a ``<dt>``.  For
    each one not yet in the database, the download URL, vote counts, size
    and description are fetched and the record is inserted.

    Args:
        baseurl: URL of the listing page.
        headers: HTTP headers passed to every request.
        types: category name stored with each book.

    Returns:
        A list with one ``{book_id: {book_name: info_url}}`` dict per book
        link found on the page (including books that were already stored).
        Empty when the page does not return HTTP 200.
    """
    xurl = []
    req = requests.get(baseurl, headers=headers)
    if req.status_code != 200:
        return xurl

    soup = BeautifulSoup(req.content, "html.parser")
    # One helper for the whole page; query()/insert() open their own
    # connection per call, so re-creating it for every link gains nothing.
    sql_conn = Sql_Utils()
    for a in soup.find_all('a'):
        # Exact match: `name in "dt"` would also accept "d", "t" and "".
        if a.parent.name != "dt":
            continue
        infourl = a.get("href")
        if not infourl:
            continue
        bookid = infourl.split("/")[-1]
        bookname = a.string
        xurl.append({int(bookid): {"%s" % bookname: infourl}})

        if len(sql_conn.query(bookid)) != 0:
            print("%s 已经存在,继续操作" % bookname)
            continue
        downloadurl = get_book_download_url(bookid, headers)
        flowers = get_book_flowers(bookid, get_flower_tep, headers)
        size, description = get_book_infos(infourl, headers, bookname)

        sql_conn.insert(bookid, bookname=bookname, infourl=infourl,
                        flowers=flowers, downloadurl=downloadurl, size=size,
                        description=description, types=types)
    return xurl


def get_this_all_url(baseurl, headers, types):
    """Walk every listing page of one category and scrape each of them.

    The last page URL is looked up first; its trailing path segment is the
    page count.  Page URLs are then built by replacing that segment with
    1..count.

    Args:
        baseurl: URL of the category's first listing page.
        headers: HTTP headers passed to every request.
        types: category name, used for progress output and stored per book.

    Returns:
        The concatenated results of ``get_this_page_url`` for all pages.
    """
    last_page_url = get_this_end_url(baseurl, headers)
    segments = last_page_url.split("/")
    url_prefix = segments[:-1]
    total_pages = segments[-1]

    collected = []
    for page in range(1, int(total_pages) + 1):
        print("*" * 200)
        print("%s 总共%s页,现在进行到%s页" % (types, total_pages, page))
        page_url = "/".join(url_prefix + [str(page)])
        collected.extend(get_this_page_url(page_url, headers, types))
    return collected


def get_book_flowers(bookid, get_flower_tep, headers):
    """Fetch the raw vote-count text for one book.

    Args:
        bookid: id of the book.
        get_flower_tep: URL template with two ``%s`` slots (id, random number).
        headers: HTTP headers passed to ``requests.get``.

    Returns:
        The response body as text, whatever the HTTP status was.
    """
    # The second slot gets a fresh random number on every call
    # (presumably to defeat caching; confirm against the site).
    nonce = random.random()
    response = requests.get(get_flower_tep % (bookid, nonce), headers=headers)
    return response.text


def get_book_download_url(bookid, headers):
    """Return the download-page link found on a book's post page.

    Args:
        bookid: id of the book; its page is ``http://www.zxcs.me/post/<id>``.
        headers: HTTP headers passed to ``requests.get``.

    Returns:
        The ``href`` of the first link whose ``title`` contains "点击下载",
        ``None`` when the page has no such link, or the string ``"ERROR"``
        when the page does not return HTTP 200.
    """
    post_url = "http://www.zxcs.me/post/%s" % bookid
    response = requests.get(post_url, headers=headers)
    if response.status_code != 200:
        print("url: %s ERROR" % post_url)
        return "ERROR"

    page = BeautifulSoup(response.content, "html.parser")
    for anchor in page.find_all('a'):
        if "点击下载" in anchor.get("title", ""):
            return anchor.get("href")
    return None


def get_book_infos(bookurl, headers, bookname):
    """Extract a book's size and description from its info page.

    Text is collected from every ``<p>`` containing "内容简介", stopping at
    the first child that contains "link".  The size is the part after ":"
    in the first collected piece; the description is every piece from the
    third one on, joined together.

    Args:
        bookurl: URL of the book's info page.
        headers: HTTP headers passed to ``requests.get``.
        bookname: book title, used only in the error message.

    Returns:
        A ``(size, description)`` tuple.  Both items are "获取失败" when the
        page cannot be fetched or does not have the expected layout.
    """
    failed = ("获取失败", "获取失败")
    req = requests.get(bookurl, headers=headers)
    if req.status_code != 200:
        # Always hand back a 2-tuple so callers can unpack the result
        # unconditionally instead of crashing on None.
        print("url: %s ERROR" % bookurl)
        return failed

    soup = BeautifulSoup(req.content, "html.parser")
    sinfo = []
    for p in soup.find_all('p'):
        if "内容简介" not in str(p):
            continue
        for child in p:
            text = str(child)
            if "link" in text:
                break
            if "<br/>" in text or "\n" == text:
                continue
            sinfo.append(text.strip("\r\n").strip("\xa0").strip("\t").strip("\u3000"))
    try:
        return sinfo[0].split(":")[1], "".join(sinfo[2:])
    except IndexError as e:
        # Nothing collected, or the first piece has no ":" separator.
        print("ERROR  %s 大小和描述获取失败 %s" % (bookname, e))
        return failed


def make_book_data():
    """Scrape every category listed in ``zxcs_urls`` into the database."""
    for category in zxcs_urls:
        get_this_all_url(zxcs_urls[category], headers, category)


def get_book_l():
    """Print every stored book, highest first-vote count first.

    ``flowers`` is treated as a comma-separated list of integers; per the
    original comments the first entry is the "仙草" count and the last one
    the "毒草" count.
    """
    records = Sql_Utils().query(-1)

    def first_count(book):
        return int(book.get("flowers").split(",")[0])

    # Ranking by "仙草" count (first entry).
    records.sort(key=first_count, reverse=True)

    # Alternative: ranking by "毒草" count (last entry).
    # records.sort(key=lambda x: int(x.get("flowers").split(",")[-1]), reverse=True)

    # Alternative: ranking by the share of "仙草" among first + last counts.
    # records.sort(key=lambda x: float(int(x.get("flowers").split(",")[0]) * 10 / (int(x.get("flowers").split(",")[-1]) + int(x.get("flowers").split(",")[0]))), reverse=True)

    for record in records:
        print(record)


if __name__ == "__main__":
    # Run make_book_data() first to fill the database;
    # once that has finished, the ranking query below can be used.
    # make_book_data()
    get_book_l()

# image.png

# 类似文章