1. 情感分析管道

class SentimentPipeline:
    """Scrapy item pipeline that attaches a sentiment label to comment items.

    Items that are not ``CommentItem`` instances pass through untouched.
    For a ``CommentItem`` the ``content`` field is run through the model and
    the prediction is written to the item's ``label`` field.
    """

    # ANSI escape sequences used to colour the console log line.
    _GOLD = "\033[38;5;214m"  # 256-colour palette entry 214 (gold)
    _RESET = "\033[0m"  # restore the terminal's default colour

    def __init__(self):
        # Model configuration (BERT, per the original author's note).
        self.config = Config()
        # Build the model from that configuration and move it to the
        # device named by the configuration.
        self.model = Model(self.config).to(self.config.device)

    def process_item(self, item, spider):
        """Label a comment item with its predicted sentiment.

        Args:
            item: The scraped item handed over by Scrapy.
            spider: The spider that produced the item (unused).

        Returns:
            The same ``item``; ``CommentItem`` instances additionally carry
            a ``label`` field when the predictor yields at least one result.
        """
        # Only comments are analysed; every other item type is forwarded as is.
        if not isinstance(item, CommentItem):
            return item

        print('进行情感分析...')
        # load_dataset is given a list of texts, so wrap the single comment.
        # NOTE(review): adjust the field name if comments are stored elsewhere.
        text = [item['content']]
        test_data = load_dataset(text, self.config)
        test_iter = build_iterator(test_data, self.config)
        result = final_predict(self.config, self.model, test_iter)
        # One text goes in, so one prediction is expected; if several come
        # back, the last one ends up in item['label'].
        for label in result:
            item['label'] = label
            print(f"{self._GOLD}情感分析微博内容:{item['content']},结果:{label}{self._RESET}")
        print(item)
        return item

2. MySQL 保存管道

class MySQLPipeline:
    """Scrapy item pipeline that upserts scraped items into MySQL tables.

    ``DongchediItem`` rows go to ``tb_car``, ``SeriesItem`` rows to
    ``tb_series`` and ``CommentItem`` rows to ``tb_comment``. Any other item
    type is forwarded without touching the database.
    """

    # ANSI escape sequences used to colour the success log line.
    _PURPLE = "\033[95m"
    _RESET = "\033[0m"

    def open_spider(self, spider):
        """Open the database connection and cursor used for the whole crawl."""
        self.connection = get_db_connection()
        self.cursor = self.connection.cursor()
        # Kept from the original; no statement has been issued by this method
        # before this commit.
        self.connection.commit()

    def close_spider(self, spider):
        """Release the cursor and the connection when the crawl ends."""
        try:
            self.cursor.close()
        finally:
            # Close the connection even if closing the cursor raised.
            self.connection.close()

    @staticmethod
    def _build_upsert(table, data):
        """Build an ``INSERT ... ON DUPLICATE KEY UPDATE`` statement.

        Args:
            table: Target table name.
            data: Mapping of column name to value.

        Returns:
            A ``(sql, params)`` tuple. ``params`` holds the values twice:
            once for the ``VALUES`` list and once for the ``UPDATE`` list.
        """
        columns = list(data)
        keys = ', '.join(columns)
        placeholders = ', '.join(['%s'] * len(columns))
        # Each column is assigned a bound parameter. Writing "col = col"
        # would re-assign the stored value and leave duplicates unchanged.
        assignments = ', '.join(f"{column} = %s" for column in columns)
        sql = (
            f"INSERT INTO {table}({keys}) VALUES ({placeholders}) "
            f"ON DUPLICATE KEY UPDATE {assignments}"
        )
        return sql, tuple(data.values()) * 2

    def process_item(self, item, spider):
        """Insert the item into its table, updating the row on a key clash.

        Args:
            item: The scraped item handed over by Scrapy.
            spider: The spider that produced the item (unused).

        Returns:
            The same ``item``, for every item type, so that later pipelines
            keep receiving it.
        """
        print('MySQL 管道...')
        if isinstance(item, DongchediItem):
            table = 'tb_car'
            insertlog = f"{self._PURPLE}汽车信息插入数据库成功:{item['car_name']}{self._RESET}"
        elif isinstance(item, SeriesItem):
            table = 'tb_series'
            insertlog = f"{self._PURPLE}车型评分插入数据库成功:{item['sid']}{self._RESET}"
        elif isinstance(item, CommentItem):
            table = 'tb_comment'
            insertlog = f"{self._PURPLE}评论插入数据库成功:{item['content']}{self._RESET}"
        else:
            # Unknown item type: nothing to store, but still hand it on.
            return item

        sql, params = self._build_upsert(table, dict(item))
        try:
            self.cursor.execute(sql, params)
            self.connection.commit()
            print(insertlog)
        except Exception as E:
            # Best effort: report the failing statement, undo the partial
            # transaction and keep the crawl running.
            print('**********' + sql)
            print("Error:", E)
            self.connection.rollback()

        return item  # a pipeline must return the item

3. 在 Settings 中开启管道

# Item pipelines enabled for the project. Scrapy runs them in ascending order
# of the integer value, so sentiment labelling (300) happens before the item
# is written to MySQL (301).
ITEM_PIPELINES = {
    "car_spider.pipelines.SentimentPipeline": 300,
    "car_spider.pipelines.MySQLPipeline": 301,
}

点赞(0) 打赏

评论列表 共有 0 条评论

暂无评论

微信公众账号

微信扫一扫加关注

发表
评论
返回
顶部