# v1
#
# Personal notes (个人记录)

import os
import random
import numpy as np
from PIL import Image, ImageDraw, ImageFont, ImageFilter
import json
from twist import bend_png, cover

character_set_path = '字符集.txt'

class generator(object):

    def __init__(self, scheme, every_char=100):
        self.stop = False
        self.every_char = every_char
        self.scheme = scheme
        with open(character_set_path, encoding='utf-8') as f:
            chars = f.read()
            character_set = set(chars)
            self.character_set = character_set
        self.length = 0
        self.height = 0
        if self.scheme == 'Necaptcha':
            self.char_num = range(4, 6)
            # self.colors = ['orange', 'blue', 'yellow', 'black', 'brown',
            #                (245, 213, 100), (30, 50, 87),(55, 51, 66), (106, 154, 202), (108, 155, 197), (230, 231, 153),
            #                (52, 58, 44), (140, 222, 138), (125, 164, 39), (84, 85, 129), (145, 187, 59), (208, 92, 141)]
            self.colors = [(8, 15, 13), (8, 15, 13), (8, 15, 13), (100, 213, 236), (101, 212, 65), (236, 229, 131),
                           (71, 51, 101), (108, 224, 237), (223, 138, 177), (154, 223, 143), (202, 128, 232)]
            self.picture = np.zeros((3200, 1600))#判断哪个像素放了字
            self.background = 'Necaptcha_background'
            self.font = ['字体库/WIN7版雅黑 粗体.ttf']
            self.max_angle = 45
            self.size_range = (300, 340)#字体大小范围
            list_num = [every_char] * 3755
            characters = list(self.character_set)#把参数转换为列表
            self.count_dic = dict(zip(characters, list_num))#建立对应检索并计数 桶计数

        if self.scheme == 'sougou':
            self.char_num = 3
            self.colors = [(8, 15, 13)]
            self.picture = np.zeros((1530, 600))
            self.background = 'sougou_background'
            self.font = ['字体库/YRDZSTJF_DeutschExtension.ttf']#暂定 多加几个
            self.max_angle = 20
            self.size_range = (300, 300)
            list_num = [every_char] * 11 #有十1个数
            self.character_set = ['零', '一', '二', '三', '四', '五', '六', '七', '八', '九', '十']
            characters = list(self.character_set)
            self.count_dic = dict(zip(characters, list_num))

        if self.scheme == 'YY':
            self.char_num = range(6, 7)
            self.colors = ['red', 'blue', 'black', 'brown', 'green']
            self.picture = np.zeros((3400, 1600))
            self.background = 'YY_background'
            self.font = ['字体库/迷你简彩云_mianfeiziti.com.ttf', '字体库/江城黑体 500W.ttf']
            self.max_angle = 45
            self.size_range = (400, 420)
            list_num = [every_char] * 3755
            characters = list(self.character_set)
            self.count_dic = dict(zip(characters, list_num))

        if self.scheme == 'renmin':
            self.char_num = range(2, 3)
            self.colors = [(107, 104, 67), (32, 38, 59), (56, 41, 60), (95, 55, 78), (31, 66, 42), (44, 69, 96),
                           (109, 78, 56), (96, 93, 110), (123, 121, 75), (93, 41, 91)]
            self.picture = np.zeros((1500, 400))
            self.background = 'renmin_background'
            self.font = ['字体库/江城黑体 500W.ttf']
            self.max_angle = 45
            self.size_range = (210, 220)
            list_num = [every_char] * 3755
            characters = list(self.character_set)
            self.count_dic = dict(zip(characters, list_num))

        if self.scheme == 'dajie':
            self.char_num = range(6, 7)
            self.colors = [(30, 137, 31), (205, 0, 108), (136, 0, 255), (0, 122, 255), (205, 8, 112), (40, 40, 39),
                           (0, 0, 255), (34, 139, 34)]
            self.picture = np.zeros((3620, 1250))
            self.background = 'dajie_background'
            self.font = ['字体库/开目宋体.ttf', '字体库/NotoSansSCMedium.ttf']
            self.max_angle = 45
            self.size_range = (260, 280)
            list_num = [every_char] * 3755
            characters = list(self.character_set)
            self.count_dic = dict(zip(characters, list_num))

        if self.scheme == 'baidu':
            self.char_num = range(5, 6)
            self.colors = [(223, 222, 195), (217, 179, 219), (214, 178, 179), (194, 241, 197), (179, 186, 215),
                           (27, 39, 41), (71, 41, 42), (62, 37, 15), (74, 38, 94), (7, 69, 44), (97, 97, 62),
                           (190, 237, 241)]
            self.picture = np.zeros((3360, 1800))
            self.background = 'baidu_background'
            self.font = ['字体库/STZONGYI.TTF']
            self.max_angle = 80
            self.size_range = (250, 260)
            list_num = [every_char] * 3755
            characters = list(self.character_set)
            self.count_dic = dict(zip(characters, list_num))

        if self.scheme == 'Geetest':
            self.char_num = range(2, 5)
            self.colors = [(255, 229, 129), (255, 214, 115), (220, 106, 10), (46, 251, 217), (46, 209, 240),
                           (207, 237, 110), (239, 253, 86), (229, 66, 21), (195, 255, 172), (44, 72, 216)]
            self.picture = np.zeros((3440, 3420))
            self.background = 'Geetest_background'
            self.font = ['字体库/文道潮黑.ttf', '字体库/jiangxi.ttf']  # '字体库/ARKai_T.ttf',
            self.max_angle = 45
            self.size_range = (660, 680)
            list_num = [every_char] * 3755
            characters = list(self.character_set)
            self.count_dic = dict(zip(characters, list_num))

        if self.scheme == 'shumei':
            self.char_num = range(4, 5)
            self.colors = [(5, 254, 13), (254, 0, 0), (3, 3, 244), (245, 176, 179), (254, 253, 7), (2, 255, 251),
                           (252, 199, 8), ]
            self.picture = np.zeros((3000, 1500))
            self.background = 'shumei_background'
            self.font = ['字体库/包图创意体.ttf']
            self.max_angle = 45
            self.size_range = (300, 320)
            list_num = [every_char] * 3755
            characters = list(self.character_set)
            self.count_dic = dict(zip(characters, list_num))

        if self.scheme == 'dingxiang':
            self.char_num = range(5, 6)
            self.colors = [(200, 199, 180), (201, 185, 35), (125, 208, 198), (158, 216, 165), (247, 233, 233)]
            self.picture = np.zeros((3740, 1870))
            self.background = 'dingxiang_background'
            self.font = ['字体库/王汉宗空心楷体简.ttf', '字体库/YRDZSTJF_DeutschExtension.ttf',
                         '字体库/SweiSpringSugarCJKtc-Regular.ttf', '字体库/华文宋体_mianfeiziti.com.ttf']
            self.max_angle = 45
            self.size_range = (540, 560)
            list_num = [every_char] * 3755
            characters = list(self.character_set)
            self.count_dic = dict(zip(characters, list_num))

    # 为字符添加纹理
    def add_texture(self, img, box):
        """Add per-pixel Gaussian colour noise to the visible pixels in a box.

        Args:
            img: Image that ``np.array`` turns into a rows x columns x channels
                array whose last channel is alpha.
            box: ``(xmin, ymin, xmax, ymax)`` region to process; the max
                edges are exclusive.

        Returns:
            A new PIL image built from the modified pixel array.
        """
        # Standard deviation of the noise; Geetest gets a coarser texture.
        spread = 14 if self.scheme == 'Geetest' else 8
        left, top, right, bottom = box
        pixels = np.array(img)
        for row in range(top, bottom):
            for col in range(left, right):
                pixel = pixels[row][col]
                if pixel[-1] == 0:
                    # Fully transparent pixel: leave it untouched.
                    continue
                jitter = np.array([random.gauss(0, spread) for _ in range(3)]).astype('int64')
                # Work in int64 so the sum can leave 0..255 before clipping.
                jitter += pixel[0:-1].astype('int64')
                pixel[0:-1] = jitter.clip(0, 255).astype('uint8')
        return Image.fromarray(np.uint8(pixels))

    # 随机抓取背景图片
    def get_background(self):
        path = self.background
        length = len(os.listdir(path))
        index = random.randint(0, length - 1)
        background = Image.open(path + '/' + os.listdir(path)[index]).convert('RGBA')
        l, h = background.size
        background = background.resize((l*10, h*10))#缩放十倍加字
        return background, l, h

    # 添加随机不重复的字符,且位置不重叠,并返回字体的位置信息
    def add_font(self, background):

        global captcha
        if self.scheme == 'sougou':
            char_num = 2
        else:
            char_num = random.choice(self.char_num)
        colors = self.colors
        picture = self.picture
        captcha, self.length, self.height = background
        font = self.font
        max_angle = self.max_angle
        size_range = self.size_range
        boxes = []
        chars = []
        min_size, max_size = size_range
        picture[50:picture.shape[0] - max_size - 80, 50:picture.shape[1] - max_size - 80] = 1

        if self.scheme == 'renmin':
            picture[300:picture.shape[0] - max_size - 300, 300:picture.shape[1] - max_size - 300] = 1
            picture[750 - max_size:150, ] = 0

        if self.scheme == 'Necaptcha':
            picture[picture.shape[0] - 1000 - max_size: picture.shape[0],
            picture.shape[1] - 300 - max_size:picture.shape[1]] = 0

        if self.scheme == 'dingxiang':
            picture[picture.shape[0] - 1300 - max_size: picture.shape[0],
            0: 600] = 0

        try:
            char_s = random.sample(list(self.count_dic.keys()), char_num)#随机选字
        except:
            self.stop = True
            return

        pop_list = []
        for key in self.count_dic.keys():
            if key in char_s:
                self.count_dic[key] -= 1
            if self.count_dic[key] == 0:
                pop_list.append(key)
        for item in pop_list:
            self.count_dic.pop(item)

        if self.scheme == 'sougou':
            font_ttf1 = random.choice(font)
            font_ttf2 = random.choice(font)
            font_ttf3 = random.choice(font)
            char1 = char_s[0]
            char3 = char_s[1]
            option = random.choice(range(0,3))
            if option == 0:
                char2 = '加'
            elif option == 1:
                char2 = '减'
            else:
                char2 = '乘'

            print(char1, char2, char3)

            color1 = random.choice(colors)
            color2 = random.choice(colors)
            color3 = random.choice(colors)
            angle1 = random.randint(-max_angle, max_angle)
            angle2 = random.randint(-max_angle, max_angle)
            angle3 = random.randint(-max_angle, max_angle)
            size1 = random.randint(min_size, max_size)
            size2 = random.randint(min_size, max_size)
            size3 = random.randint(min_size, max_size)
            im1 = Image.new(mode='RGBA', size=captcha.size, color=(0, 0, 0, 0))
            im2 = Image.new(mode='RGBA', size=captcha.size, color=(0, 0, 0, 0))
            im3 = Image.new(mode='RGBA', size=captcha.size, color=(0, 0, 0, 0))
            ft1 = ImageFont.truetype(font_ttf1, size1)#设定字体
            ft2 = ImageFont.truetype(font_ttf2, size2)  # 设定字体
            ft3 = ImageFont.truetype(font_ttf3, size3)
            draw1 = ImageDraw.Draw(im1)
            draw2 = ImageDraw.Draw(im2)
            draw3 = ImageDraw.Draw(im3)

            x_non_zero, y_non_zero = np.nonzero(picture)
            location_non_zero = list(zip(x_non_zero, y_non_zero))
            location1 = random.choice(list(location_non_zero))  # 选取非零区域
            x1, y1 = location1
            picture[x1 - max_size if x1 - max_size > 0 else 0:x1 + max_size,
            y1 - max_size if y1 - max_size > 0 else 0:y1 + max_size] = 0

            x_non_zero, y_non_zero = np.nonzero(picture)
            location_non_zero = list(zip(x_non_zero, y_non_zero))
            location2 = random.choice(list(location_non_zero))  # 选取非零区域
            x2, y2 = location2
            picture[x2 - max_size if x2 - max_size > 0 else 0:x2 + max_size,
            y2 - max_size if y2 - max_size > 0 else 0:y2 + max_size] = 0

            x_non_zero, y_non_zero = np.nonzero(picture)
            location_non_zero = list(zip(x_non_zero, y_non_zero))
            location3 = random.choice(list(location_non_zero))  # 选取非零区域
            x3, y3 = location3
            picture[x3 - max_size if x3 - max_size > 0 else 0:x3 + max_size,
            y3 - max_size if y3 - max_size > 0 else 0:y3 + max_size] = 0

            if x1 > x2:   #冒个泡
                x1, x2 = x2, x1
                y1, y2 = y2, y1
            if x2 > x3:
                x2, x3 = x3, x2
                y2, y3 = y3, y2
            if x1 > x2:
                x1, x2 = x2, x1
                y1, y2 = y2, y1

            location1 = (x1, y1)
            location2 = (x2, y2)
            location3 = (x3, y3)

            print(x1, y1, x2, y2, x3, y3)

            box1 = (int((x1 - 50) / 10.0), int((y1 - 50) / 10.0),
                   int((x1 + max_size + 80) / 10.0),
                   int((y1 + max_size + 80) / 10.0))
            box2 = (int((x2 - 50) / 10.0), int((y2 - 50) / 10.0),
                   int((x2 + max_size + 80) / 10.0),
                   int((y2 + max_size + 80) / 10.0))
            box3 = (int((x3 - 50) / 10.0), int((y3 - 50) / 10.0),
                   int((x3 + max_size + 80) / 10.0),
                   int((y3 + max_size + 80) / 10.0))

            draw1.text(location1, char1, font=ft1, fill=color1)
            draw2.text(location2, char2, font=ft2, fill=color2)
            draw3.text(location3, char3, font=ft3, fill=color3)
            """
            im1 = im1.rotate(angle=angle1, center=(x1 + size1 // 2, y1 + size1 // 2))
            im2 = im2.rotate(angle=angle2, center=(x2 + size2 // 2, y2 + size2 // 2))
            im3 = im3.rotate(angle=angle3, center=(x3 + size3 // 2, y3 + size3 // 2))

            w_h1 = random.random() * 0.1
            w_v1 = random.random() * 0.1
            im1 = bend_png(im1, box1, 7, w_h1, w_v1)

            w_h2 = random.random() * 0.1
            w_v2 = random.random() * 0.1
            im2 = bend_png(im2, box2, 7, w_h2, w_v2)

            w_h3 = random.random() * 0.1
            w_v3 = random.random() * 0.1
            im3 = bend_png(im3, box3, 7, w_h3, w_v3)
            """
            captcha = Image.alpha_composite(captcha, im1)
            captcha = Image.alpha_composite(captcha, im2)
            captcha = Image.alpha_composite(captcha, im3)

            boxes.append(box1)
            chars.append(char1)
            boxes.append(box2)
            chars.append(char2)
            boxes.append(box3)
            chars.append(char3)

        else:
            for i in range(0, char_num):
                font_ttf = random.choice(font)
                char = char_s[i]
                color = random.choice(colors)
                if font_ttf == '字体库/王汉宗空心楷体简.ttf':
                    color = (247, 233, 233)
                angle = random.randint(-max_angle, max_angle)
                size = random.randint(min_size, max_size)
                im0 = Image.new(mode='RGBA', size=captcha.size, color=(0, 0, 0, 0))
                ft = ImageFont.truetype(font_ttf, size)#设定字体
                draw = ImageDraw.Draw(im0)
                x_non_zero, y_non_zero = np.nonzero(picture)
                location_non_zero = list(zip(x_non_zero, y_non_zero))
                location = random.choice(list(location_non_zero))#选取非零区域

                x, y = location
                picture[x - max_size if x - max_size > 0 else 0:x + max_size,
                y - max_size if y - max_size > 0 else 0:y + max_size] = 0
                if self.scheme == 'Geetest':
                    draw.text(location, char, font=ft, fill=(8, 0, 12))
                    location = (x + 20, y + 20)
                    ft = ImageFont.truetype(font_ttf, size - 40)
                draw.text(location, char, font=ft, fill=color)
                if self.scheme == 'renmin':#画box
                    box = (int((x - 30)/10.0), int((y - 30)/10.0),
                           int((x + max_size + 20)/10.0),
                           int((y + max_size + 20)/10.0))
                else:
                    box = (int((x - 50)/10.0), int((y - 50)/10.0),
                           int((x + max_size + 80)/10.0),
                           int((y + max_size + 80)/10.0))
                if self.scheme == 'Necaptcha':
                    im0 = cover(im0, x, y, size)
                im0 = im0.rotate(angle=angle, center=(x + size // 2, y + size // 2))

                # if self.scheme == 'Necaptcha' or self.scheme == 'Geetest':
                #     im0 = self.add_texture(im0, box)
                if self.scheme == 'Necaptcha':
                    w_h = random.random() * 0.1
                    w_v = random.random() * 0.1
                    im0 = bend_png(im0, box, 7, w_h, w_v)
                captcha = Image.alpha_composite(captcha, im0)
                boxes.append(box)
                chars.append(char)
                # captcha.show()
        captcha = captcha.resize((self.length, self.height))
        # captcha = captcha.filter(ImageFilter.GaussianBlur(radius=2))
        return captcha, chars, boxes

    def save_char(self, captcha, chars, boxes, n):
        shapes = []
        for char, box in zip(chars, boxes):
            single_char = captcha.crop(box)
            path = self.scheme + '_chars'
            num = len(os.listdir(path)) + 1
            # 保存图片
            x1, y1, x2, y2 = box
            x1 = str(x1)
            y1 = str(y1)
            x2 = str(x2)
            y2 = str(y2)
            single_char = single_char.convert('RGB')
            single_char.save(path + '/' + str(num) + '.png')
            m = len(os.listdir(path + '_label/')) + 1
            f = open(path + '_label/' + str(m) + '.txt', 'w', encoding='utf-8')
            f.write(char)
            f.close()
            shape = {'label': char, 'points': [[x1, y1], [x2, y2]]}
            shapes.append(shape)
        json_text = {'shapes': shapes}
        jsondata = json.dumps(json_text, indent=4, separators=(',', ': '))
        f = open(self.scheme + '/captcha_label_' + str(n) + '.json', 'w', encoding='utf-8')
        f.write(jsondata)
        f.close()

    def generate(self, num=9999999999):
        for i in range(num):
            path = self.scheme
            num = int(len(os.listdir(path)) / 2) + 1
            captcha = self.get_background()
            captcha, chars, boxes = self.add_font(captcha)

            # GAN
            # captcha = GAN(captcha,boxes)
            # 保存单个字符图片,标签
            self.save_char(captcha, chars, boxes, num)
            captcha = captcha.convert("RGB")
            # 保存图片
            captcha.save(path + "/captcha_" + str(num) + '.png')
            print(i + 1)
            if self.stop:
                print('已经遍历所有字符%d次' % self.every_char)
                exit(0)

    # def save_label(self,char,locations):