v1
天上一颗蛋
·
·
个人记录
import os
import random
import numpy as np
from PIL import Image, ImageDraw, ImageFont, ImageFilter
import json
from twist import bend_png, cover
# Path of the UTF-8 text file whose characters form the default character set
# (it is read once by generator.__init__).
character_set_path = '字符集.txt'
class generator(object):
    """Synthetic CAPTCHA generator that imitates several commercial schemes.

    A background image is upscaled 10x, random characters are drawn onto it
    at non-overlapping positions, and the result is scaled back down.  Every
    character may be used at most ``every_char`` times; once too few
    characters remain to build another CAPTCHA, ``stop`` is set to True.

    Parameters:
        scheme: name of the scheme to imitate, one of the keys of
            ``_SCHEMES``.  For any other name only the scheme-independent
            attributes are initialised (same as the historical behaviour).
        every_char: how many times each character may be used.
        charset_path: optional path of the UTF-8 character-set file; when
            omitted the module-level ``character_set_path`` is used.
    """

    # Font that is always rendered with one fixed light colour.
    _HOLLOW_FONT = '字体库/王汉宗空心楷体简.ttf'
    _HOLLOW_FONT_COLOR = (247, 233, 233)
    # Colour of the full-size glyph drawn underneath each Geetest character.
    _GEETEST_UNDERLAY = (8, 0, 12)
    # The sougou scheme renders "<numeral> <operator> <numeral>".
    _SOUGOU_OPERATORS = ['加', '减', '乘']

    # Per-scheme settings.  ``picture_shape`` is the shape of the placement
    # mask, which is indexed as [x][y] in upscaled pixels.
    _SCHEMES = {
        'Necaptcha': {
            'char_num': range(4, 6),
            'colors': [(8, 15, 13), (8, 15, 13), (8, 15, 13), (100, 213, 236),
                       (101, 212, 65), (236, 229, 131), (71, 51, 101),
                       (108, 224, 237), (223, 138, 177), (154, 223, 143),
                       (202, 128, 232)],
            'picture_shape': (3200, 1600),
            'background': 'Necaptcha_background',
            'font': ['字体库/WIN7版雅黑 粗体.ttf'],
            'max_angle': 45,
            'size_range': (300, 340),
        },
        'sougou': {
            'char_num': 3,
            'colors': [(8, 15, 13)],
            'picture_shape': (1530, 600),
            'background': 'sougou_background',
            'font': ['字体库/YRDZSTJF_DeutschExtension.ttf'],
            'max_angle': 20,
            'size_range': (300, 300),
            'character_set': ['零', '一', '二', '三', '四', '五', '六', '七',
                              '八', '九', '十'],
        },
        'YY': {
            'char_num': range(6, 7),
            'colors': ['red', 'blue', 'black', 'brown', 'green'],
            'picture_shape': (3400, 1600),
            'background': 'YY_background',
            'font': ['字体库/迷你简彩云_mianfeiziti.com.ttf',
                     '字体库/江城黑体 500W.ttf'],
            'max_angle': 45,
            'size_range': (400, 420),
        },
        'renmin': {
            'char_num': range(2, 3),
            'colors': [(107, 104, 67), (32, 38, 59), (56, 41, 60),
                       (95, 55, 78), (31, 66, 42), (44, 69, 96),
                       (109, 78, 56), (96, 93, 110), (123, 121, 75),
                       (93, 41, 91)],
            'picture_shape': (1500, 400),
            'background': 'renmin_background',
            'font': ['字体库/江城黑体 500W.ttf'],
            'max_angle': 45,
            'size_range': (210, 220),
        },
        'dajie': {
            'char_num': range(6, 7),
            'colors': [(30, 137, 31), (205, 0, 108), (136, 0, 255),
                       (0, 122, 255), (205, 8, 112), (40, 40, 39),
                       (0, 0, 255), (34, 139, 34)],
            'picture_shape': (3620, 1250),
            'background': 'dajie_background',
            'font': ['字体库/开目宋体.ttf', '字体库/NotoSansSCMedium.ttf'],
            'max_angle': 45,
            'size_range': (260, 280),
        },
        'baidu': {
            'char_num': range(5, 6),
            'colors': [(223, 222, 195), (217, 179, 219), (214, 178, 179),
                       (194, 241, 197), (179, 186, 215), (27, 39, 41),
                       (71, 41, 42), (62, 37, 15), (74, 38, 94),
                       (7, 69, 44), (97, 97, 62), (190, 237, 241)],
            'picture_shape': (3360, 1800),
            'background': 'baidu_background',
            'font': ['字体库/STZONGYI.TTF'],
            'max_angle': 80,
            'size_range': (250, 260),
        },
        'Geetest': {
            'char_num': range(2, 5),
            'colors': [(255, 229, 129), (255, 214, 115), (220, 106, 10),
                       (46, 251, 217), (46, 209, 240), (207, 237, 110),
                       (239, 253, 86), (229, 66, 21), (195, 255, 172),
                       (44, 72, 216)],
            'picture_shape': (3440, 3420),
            'background': 'Geetest_background',
            'font': ['字体库/文道潮黑.ttf', '字体库/jiangxi.ttf'],
            'max_angle': 45,
            'size_range': (660, 680),
        },
        'shumei': {
            'char_num': range(4, 5),
            'colors': [(5, 254, 13), (254, 0, 0), (3, 3, 244),
                       (245, 176, 179), (254, 253, 7), (2, 255, 251),
                       (252, 199, 8)],
            'picture_shape': (3000, 1500),
            'background': 'shumei_background',
            'font': ['字体库/包图创意体.ttf'],
            'max_angle': 45,
            'size_range': (300, 320),
        },
        'dingxiang': {
            'char_num': range(5, 6),
            'colors': [(200, 199, 180), (201, 185, 35), (125, 208, 198),
                       (158, 216, 165), (247, 233, 233)],
            'picture_shape': (3740, 1870),
            'background': 'dingxiang_background',
            'font': ['字体库/王汉宗空心楷体简.ttf',
                     '字体库/YRDZSTJF_DeutschExtension.ttf',
                     '字体库/SweiSpringSugarCJKtc-Regular.ttf',
                     '字体库/华文宋体_mianfeiziti.com.ttf'],
            'max_angle': 45,
            'size_range': (540, 560),
        },
    }

    def __init__(self, scheme, every_char=100, charset_path=None):
        self.stop = False
        self.every_char = every_char
        self.scheme = scheme
        if charset_path is None:
            charset_path = character_set_path
        with open(charset_path, encoding='utf-8') as f:
            self.character_set = set(f.read())
        self.length = 0
        self.height = 0

        config = self._SCHEMES.get(scheme)
        if config is None:
            # Unknown scheme: leave the scheme-specific attributes unset.
            return
        self.char_num = config['char_num']
        # Copy the lists so instances never share mutable configuration.
        self.colors = list(config['colors'])
        # Placement mask: non-zero cells are still free for a character.
        self.picture = np.zeros(config['picture_shape'])
        self.background = config['background']
        self.font = list(config['font'])
        self.max_angle = config['max_angle']
        self.size_range = config['size_range']  # (min, max) font size
        if 'character_set' in config:
            self.character_set = list(config['character_set'])
        # Remaining-use counter per character.  dict.fromkeys covers the
        # whole character set, whatever its size.
        self.count_dic = dict.fromkeys(self.character_set, every_char)

    def add_texture(self, img, box):
        """Add per-channel Gaussian noise to the non-transparent pixels of ``img``.

        Parameters:
            img: image whose last channel is alpha (e.g. RGBA).
            box: ``(xmin, ymin, xmax, ymax)`` region to process, in pixels.

        Returns:
            A new image built from the modified pixel array.
        """
        # Passed to random.gauss as the standard deviation of the noise.
        sigma = 14 if self.scheme == 'Geetest' else 8
        xmin, ymin, xmax, ymax = box
        pixels = np.array(img)
        for row in range(ymin, ymax):
            for col in range(xmin, xmax):
                if pixels[row][col][-1] == 0:
                    continue  # fully transparent pixel: nothing to texture
                noisy = np.array((random.gauss(0, sigma),
                                  random.gauss(0, sigma),
                                  random.gauss(0, sigma))).astype('int64')
                noisy += pixels[row][col][0:-1].astype('int64')
                pixels[row][col][0:-1] = noisy.clip(0, 255).astype('uint8')
        return Image.fromarray(np.uint8(pixels))

    def get_background(self):
        """Pick a random image from the scheme's background directory.

        Returns:
            ``(background, l, h)``: the RGBA background upscaled 10x (the
            characters are drawn at this scale) and the original width and
            height of the picked image.
        """
        folder = self.background
        names = os.listdir(folder)
        index = random.randint(0, len(names) - 1)
        # Close the source file as soon as the converted copy exists.
        with Image.open(folder + '/' + names[index]) as raw:
            background = raw.convert('RGBA')
        l, h = background.size
        background = background.resize((l * 10, h * 10))
        return background, l, h

    def _reset_mask(self, max_size):
        """Re-open the placement mask for a new CAPTCHA and return it."""
        mask = self.picture
        rows, cols = mask.shape
        mask[50:rows - max_size - 80, 50:cols - max_size - 80] = 1
        if self.scheme == 'renmin':
            mask[300:rows - max_size - 300, 300:cols - max_size - 300] = 1
            mask[750 - max_size:150, ] = 0
        elif self.scheme == 'Necaptcha':
            mask[rows - 1000 - max_size:rows, cols - 300 - max_size:cols] = 0
        elif self.scheme == 'dingxiang':
            mask[rows - 1300 - max_size:rows, 0:600] = 0
        return mask

    def _take_chars(self, char_num):
        """Sample ``char_num`` distinct characters and consume one use of each.

        Returns None (and sets ``self.stop``) when fewer than ``char_num``
        characters are left.
        """
        try:
            picked = random.sample(list(self.count_dic), char_num)
        except ValueError:
            # Sample larger than the remaining population: budget exhausted.
            self.stop = True
            return None
        for ch in picked:
            self.count_dic[ch] -= 1
            if self.count_dic[ch] == 0:
                del self.count_dic[ch]
        return picked

    @staticmethod
    def _pick_position(mask, max_size):
        """Pick a random free cell of ``mask`` and block its neighbourhood."""
        xs, ys = np.nonzero(mask)
        if xs.size == 0:
            raise IndexError('no free position left to place a character')
        # Index the coordinate arrays directly instead of materialising a
        # Python list of every free cell.
        k = random.randrange(xs.size)
        x, y = int(xs[k]), int(ys[k])
        mask[max(x - max_size, 0):x + max_size,
             max(y - max_size, 0):y + max_size] = 0
        return x, y

    @staticmethod
    def _char_box(x, y, max_size, lead, trail):
        """Return the padded character box, scaled down by the 10x factor."""
        return (int((x - lead) / 10.0), int((y - lead) / 10.0),
                int((x + max_size + trail) / 10.0),
                int((y + max_size + trail) / 10.0))

    def _draw_expression(self, captcha, operands, mask):
        """Draw the sougou layout: numeral, operator, numeral (left to right)."""
        min_size, max_size = self.size_range
        fonts = [random.choice(self.font) for _ in range(3)]
        operator = random.choice(self._SOUGOU_OPERATORS)
        chars = [operands[0], operator, operands[1]]
        print(*chars)
        colors = [random.choice(self.colors) for _ in range(3)]
        sizes = [random.randint(min_size, max_size) for _ in range(3)]
        positions = [self._pick_position(mask, max_size) for _ in range(3)]
        # Order the slots by x so the expression reads left to right.
        positions.sort(key=lambda pos: pos[0])
        print(*(coord for pos in positions for coord in pos))

        boxes = []
        # Rotation and bending are not applied for this scheme.
        for ch, font_path, color, size, (x, y) in zip(chars, fonts, colors,
                                                      sizes, positions):
            layer = Image.new(mode='RGBA', size=captcha.size,
                              color=(0, 0, 0, 0))
            glyph_font = ImageFont.truetype(font_path, size)
            ImageDraw.Draw(layer).text((x, y), ch, font=glyph_font, fill=color)
            captcha = Image.alpha_composite(captcha, layer)
            boxes.append(self._char_box(x, y, max_size, 50, 80))
        return captcha, chars, boxes

    def _draw_chars(self, captcha, picked, mask):
        """Draw each picked character on its own layer and composite it."""
        min_size, max_size = self.size_range
        max_angle = self.max_angle
        # renmin uses a tighter padding around each character box.
        lead, trail = (30, 20) if self.scheme == 'renmin' else (50, 80)
        chars = []
        boxes = []
        for char in picked:
            font_path = random.choice(self.font)
            color = random.choice(self.colors)
            if font_path == self._HOLLOW_FONT:
                color = self._HOLLOW_FONT_COLOR
            angle = random.randint(-max_angle, max_angle)
            size = random.randint(min_size, max_size)
            layer = Image.new(mode='RGBA', size=captcha.size,
                              color=(0, 0, 0, 0))
            glyph_font = ImageFont.truetype(font_path, size)
            draw = ImageDraw.Draw(layer)
            x, y = self._pick_position(mask, max_size)
            origin = (x, y)
            if self.scheme == 'Geetest':
                # Full-size dark glyph first, then a smaller coloured glyph
                # offset by 20 px on top of it.
                draw.text(origin, char, font=glyph_font,
                          fill=self._GEETEST_UNDERLAY)
                origin = (x + 20, y + 20)
                glyph_font = ImageFont.truetype(font_path, size - 40)
            draw.text(origin, char, font=glyph_font, fill=color)
            box = self._char_box(x, y, max_size, lead, trail)
            if self.scheme == 'Necaptcha':
                layer = cover(layer, x, y, size)
            layer = layer.rotate(angle=angle,
                                 center=(x + size // 2, y + size // 2))
            if self.scheme == 'Necaptcha':
                w_h = random.random() * 0.1
                w_v = random.random() * 0.1
                layer = bend_png(layer, box, 7, w_h, w_v)
            captcha = Image.alpha_composite(captcha, layer)
            boxes.append(box)
            chars.append(char)
        return captcha, chars, boxes

    def add_font(self, background):
        """Draw random, non-overlapping characters onto a background.

        Parameters:
            background: the ``(image, width, height)`` tuple returned by
                ``get_background``.

        Returns:
            ``(captcha, chars, boxes)`` where ``captcha`` is resized to
            ``(width, height)``, ``chars`` are the drawn characters and
            ``boxes`` their ``(x1, y1, x2, y2)`` boxes at that final scale;
            or None when the character budget is exhausted, in which case
            ``self.stop`` is set to True.
        """
        captcha, self.length, self.height = background
        if self.scheme == 'sougou':
            char_num = 2  # two numerals; the operator is added separately
        else:
            char_num = random.choice(self.char_num)
        _, max_size = self.size_range
        mask = self._reset_mask(max_size)

        picked = self._take_chars(char_num)
        if picked is None:
            return None

        if self.scheme == 'sougou':
            captcha, chars, boxes = self._draw_expression(captcha, picked, mask)
        else:
            captcha, chars, boxes = self._draw_chars(captcha, picked, mask)
        captcha = captcha.resize((self.length, self.height))
        return captcha, chars, boxes

    def save_char(self, captcha, chars, boxes, n):
        """Save every character crop, its text label and the CAPTCHA's JSON label.

        Crops go to ``<scheme>_chars/<k>.png``, labels to
        ``<scheme>_chars_label/<m>.txt`` (both numbered from the current
        number of directory entries) and the box annotations to
        ``<scheme>/captcha_label_<n>.json``.
        """
        chars_dir = self.scheme + '_chars'
        labels_dir = chars_dir + '_label/'
        shapes = []
        for char, box in zip(chars, boxes):
            crop = captcha.crop(box).convert('RGB')
            # Re-count on every iteration: each save adds a directory entry.
            num = len(os.listdir(chars_dir)) + 1
            crop.save(chars_dir + '/' + str(num) + '.png')
            m = len(os.listdir(labels_dir)) + 1
            with open(labels_dir + str(m) + '.txt', 'w', encoding='utf-8') as f:
                f.write(char)
            x1, y1, x2, y2 = box
            # Coordinates are stored as strings in the label file.
            shapes.append({'label': char,
                           'points': [[str(x1), str(y1)], [str(x2), str(y2)]]})
        jsondata = json.dumps({'shapes': shapes}, indent=4,
                              separators=(',', ': '))
        label_path = self.scheme + '/captcha_label_' + str(n) + '.json'
        with open(label_path, 'w', encoding='utf-8') as f:
            f.write(jsondata)

    def generate(self, num=9999999999):
        """Generate up to ``num`` CAPTCHAs into the ``<scheme>`` directory.

        Exits the process with status 0 (raises SystemExit) as soon as the
        character budget is exhausted.
        """
        out_dir = self.scheme
        for i in range(num):
            # The directory holds one image and one JSON label per CAPTCHA.
            index = len(os.listdir(out_dir)) // 2 + 1
            background = self.get_background()
            result = self.add_font(background)
            if result is None or self.stop:
                # Check before unpacking: add_font returns None on exhaustion.
                print('已经遍历所有字符%d次' % self.every_char)
                raise SystemExit(0)
            captcha, chars, boxes = result
            self.save_char(captcha, chars, boxes, index)
            captcha = captcha.convert("RGB")
            captcha.save(out_dir + "/captcha_" + str(index) + '.png')
            print(i + 1)
# def save_label(self,char,locations):