Retry on Exception connection loss

This commit is contained in:
alesapin 2019-03-13 19:47:02 +03:00
parent cf8001b6a9
commit 49448b4a00

View File

@ -30,6 +30,10 @@ MSG_UNKNOWN = OP_SQUARE_BRACKET + colored(" UNKNOWN ", "yellow", attrs=['bold'])
MSG_OK = OP_SQUARE_BRACKET + colored(" OK ", "green", attrs=['bold']) + CL_SQUARE_BRACKET
MSG_SKIPPED = OP_SQUARE_BRACKET + colored(" SKIPPED ", "cyan", attrs=['bold']) + CL_SQUARE_BRACKET
MESSAGES_TO_RETRY = [
"Coordination::Exception: Connection loss",
]
def remove_control_characters(s):
"""
@ -44,6 +48,28 @@ def remove_control_characters(s):
s = re.sub(ur"[\x00-\x08\x0b\x0e-\x1f\x7f]", "", s)
return s
def run_single_test(args, ext, server_logs_level, case_file, stdout_file, stderr_file):
if ext == '.sql':
command = "{0} --send_logs_level={1} --testmode --multiquery < {2} > {3} 2> {4}".format(args.client, server_logs_level, case_file, stdout_file, stderr_file)
else:
command = "{} > {} 2> {}".format(case_file, stdout_file, stderr_file)
proc = Popen(command, shell = True)
start_time = datetime.now()
while (datetime.now() - start_time).total_seconds() < args.timeout and proc.poll() is None:
sleep(0.01)
stdout = open(stdout_file, 'r').read() if os.path.exists(stdout_file) else ''
stdout = unicode(stdout, errors='replace', encoding='utf-8')
stderr = open(stderr_file, 'r').read() if os.path.exists(stderr_file) else ''
stderr = unicode(stderr, errors='replace', encoding='utf-8')
return proc, stdout, stderr
def need_retry(stderr):
return any(msg in stderr for msg in MESSAGES_TO_RETRY)
def main(args):
SERVER_DIED = False
@ -201,16 +227,7 @@ def main(args):
stdout_file = os.path.join(suite_tmp_dir, name) + '.stdout'
stderr_file = os.path.join(suite_tmp_dir, name) + '.stderr'
if ext == '.sql':
command = "{0} --send_logs_level={1} --testmode --multiquery < {2} > {3} 2> {4}".format(args.client, server_logs_level, case_file, stdout_file, stderr_file)
else:
command = "{} > {} 2> {}".format(case_file, stdout_file, stderr_file)
proc = Popen(command, shell = True)
start_time = datetime.now()
while (datetime.now() - start_time).total_seconds() < args.timeout and proc.poll() is None:
sleep(0.01)
proc, stdout, stderr = run_single_test(args, ext, server_logs_level, case_file, stdout_file, stderr_file)
if proc.returncode is None:
try:
proc.kill()
@ -224,10 +241,12 @@ def main(args):
failures += 1
print("{0} - Timeout!".format(MSG_FAIL))
else:
stdout = open(stdout_file, 'r').read() if os.path.exists(stdout_file) else ''
stdout = unicode(stdout, errors='replace', encoding='utf-8')
stderr = open(stderr_file, 'r').read() if os.path.exists(stderr_file) else ''
stderr = unicode(stderr, errors='replace', encoding='utf-8')
counter = 1
while proc.returncode != 0 and need_retry(stderr):
proc, stdout, stderr = run_single_test(args, ext, server_logs_level, case_file, stdout_file, stderr_file)
sleep(2**counter)
if counter > 6:
break
if proc.returncode != 0:
failure = et.Element("failure", attrib = {"message": "return code {}".format(proc.returncode)})