Is there an easy way to determine why a mistral workflow still sits on Running
even though
the last action that ran within the workflow returned Failed
? In the past I found it was
because the action that would get called next (there’s a catch-all on-error
in each action)
had some bad jinja - but I removed all the jinja and the workflow is still Running
.
# st2 --version
st2 2.6.0
e.g.
$ st2 execution get 5ae9db797709b603ee05d827
id: 5ae9db797709b603ee05d827
action.ref: [redacted]_pdb_refresh.checkin
parameters:
callback_authorization_path: secret/cloud/prod/api_token
callback_url: none_specified
cdb_host: clouddb01.test.[redacted].com
cdb_version: 12.1.0.2
desktop_db: false
pdb_class: prt
pdb_name: prt_c572297f4f2a49529cf3802fbd
status: running (412s elapsed)
start_timestamp: Wed, 02 May 2018 15:38:32 UTC
end_timestamp:
+--------------------------+------------------------+---------------------+---------------------+-----------------+
| id | status | task | action | start_timestamp |
+--------------------------+------------------------+---------------------+---------------------+-----------------+
| 5ae9db7c7709b60bcb3e8b09 | succeeded (3s elapsed) | queue_state | [redacted]_common.loop_un | Wed, 02 May |
| | | | til | 2018 15:38:35 |
| | | | | UTC |
| 5ae9db7e7709b60bcb3e8b0b | succeeded (3s elapsed) | set_environment_var | consul.kv_get | Wed, 02 May |
| | | iable | | 2018 15:38:38 |
| | | | | UTC |
| 5ae9db827709b60bcb3e8b0d | failed (3s elapsed) | shutdown_pdb | [redacted]_dba_oracle.run | Wed, 02 May |
| | | | _sql | 2018 15:38:42 |
| | | | | UTC |
+--------------------------+------------------------+---------------------+---------------------+-----------------+
Here’s my workflow:
version: "2.0"
name: [redacted]_pdb_refresh.checkin
description: >
Perform a checkin on a PDB
workflows:
main:
type: direct
input:
- cdb_user
- vault_path
- pdb_class
- pdb_name
- callback_url
- desktop_db
- cdb_host
- cdb_version
- ancillary_email
- callback_authorization_path
output:
messages: <% $.status_message %>
tasks:
# This task will check the value of [redacted]/stackstorm/[redacted]_pdb_refresh/loop_until/{{ _.pdb_class }}/enabled in Consul
# if it is 'true' then we'll continue on with the workflow. If it's false
# then we loop. This is a way of 'pausing' the queue...
queue_state:
action: [redacted]_common.loop_until
input:
loop_key: [redacted]/stackstorm/[redacted]_pdb_refresh/loop_until/{{ _.pdb_class }}/enabled
loop_method: consul
loop_until_value: "true"
sleep_seconds: 30
loop_iterations: 5
loop_forever: true
on-success:
- set_environment_variable
on-error:
- post_notification
publish:
status_message: "Queue appears to be unpaused. Proceeding with PDB checkin"
publish-on-error:
status_message: "Failed to complete task `queue_state`"
# This task just sets an environment variable for the user who submitted this workflow.
set_environment_variable:
action: consul.kv_get
input:
key: [redacted]/stackstorm/[redacted]_pdb_refresh/cdb_hosts
from_json: true
publish:
environment_name: <% switch(bool($.pdb_name) => $.pdb_name, bool(env().get('__actions').get('st2.action').st2_context.parent.get('api_user')) => "dtdb_"+$.pdb_class+"_"+env().get('__actions').get('st2.action').st2_context.parent.api_user, 1 = 1 => env().get('__actions').get('st2.action').st2_context.parent.user) %>
new_pdb_name: <% switch(bool($.desktop_db) => concat('dtdb_', $.pdb_class, '_', $.pdb_name.toLower()), 1=1 => $.pdb_name.toLower()) %>
cdb_servicename: <% task('set_environment_variable').result.result[1].Value[$.pdb_class].cdb_map[$.cdb_host][$.cdb_version] %>
on-success:
- shutdown_pdb
shutdown_pdb:
action: [redacted]_dba_oracle.run_sql
input:
host: "{{ _.cdb_host }}"
user: "{{ _.cdb_user }}"
vault_path: "{{ _.vault_path }}"
service_name: "{{ _.cdb_servicename }}"
bad_sql_ok: false
sysoper: false
sql: >
DECLARE
PDB_NAME VARCHAR2(64) := '{{ _.new_pdb_name }}';
pdb_already_closed EXCEPTION ;
PRAGMA EXCEPTION_INIT(pdb_already_closed, -65020);
pdb_doesnot_exist EXCEPTION ;
PRAGMA EXCEPTION_INIT(pdb_doesnot_exist, -1031);
BEGIN
EXECUTE IMMEDIATE 'ALTER PLUGGABLE DATABASE '||PDB_NAME||'
CLOSE ABORT';
EXCEPTION
WHEN pdb_already_closed THEN
dbms_output.put_line('PDB '||PDB_NAME||' is already closed.');
NULL;
WHEN pdb_doesnot_exist THEN
dbms_output.put_line('PDB '||PDB_NAME||' does not exist.');
NULL;
WHEN OTHERS THEN
dbms_output.put_line('Some other error occured. '||sqlerrm);
raise;
END;
publish-on-error:
status_message: "Failed to complete task shutdown_pdb: {{ task('shutdown_pdb').result.result.ERRORS }}"
publish:
status_message: "Successfully shutdown PDB {{ _.pdb_name }}"
on-success:
- drop_pdb: <% $.desktop_db = true %>
- umount_pdb_vol: <% $.desktop_db = false %>
on-error:
- post_notification
umount_pdb_vol:
action: [redacted]_dba_oracle.unmount_nfs_volume
input:
hostnames:
- "{{ _.cdb_host }}"
mountpoint: "/oradata/cloud/{{ _.pdb_class }}/{{ _.new_pdb_name }}"
destroy: true
cls: "{{ _.pdb_class }}"
publish-on-error:
status_message: "Failed to complete task unmount_pdb_vol: {{ task('umount_pdb_vol').result.result.errors[0] }}"
publish:
status_message: "Successfully completed task unmount_pdb_vol: {{ task('umount_pdb_vol').result.result.results[0] }}"
on-success:
- drop_pdb
on-error:
- post_notification
drop_pdb:
action: [redacted]_dba_oracle.run_sql
input:
host: "{{ _.cdb_host }}"
user: "{{ _.cdb_user }}"
vault_path: "{{ _.vault_path }}"
service_name: "{{ _.cdb_servicename }}"
bad_sql_ok: false
sysoper: false
sql: >
DECLARE
PDB_NAME VARCHAR2(30) := '{{ _.new_pdb_name }}';
pdb_doesnot_exist EXCEPTION ;
PRAGMA EXCEPTION_INIT(pdb_doesnot_exist, -65011);
BEGIN
EXECUTE IMMEDIATE 'DROP PLUGGABLE DATABASE '||PDB_NAME||'
INCLUDING DATAFILES';
EXCEPTION WHEN pdb_doesnot_exist THEN
dbms_output.put_line('This PDB doesnt seem to exist.');
NULL;
WHEN OTHERS THEN
dbms_output.put_line('Some other error occured. '||sqlerrm);
raise;
END;
publish-on-error:
status_message: "Failed to complete task drop_pdb: {{ task('drop_pdb').result.result.ERRORS }}"
publish:
status_message: "Successfully dropped pdb {{ _.new_pdb_name }}"
on-success:
- make_callback_succeeded: "{{ _.callback_url != 'none_specified' }}"
on-error:
- post_notification
make_callback_succeeded:
action: core.http
input:
url: "{{ _.callback_url }}"
body: '{"status":"succeeded","pdb_class":"{{ _.pdb_class }}","pdb_name":"{{ _.pdb_name }}"}'
method: PUT
verify_ssl_cert: false
on-error:
- post_notification
post_notification:
action: slack.chat.postMessage
input:
channel: "dba_alerts"
text: |
There was a failure checking in the pdb *{{ _.pdb_name }}* from host *{{ _.cdb_host }}*
```{{ _.status_message }}```
For more information check the stackstorm execution_id @
https://dbaautomation.dba.overstock.com.com/#/history/{{ env().st2_execution_id }}/general
username: "StackStorm Bot"
icon_emoji: ":robot-arm:"
as_user: "false"
on-complete:
- fail