Jebal
2020.01.28
jupyter
summary
PDF Read In
LDA Analysis
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Sep 24 16:04:44 2019
@author: rj
"""
#%% IMPORTS
#import xlrd
import spacy
import pandas as pd
import nltk
from nltk.corpus import wordnet as wn
from nltk.stem.wordnet import WordNetLemmatizer
import random
import gensim
from gensim import corpora
import pickle
import pyLDAvis.gensim
# Load the spaCy 'en' model. The returned pipeline is not assigned to anything,
# so the only effect here is that the load is attempted (and fails loudly if
# the model is not installed).
# NOTE(review): the 'en' shortcut name is believed to be rejected by spaCy v3+
# (full package names such as 'en_core_web_sm' are required) -- confirm against
# the installed spaCy version.
spacy.load('en')
from spacy.lang.en import English
# spaCy English language object; tokenize() calls it on raw text to get tokens.
parser = English()
# Fetch the NLTK corpora used below; NLTK reports "already up-to-date" and
# skips the download when they are present locally.
nltk.download('stopwords')  # needed for en_stop
nltk.download('wordnet')    # needed for wn.morphy / WordNetLemmatizer
# English stopwords as a set, so the membership test in
# prepare_text_for_lda() is a hash lookup.
en_stop = set(nltk.corpus.stopwords.words('english'))
#%% INPUT DATA
# Path to the IGEMS spreadsheet -- edit this to point at your copy of the data.
loc = '/Users/rj/Documents/Hacking_4_Defense/python_code/Example IGEMS data.xlsx'
# Earlier xlrd-based loader, kept for reference only:
#   wb = xlrd.open_workbook(loc)
#   sheet = wb.sheet_by_index(0)
data = pd.read_excel(loc)
# Keep only the five columns the analysis uses, then drop every row that has
# a missing value in any of them.
df = pd.DataFrame(
    data,
    columns=['Index', 'Name', 'Organization', 'Type', 'Comment'],
).dropna()
print(df)
#%% FUNCTIONS
def tokenize(text):
    """Tokenize *text* with the module-level spaCy ``parser``.

    Whitespace-only tokens are dropped. URL-like tokens are replaced by the
    placeholder ``'URL'`` and tokens beginning with ``@`` by ``'SCREEN_NAME'``;
    every other token is returned lower-cased.

    Returns:
        list of str, in document order.
    """
    def _normalise(tok):
        # The URL test takes precedence over the '@' test.
        if tok.like_url:
            return 'URL'
        if tok.orth_.startswith('@'):
            return 'SCREEN_NAME'
        return tok.lower_

    return [_normalise(tok) for tok in parser(text) if not tok.orth_.isspace()]
def get_lemma(word):
    """Return the WordNet base form of *word*.

    Falls back to *word* unchanged when ``wn.morphy`` returns ``None``.
    """
    base_form = wn.morphy(word)
    return word if base_form is None else base_form
def get_lemma2(word):
    """Lemmatize *word* using a newly constructed NLTK ``WordNetLemmatizer``."""
    lemmatizer = WordNetLemmatizer()
    return lemmatizer.lemmatize(word)
def prepare_text_for_lda(text):
    """Turn raw *text* into the list of lemmas fed to the topic model.

    Steps, applied to the output of ``tokenize``:
      1. keep only tokens longer than four characters,
      2. drop tokens present in the ``en_stop`` stopword set,
      3. map each surviving token through ``get_lemma``.

    Returns:
        list of str.
    """
    return [
        get_lemma(tok)
        for tok in tokenize(text)
        if len(tok) > 4 and tok not in en_stop
    ]
#%% PROCESS DATA
# Adapted from:
# https://towardsdatascience.com/topic-modelling-in-python-with-nltk-and-gensim-4ef03213cd21
text_data = []

# One sub-frame per distinct value of the 'Type' column, built directly from a
# boolean mask (no placeholder dict that is immediately overwritten).
UniqueTypes = df['Type'].unique()
DataFrameDict = {key: df[df['Type'] == key] for key in UniqueTypes}

# Named per-category frames. A KeyError here means that category is absent
# from the spreadsheet. Copies keep them independent of the dict entries.
deficiency_df = DataFrameDict['Deficiency'].copy()
recommendations_df = DataFrameDict['Recommended Improvement Area'].copy()
grade_summary_df = DataFrameDict['Grade/Summary'].copy()
strength_df = DataFrameDict['Strength'].copy()

# Unused alternative approach, previously parked in a string literal:
#   df.sort_values(by='Type', axis=1, inplace=True)        # sort the dataframe
#   df.set_index(keys=['Type'], drop=False, inplace=True)  # index by Type, keep the column
#   types = df['Type'].unique().tolist()                   # list of type names
#   deficieny = df.loc[df.type=='deficiency']              # lookup on a 'view' of the dataframe

# Build one token list per comment of the selected category.
for comment in recommendations_df['Comment']:
    tokens = prepare_text_for_lda(comment)
    # A `if random.random() > .99:` guard used to sit here (presumably to print
    # only a ~1% random sample of documents); every document is printed.
    print(tokens)
    text_data.append(tokens)

# Vocabulary and bag-of-words corpus, persisted so they can be reloaded below.
dictionary = corpora.Dictionary(text_data)
corpus = [dictionary.doc2bow(text) for text in text_data]
# `with` guarantees the file is flushed and closed before it is read back.
with open('corpus.pkl', 'wb') as corpus_file:
    pickle.dump(corpus, corpus_file)
dictionary.save('dictionary.gensim')

# Alternative 8-topic run, kept for reference:
#   NUM_TOPICS = 8
#   ldamodel = gensim.models.ldamodel.LdaModel(corpus, num_topics = NUM_TOPICS, id2word=dictionary, passes=15)
#   ldamodel.save('model8.gensim')
#   topics = ldamodel.print_topics(num_words=4)
#   for topic in topics:
#       print(topic)

# Train and save the 5-topic model, then print the top 4 words of each topic.
NUM_TOPICS = 5
ldamodel = gensim.models.ldamodel.LdaModel(
    corpus,
    num_topics=NUM_TOPICS,
    id2word=dictionary,
    passes=15,
)
ldamodel.save('model5.gensim')
topics = ldamodel.print_topics(num_words=4)
for topic in topics:
    print(topic)

# Reload the persisted artefacts and render the interactive visualisation.
dictionary = gensim.corpora.Dictionary.load('dictionary.gensim')
# corpus.pkl was written by this script just above; do not point this at
# pickle files from an untrusted source.
with open('corpus.pkl', 'rb') as corpus_file:
    corpus = pickle.load(corpus_file)
lda = gensim.models.ldamodel.LdaModel.load('model5.gensim')
lda_display = pyLDAvis.gensim.prepare(lda, corpus, dictionary, sort_topics=False)
# Must remain the last expression so the notebook displays its value.
pyLDAvis.display(lda_display)
[nltk_data] Downloading package stopwords to /Users/rj/nltk_data... [nltk_data] Package stopwords is already up-to-date! [nltk_data] Downloading package wordnet to /Users/rj/nltk_data... [nltk_data] Package wordnet is already up-to-date!
Index Name Organization \ 1 1.4.2.2 Right Quality anon_S3 2 1.2.3.3 Unit anon_S8 3 1.2.3.3 Unit anon_S8 4 1.3.2.1 Key Work Processes anon_S8 5 1.4.3 Mission-Assurance Command and Control anon_S14 6 1.4.1.3 Right Quantity anon_S20 7 1.3.3.1.1 Self-Assessment Program anon_S23 8 1.1.2.1 Manpower anon_S26 10 1.3.3.1.1 Self-Assessment Program anon_S36 11 1.1.1 Adequacy anon_S40 12 1.2.3.1 Individual anon_S45 13 1.3.3.2.3 Relevance anon_S48 14 1.3.1.2 Strategic Planning anon_S51 15 1.4.3.1 Warfighter or USAF CC Satisfaction anon_S55 16 1.4.1 Primary Mission (s) anon_S61 17 1.4.1.2 Right Quality anon_S66 18 1.2.3.3 Unit anon_S71 19 1.2.3.3 Unit anon_S71 20 1.2.1.3 Intent anon_S76 21 1.3.2.2 Risk Management anon_S76 22 1.1.1.2 Funds anon_S81 23 1.3.2.3 Commitment to Continuous Improvement anon_S81 24 1.4.1.1 Warfighter or USAF CC Satisfaction anon_S85 25 1.2.1.1 System anon_S88 29 1.1.2.3 Equipment anon_S104 30 1.3.2.3 Commitment to Continuous Improvement anon_S104 31 1.2.2.1 Compliance anon_S109 32 1.1.2.6 Airmen's Time anon_S111 34 1.1.2.5 Guidance anon_S119 35 1.2.3.3 Unit anon_S124 .. ... ... ... 
67 1.1.1.5 Guidance anon_S229 70 1.1.2.5 Guidance anon_S247 71 1.3.2.2 Risk Management anon_S252 72 1.1.2.3 Equipment anon_S255 73 1.2.2.1 Compliance anon_S255 75 1.2.1.3 Intent anon_S261 76 1.3.2.1 Key Work Processes anon_S264 77 1.3.4.1 Data Collection anon_S264 78 1.3.1.2 Strategic Planning anon_S274 80 1.4.1.2 Right Quality anon_S283 81 1.3.3 CC's Inspection Program (CCIP) anon_S287 83 1.1.2.4 Facilities and Environment anon_S297 84 1.4.1 Primary Mission (s) anon_S303 85 1.1.2.1 Manpower anon_S303 86 1.1.2.4 Facilities and Environment anon_S303 87 1.1.2.3 Equipment anon_S312 88 1.4.2.2 Right Quality anon_S316 89 1.1.2.3 Equipment anon_S321 90 1.2.2.5 Attention to Detail anon_S321 91 1.2.3 Training anon_S321 92 1.3.1.2 Strategic Planning anon_S321 93 1.1.1.2 Funds anon_S325 94 1.3.3 CC's Inspection Program (CCIP) anon_S325 95 1.1.2.3 Equipment anon_S331 96 1.3.2 Process Operations anon_S331 97 1.3.2 Process Operations anon_S331 98 1.3.3 CC's Inspection Program (CCIP) anon_S331 99 1.1.2.3 Equipment anon_S339 101 1.1.2.4 Facilities and Environment anon_S349 102 1.2.2 Discipline anon_S349 Type \ 1 Recommended Improvement Area 2 Deficiency 3 Deficiency 4 Deficiency 5 Strength 6 Deficiency 7 Recommended Improvement Area 8 Recommended Improvement Area 10 Deficiency 11 Grade/Summary 12 Deficiency 13 Recommended Improvement Area 14 Recommended Improvement Area 15 Grade/Summary 16 Deficiency 17 Deficiency 18 Deficiency 19 Deficiency 20 Recommended Improvement Area 21 Deficiency 22 Recommended Improvement Area 23 Recommended Improvement Area 24 Grade/Summary 25 Strength 29 Deficiency 30 Recommended Improvement Area 31 Deficiency 32 Grade/Summary 34 Deficiency 35 Deficiency .. ... 
67 Deficiency 70 Deficiency 71 Deficiency 72 Strength 73 Deficiency 75 Grade/Summary 76 Deficiency 77 Deficiency 78 Strength 80 Deficiency 81 Recommended Improvement Area 83 Deficiency 84 Deficiency 85 Recommended Improvement Area 86 Deficiency 87 Deficiency 88 Deficiency 89 Deficiency 90 Strength 91 Recommended Improvement Area 92 Deficiency 93 Grade/Summary 94 Grade/Summary 95 Deficiency 96 Recommended Improvement Area 97 Deficiency 98 Deficiency 99 Recommended Improvement Area 101 Deficiency 102 Deficiency Comment 1 Unit Readiness Program - Recommend the unit a... 2 The Base Training Manager, Unit Training Manag... 3 The Airfield Management Training NCOIC did not... 4 Chief did not document numerous minor discrepa... 5 Contracted Readiness and Emergency Management ... 6 Security Forces Operations - The Security Forc... 7 Self-Assessment Program - Recommend review and... 8 Separation of Duties - Recommend the Communica... 10 Self-Assessment Program - The Mission Support ... 11 HHQ provided adequate resources enabling the w... 12 The did not ensure a qualified person was assi... 13 Business Rules to Support SAP - Recommend the ... 14 IGQ and IGI Interface - Recommend IGI and IGQ ... 15 Commanders at all levels expressed satisfactio... 16 Intelligence Flight did not provide current in... 17 Arming and Use of Force Program - The Security... 18 Electrostatic Discharge Program - The electros... 19 Plans Scheduling and Documentation - The Plans... 20 Family OPSEC Awareness Outreach Program - Reco... 21 Munitions Account Management - The Munitions A... 22 Weapons Load Training Program - Recommend revi... 23 Quality Assurance Program - Recommend the qual... 24 Formal feedback processes effectively gauged f... 25 Leadership Communication - The level of commun... 29 Corrosion Control and Prevention Program - The... 30 OPSEC Program Management and Oversight - Recom... 31 Unit Training Manager did not conduct a compre... 32 The operations tempo within the was extremely ... 
34 The Base Records Manager did not ensure comman... 35 did not ensure unit members received records m... .. ... 67 Mission Directive - The Mission Directive, ACC... 70 Materiel Control bench stock program required ... 71 Respiratory Protection Program - The low obser... 72 Equipment Management - Customer Service and Eq... 73 Intelligence Support to Force Protection - The... 75 The Wg/CC posted a Command Philosophy on their... 76 Maintenance Operations Section Chief did not e... 77 did not ensure Job Control Numbers (JCN) great... 78 The cultivated a stellar patient safety progra... 80 CRITIC Program Management - Critical Informati... 81 should ensure exercise related materials are m... 83 The EMS Coordinator did not properly document ... 84 Family Advocacy Program did not always meet re... 85 Recommend multiple Installation Personnel Read... 86 Force Support Squadron's paintball area did no... 87 Flight Service did not ensure all research doc... 88 The Chief of Aerospace Medicine (SGP) did not ... 89 The Vehicle Management Materiel Control NCOIC ... 90 Financial Operations personnel developed and e... 91 Recommend personnel improve Unit Deployment Ma... 92 The SERE Training Superintendent did not ensur... 93 The Continuing Resolution Appropriation and bu... 94 The IGI staff was composed of two ART position... 95 Shop Supervisor did not ensure fire alarm smok... 96 Commander should ensure Test Measurement and D... 97 During the Deployment of Forces exercise in No... 98 During WIT-led exercise (Disease Containment P... 99 Equipment Account Management - Recommend the A... 101 LG Fuels Management Workplace Supervisor did n... 102 SVFL Lodging Manager did not execute base lodg... 
[85 rows x 5 columns] ['training', 'manager', 'training', 'manager', 'additional', 'training', 'manager', 'supervisor', 'implement', 'account', 'management', 'procedure', 'monitoring', 'role', 'assign', 'individual', 'training', 'business', 'user', 'specifically', 'member', 'assign', 'miss', 'require', 'authorization', 'documentation', 'additionally', 'member', 'miss', 'authorization', 'documentation', 'basic', 'trainee', 'trainer', 'certifier', 'flight', 'chief', 'role', 'relevant'] ['airfield', 'management', 'training', 'ncoic', 'conduct', 'document', 'quarterly', 'inspection', 'training', 'record', 'specifically', 'training', 'record', 'sample', 'quarterly', 'inspection', 'complete', 'relevant'] ['chief', 'document', 'numerous', 'minor', 'discrepancy', 'member', 'flight', 'evaluation', 'folder', 'minor', 'discrepancy', 'consequently', 'annual', 'review', 'adequately', 'accomplish'] ['security', 'force', 'operations', 'security', 'force', 'flight', 'operations', 'require', 'attention', 'security', 'force', 'response', 'force', 'post', 'accordance', 'integrate', 'defense', 'internal', 'security', 'response', 'dedicate', 'interior', 'assign', 'restrict', 'conduct', 'integrate', 'defense', 'management', 'process', 'idrmp', 'coordination', 'integrate', 'defense', 'council', 'integrate', 'defense', 'working', 'group', 'determine', 'appropriate', 'mitigation', 'measure', 'reduce', 'overall', 'assign', 'asset', 'annual', 'idrmps', 'identify', 'vertical', 'inspection', 'failure', 'provide', 'proper', 'security', 'accordance', 'force', 'instructions', 'could', 'inadequate', 'protection', 'assign', 'asset'] ['assessment', 'program', 'mission', 'support', 'group', 'assign', 'squadron', 'fail', 'establish', 'assessment', 'program', 'direct', 'prescribe', 'guidance', 'robust', 'commander', 'inspection', 'program', 'find', 'deficiency', 'improve', 'mission', 'readiness', 'furthermore', 'establish', 'identify', 'cause', 'deficiency', 'enable', 'sharing', 'practice', 
'organization', 'without', 'commander', 'ability', 'effectively', 'inspect', 'unit', 'subordinate', 'ensure', 'maximum', 'effectiveness', 'efficiency', 'economy', 'discipline', 'force', 'maintain'] ['ensure', 'qualify', 'person', 'assign', 'position', 'airfield', 'manager'] ['intelligence', 'flight', 'provide', 'current', 'intelligence', 'briefs', 'threat', 'briefs', 'scenario', 'input', 'mission', 'plan', 'mission', 'briefs', 'mission', 'debrief', 'kc-135', 'mission'] ['arming', 'force', 'program', 'security', 'force', 'arming', 'force', 'program', 'require', 'immediate', 'attention', 'commander', 'fail', 'develop', 'authority', 'firearm', 'roster', 'personnel', 'carry', 'firearm', 'commander', 'fail', 'personally', 'interview', 'newly', 'assign', 'personnel', 'prior', 'assigning', 'duty', 'require', 'firearm', 'commander', 'conduct', 'weekly', 'review', 'meeting', 'monthly', 'review', 'meeting', 'training', 'assembly', 'category', 'reserve', 'component', 'member', 'fail', 'maintain', 'arming', 'force', 'monitor', 'appointment', 'letter', 'servicing', 'armory', 'fail', 'conduct', 'personnel', 'reliability', 'assurance', 'program', 'training', 'testing', 'assign', 'personnel', 'coordinate', 'installation', 'hospital', 'ensure', 'immediate', 'notification', 'patient', 'commander', 'representative', 'necessary', 'treat', 'provider', 'identify', 'condition', 'capable', 'impair', 'security', 'force', 'member', 'reliability', 'perform', 'arm', 'duty', 'exercise', 'annually', 'exercise', 'include', 'participation', 'military', 'agency', 'expect', 'assist', 'commander', 'determine', 'member', 'suitability', 'firearm', 'execution', 'official', 'duty', 'fail', 'ensure', 'member', 'arm', 'work', 'previous', 'midnight', 'shift', 'current', 'sign', 'qualification', 'posse', 'firearm', 'ammunition', 'weapon', 'qualification', 'lethal', 'weapon', 'qualification', 'member', 'authority', 'expire', 'since', 'member', 'weapon', 'lethal', 'weapon', 'qualification', 'expire', 'fail', 
'small', 'receipt', 'verification', 'authority', 'firearm', 'procedure', 'process', 'place', 'armorer', 'verify', 'person', 'authorization', 'information', 'locate', 'armory', 'roster', 'roster', 'qualification', 'mechanism', 'lethal', 'weapon', 'available', 'validation', 'within', 'armory', 'issue', 'pepper', 'spray', 'special', 'certification', 'recur', 'training', 'record', 'sample', 'reflect', 'occurrence', 'arming', 'force', 'training', 'issue', 'indicate', 'serious', 'program', 'oversight', 'could', 'potentially', 'commander', 'unintentionally', 'accept', 'unqualified', 'personnel', 'arming', 'weapon', 'lethal', 'weapon', 'additionally', 'training', 'documentation', 'could', 'result', 'potential', 'legal', 'issue', 'force', 'employ', 'personnel'] ['electrostatic', 'discharge', 'program', 'electrostatic', 'discharge', 'program', 'avionics', 'section', 'technical', 'order', 'requirement', 'annual', 'awareness', 'prevention', 'training', 'familiarize', 'airman', 'applicable', 'technical', 'section', 'establish', 'administer', 'annual', 'comprehension', 'challenge', 'effective', 'program', 'essential', 'protect', 'equipment', 'aircraft', 'parts', 'inadvertently', 'damage', 'directly', 'affect', 'aircraft', 'readiness', 'availability'] ['plan', 'scheduling', 'documentation', 'plan', 'scheduling', 'documentation', 'section', 'document', 'write', 'guidance', 'training', 'master', 'listing', 'management', 'equipment', 'standard', 'decentralize', 'section', 'without', 'properly', 'train', 'review', 'entire', 'maintenance', 'complex', 'could', 'operate', 'incorrect', 'change', 'cycle', 'result', 'mismanagement', 'critical', 'inspection', 'equipment', 'commodity', 'negatively', 'effect', 'safety', 'flight', 'could', 'potentially', 'aircraft'] ['munition', 'account', 'management', 'munition', 'accountable', 'system', 'officer', 'maintain', 'combat', 'ammunition', 'system', 'segregation', 'duty', 'multiple', 'occasions', 'fiscal', 'process', 'munition', 'transactions', 
'validate', 'transactions', 'relate', 'specific', 'transactions', 'include', 'munition', 'movement', 'custody', 'account', 'expenditure', 'inventory', 'process', 'validate', 'daily', 'transaction', 'history', 'report', 'maintain', 'segregation', 'ensure', 'personnel', 'responsible', 'processing', 'munition', 'transactions', 'responsible', 'approving'] ['corrosion', 'control', 'prevention', 'program', 'aerospace', 'ground', 'equipment', 'section', 'properly', 'manage', 'equipment', 'corrosion', 'control', 'program', 'locally', 'develop', 'product', 'track', 'equipment', 'paint', 'status', 'however', 'sample', 'take', 'capstone', 'assets', 'overdue', 'annual', 'corrosion', 'inspection', 'active', 'corrosion', 'control', 'program', 'require', 'ensure', 'durability', 'support', 'equipment', 'assist', 'scheduling', 'prioritize', 'assets', 'worst', 'first', 'basis', 'appropriate', 'corrosion', 'facility'] ['training', 'manager', 'conduct', 'comprehensive', 'trainee', 'orientation', 'trainee', 'initially', 'entering', 'within', 'assignment', 'document', 'completion', 'approve', 'automate', 'system', 'specifically', 'training', 'record', 'review', 'requirement'] ['record', 'manager', 'ensure', 'commander', 'receive', 'training', 'regard', 'record', 'management', 'program'] ['ensure', 'member', 'receive', 'record', 'management', 'training'] ['personally', 'identifiable', 'information', 'disclosure', 'safeguard', 'personally', 'identifiable', 'information', 'share', 'network', 'drive', 'folder', 'contain', 'award', 'package', 'accessible', 'personnel', 'without', 'manage', 'record', 'share', 'network', 'drive', 'involve', 'participation', 'network', 'control', 'center', 'staff', 'record', 'professional', 'commander', 'user', 'share', 'responsibility', 'protect', 'record', 'content', 'comply', 'privacy', 'requirement', 'user', 'store', 'manage', 'protect', 'record', 'consistent', 'organizational', 'requirement', 'procedure', 'accord', 'afman', 'force', 'policy', 'require', 
'evaluation', 'information', 'system', 'owner', 'personal', 'collect', 'maintain', 'store', 'electronic', 'system', 'determine', 'impact', 'unauthorized', 'disclosure', 'active', 'application', 'guidance', 'help', 'prevent', 'inadvertent', 'access', 'protect', 'information'] ['equipment', 'custodian', 'always', 'conduct', 'floor', 'floor', 'inventory'] ['office', 'visit', 'suspense', 'expectation', 'often', 'provide', 'level', 'commensurate', 'developmental', 'level', 'recruiter', 'result', 'drive', 'expectation', 'issue', 'inexperienced', 'recruiter', 'without', 'specific', 'action', 'base', 'expectation', 'achieve', 'result', 'office', 'visit', 'often', 'ass', 'effectiveness', 'seven', 'mission', 'critical', 'task', 'focus', 'compliance', 'effectiveness', 'area', 'amount', 'expectation', 'adjust', 'respond', 'inadequate', 'production'] ['civil', 'engineer', 'operations', 'flight', 'follow', 'prioritization', 'system', 'guidance'] ['cybersecurity', 'workforce', 'improvement', 'program', 'require', 'attention', 'privilege', 'access', 'user', 'maintain', 'cybersecurity', 'baseline', 'certification', 'privilege', 'access', 'user', 'without', 'cybersecurity', 'baseline', 'certification', 'properly', 'waive'] ['flight', 'leadership', 'ensure', 'flight', 'member', 'train', 'certify', 'operate', 'response', 'vehicle', 'flight'] ['medical', 'readiness', 'officer', 'ensure', 'comprehensive', 'medical', 'readiness', 'program', 'training', 'analysis', 'conduct', 'annually', 'document', 'afscs'] ['combat', 'ncoic', 'review', 'update', 'annually'] ['weapon', 'safety', 'manager', 'ensure', 'assessment', 'accomplish', 'exercise', 'training', 'involve', 'explosive'] ['small', 'range', 'manager', 'prevent', 'improper', 'disposal', 'hazardous', 'waste', 'small', 'range'] ['provide', 'employee', 'initial', 'ethics', 'orientation', 'development', 'training', 'flight', 'member', 'within', 'arrival'] ['commander', 'ensure', 'accessible', 'individual', 'whose', 'official', 'duty', 
'provide', 'valid'] ['annual', 'review', 'special', 'security', 'officer', 'ensure', 'commander', 'conduct', 'document', 'annual', 'review', 'sensitive', 'compartmented', 'information', 'facility', 'standard', 'operate', 'procedure', 'conducting', 'annual', 'review', 'ensure', 'current', 'security', 'procedure', 'place'] ['mission', 'directive', 'mission', 'directive', 'accmd', 'volume', 'update', 'mission', 'change', 'least', 'every', 'years', 'recent', 'publish', 'mission', 'directive', 'date', 'almost', 'years', 'minimum', 'require', 'review', 'significant', 'additional', 'mission', 'requirement', 'levy', 'since', 'require', 'mission', 'directive', 'update', 'ensure', 'mission', 'directive', 'current', 'ensure', 'manpower', 'resource', 'available', 'fully', 'execute', 'assign', 'mission', 'accurately', 'reflect', 'scale', 'mission'] ['materiel', 'control', 'bench', 'stock', 'program', 'require', 'attention', 'bench', 'stock', 'item', 'approve', 'vehicle', 'fleet', 'manager', 'vehicle', 'management', 'superintendent', 'working', 'stock', 'item', 'approve', 'working', 'stock', 'contain', 'authorize', 'quantity', 'current', 'listing', 'match', 'inventory'] ['respiratory', 'protection', 'program', 'observables', 'section', 'respiratory', 'protection', 'program', 'review', 'approve', 'annually', 'bioenvironmental', 'engineering', 'require', 'review', 'approve', 'complete', 'april', 'contaminate', 'cartridge', 'store', 'respirator', 'change', 'schedule', 'always', 'follow', 'require', 'approve', 'cartridge', 'times', 'track', 'document', '30-day', 'inspection', 'respirator', 'complete', 'document', 'inspection', 'maintenance', 'record', 'maintain', 'effective', 'ensure', 'member', 'aware', 'storage', 'requirement', 'respirator', 'protect', 'member', 'exposure', 'hazardous', 'material'] ['intelligence', 'support', 'force', 'protection', 'intelligence', 'support', 'force', 'protection', 'appointment', 'letter', 'current', 'appointment', 'letter', 'draft', 'sign', 
'current', 'commander', 'appoint', 'member', 'complete', 'require', 'training', 'terrorism', 'level', 'training', 'available', 'force', 'protection', 'intelligence', 'formal', 'training', 'properly', 'train', 'personnel', 'directly', 'impact', 'force', 'protection', 'relate', 'intelligence', 'function', 'deployment', 'preparation', 'conducting', 'focus', 'predictive', 'analysis', 'support', 'threat', 'working', 'group', 'force', 'projection', 'working', 'group', 'sessions'] ['maintenance', 'operations', 'section', 'chief', 'ensure', 'proper', 'oversight', 'scheduling', 'process', 'specifically', 'plan', 'scheduling', 'documentation', 'section', 'perform', 'master', 'listing', 'reconcile', 'applicable', 'commodity', 'technical', 'order', 'ensure', 'accuracy', 'currency', 'change', 'maintenance', 'information', 'system', 'receipt', 'force', 'instruction', 'change', 'frequency', 'establish', 'standard', 'munition', 'maintenance', 'scheduling', 'effectiveness', 'program', 'monthly', 'review', 'meeting', 'own', 'agency', 'within', 'scheduling', 'communicator', 'assess', 'within', 'publication', 'within', 'management', 'internal', 'control', 'toolset'] ['ensure', 'control', 'numbers', 'greater', 'calendar', 'reschedule', 'defer', 'event', 'beyond', 'schedule', 'start', 'specifically', 'assign', 'deferment', '3,158', 'event'] ['critic', 'program', 'management', 'critical', 'information', 'critic', 'program', 'manager', 'ensure', 'subordinate', 'unit', 'develop', 'maintain', 'critic', 'program', 'accordance', 'governing', 'document', 'sigint', 'elements', 'production', 'function', 'rapidly', 'recognize', 'report', 'conditions', 'meeting', 'critic', 'criterion', 'participate', 'critic', 'evaluation', 'program', 'exception', 'waiver', 'approve', 'publish', 'within', 'unit', 'publish', 'unite', 'state', 'signal', 'intelligence', 'directive', 'ussid', 'effective', 'ensure', 'preparation', 'world', 'critic', 'situation', 'failure', 'establish', 'critic', 'program', 'could', 
'result', 'latent', 'reporting', 'critical', 'intelligence', 'information'] ['coordinator', 'properly', 'document', 'result', 'internal', 'environmental', 'compliance', 'stage', 'inspection', 'edash', 'require', 'specifically', 'event', 'finding', 'tracker', 'findings', 'program', 'assess', 'november', 'however', 'discrepancy', 'report', 'compliant'] ['family', 'advocacy', 'program', 'always', 'require', 'timeline', 'provide', 'treatment', 'services', 'refer', 'patient'] ['force', 'support', 'squadron', 'paintball', 'utilize', 'protective', 'vegetative', 'cover', 'control', 'stabilize', 'site', 'avoid', 'silt', 'stream'] ['flight', 'service', 'ensure', 'research', 'documentation', 'found', 'transactions', 'specifically', 'supply', 'surveillance', 'report', 'd20)was', 'file', 'documentation', 'transaction', 'process'] ['chief', 'aerospace', 'medicine', 'manage', 'deployment', 'relate', 'health', 'assessment', 'program', 'specifically', 'ensure', 'complete', 'within', 'require', 'timeframes', 'senior', 'leadership', 'training', 'conduct', 'annually', 'review', 'feedback', 'provide', 'primary', 'team'] ['vehicle', 'management', 'materiel', 'control', 'ncoic', 'ensure', 'tool', 'issue', 'consolidate', 'replace', 'personal', 'correspond', 'individual', 'af1297'] ['training', 'superintendent', 'ensure', 'assign', 'specialist', 'conduct', 'mandatory', 'review', 'task', 'oplans', 'personnel', 'recovery', 'annexe', 'support', 'theater', 'campaign', 'plan'] ['supervisor', 'ensure', 'alarm', 'smoke', 'detection', 'testing', 'equipment', 'calibration', 'facility', 'annual', 'calibration', 'digital', 'manometer', 'remove', 'service', 'calibration', 'expire', 'digital', 'manometer', 'receive', 'annual', 'calibration', 'expiration', 'assessment', 'communicator', 'associate', 'deficiency'] ['deployment', 'force', 'exercise', 'november', 'installation', 'deployment', 'officer', 'ensure', 'processing', 'provide', 'deploy', 'personnel', 'opportunity', 'conduct', 'preventive', 
'maintenance', 'service', 'check', 'pmscs', 'mask', 'base', 'population', 'deploy', 'personnel', 'issue', 'chalk-1', 'conduct', 'pmscs', 'mask', 'prior', 'palletizing', 'shipment', 'assessment', 'communicator', 'associate', 'deficiency'] ['exercise', 'disease', 'containment', 'october', 'november', 'member', 'detect', 'report', 'commander', 'develop', 'implement', 'write', 'guidance', 'small', 'light', 'weapon', 'safety', 'tailor', 'specifically', 'exercise', 'assessment', 'communicator', 'associate', 'deficiency'] ['fuel', 'management', 'workplace', 'supervisor', 'inform', 'safety', 'office', 'bioenvironmental', 'engineering', 'public', 'health', 'and/or', 'preventive', 'medicine', 'personnel', 'change', 'workplace', 'equipment', 'practice', 'procedure', 'impact', 'exposure', 'occupational', 'environmental', 'health', 'hazard', 'service', 'station', 'facility', 'operational', 'february', 'receive', 'health', 'assessment', 'prior', 'place', 'service', 'management', 'personnel', 'work', 'around', 'facility', 'personnel', 'service', 'station', 'without', 'health', 'assessment', 'complete', 'accomplish', 'applicable', 'assessment', 'communicator', 'identify', 'compliance', 'deficiency'] ['lodging', 'manager', 'execute', 'lodging', 'program', 'establish', 'instructions', 'indicate', 'document', 'training', 'lodging', 'employee', 'lodging', 'program', 'approve', 'base', 'training', 'program', 'ensure', 'employee', 'receive', 'initial', 'training', 'ensure', 'specific', 'preventive', 'maintenance', 'conduct', 'document', 'least', 'guest', 'rooms', 'quarter', 'accomplish', 'applicable', 'assessment', 'communicator', 'detect', 'compliance', 'however', 'identify', 'compliance', 'deficiency'] (0, '0.020*"munition" + 0.016*"stock" + 0.016*"expectation" + 0.016*"transactions"') (1, '0.028*"force" + 0.019*"training" + 0.016*"mission" + 0.013*"commander"') (2, '0.023*"ensure" + 0.015*"record" + 0.014*"conduct" + 0.013*"information"') (3, '0.034*"program" + 0.020*"critic" + 
0.014*"require" + 0.014*"document"') (4, '0.027*"training" + 0.018*"program" + 0.018*"ensure" + 0.016*"manager"')
/Users/rj/anaconda3/lib/python3.7/site-packages/pyLDAvis/_prepare.py:257: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version of pandas will change to not sort by default. To accept the future behavior, pass 'sort=False'. To retain the current behavior and silence the warning, pass 'sort=True'. return pd.concat([default_term_info] + list(topic_dfs))
Word Cloud
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Sep 24 16:04:44 2019
@author: rj
"""
#%% IMPORTS
#import xlrd
import spacy
import pandas as pd
import nltk
from nltk.corpus import wordnet as wn
from nltk.stem.wordnet import WordNetLemmatizer
import random
import gensim
from gensim import corpora
import pickle
import pyLDAvis.gensim
# Same NLP setup as the LDA cell earlier in this file, repeated so this cell
# can run on its own.
# The pipeline returned by spacy.load is not assigned, so this line only
# attempts the model load.
# NOTE(review): the 'en' shortcut name is believed to be rejected by spaCy v3+
# -- confirm against the installed spaCy version.
spacy.load('en')
from spacy.lang.en import English
# spaCy English language object.
parser = English()
# Fetch the NLTK stopword and WordNet corpora.
nltk.download('stopwords')
nltk.download('wordnet')
# English stopwords collected into a set.
en_stop = set(nltk.corpus.stopwords.words('english'))
#%% INPUT DATA
# Path to the IGEMS spreadsheet -- edit this to point at your copy of the data.
# Relative-path variant: loc = ('./Example IGEMS data.xlsx')
loc = '/Users/rj/Documents/Hacking_4_Defense/python_code/Example IGEMS data.xlsx'
# Earlier xlrd-based loader, kept for reference only:
#   wb = xlrd.open_workbook(loc)
#   sheet = wb.sheet_by_index(0)
data = pd.read_excel(loc)
# Keep only the five columns the analysis uses, then drop every row that has
# a missing value in any of them.
df = pd.DataFrame(
    data,
    columns=['Index', 'Name', 'Organization', 'Type', 'Comment'],
).dropna()
print(df)
Index Name Organization \ 1 1.4.2.2 Right Quality anon_S3 2 1.2.3.3 Unit anon_S8 3 1.2.3.3 Unit anon_S8 4 1.3.2.1 Key Work Processes anon_S8 5 1.4.3 Mission-Assurance Command and Control anon_S14 6 1.4.1.3 Right Quantity anon_S20 7 1.3.3.1.1 Self-Assessment Program anon_S23 8 1.1.2.1 Manpower anon_S26 10 1.3.3.1.1 Self-Assessment Program anon_S36 11 1.1.1 Adequacy anon_S40 12 1.2.3.1 Individual anon_S45 13 1.3.3.2.3 Relevance anon_S48 14 1.3.1.2 Strategic Planning anon_S51 15 1.4.3.1 Warfighter or USAF CC Satisfaction anon_S55 16 1.4.1 Primary Mission (s) anon_S61 17 1.4.1.2 Right Quality anon_S66 18 1.2.3.3 Unit anon_S71 19 1.2.3.3 Unit anon_S71 20 1.2.1.3 Intent anon_S76 21 1.3.2.2 Risk Management anon_S76 22 1.1.1.2 Funds anon_S81 23 1.3.2.3 Commitment to Continuous Improvement anon_S81 24 1.4.1.1 Warfighter or USAF CC Satisfaction anon_S85 25 1.2.1.1 System anon_S88 29 1.1.2.3 Equipment anon_S104 30 1.3.2.3 Commitment to Continuous Improvement anon_S104 31 1.2.2.1 Compliance anon_S109 32 1.1.2.6 Airmen's Time anon_S111 34 1.1.2.5 Guidance anon_S119 35 1.2.3.3 Unit anon_S124 .. ... ... ... 
67 1.1.1.5 Guidance anon_S229 70 1.1.2.5 Guidance anon_S247 71 1.3.2.2 Risk Management anon_S252 72 1.1.2.3 Equipment anon_S255 73 1.2.2.1 Compliance anon_S255 75 1.2.1.3 Intent anon_S261 76 1.3.2.1 Key Work Processes anon_S264 77 1.3.4.1 Data Collection anon_S264 78 1.3.1.2 Strategic Planning anon_S274 80 1.4.1.2 Right Quality anon_S283 81 1.3.3 CC's Inspection Program (CCIP) anon_S287 83 1.1.2.4 Facilities and Environment anon_S297 84 1.4.1 Primary Mission (s) anon_S303 85 1.1.2.1 Manpower anon_S303 86 1.1.2.4 Facilities and Environment anon_S303 87 1.1.2.3 Equipment anon_S312 88 1.4.2.2 Right Quality anon_S316 89 1.1.2.3 Equipment anon_S321 90 1.2.2.5 Attention to Detail anon_S321 91 1.2.3 Training anon_S321 92 1.3.1.2 Strategic Planning anon_S321 93 1.1.1.2 Funds anon_S325 94 1.3.3 CC's Inspection Program (CCIP) anon_S325 95 1.1.2.3 Equipment anon_S331 96 1.3.2 Process Operations anon_S331 97 1.3.2 Process Operations anon_S331 98 1.3.3 CC's Inspection Program (CCIP) anon_S331 99 1.1.2.3 Equipment anon_S339 101 1.1.2.4 Facilities and Environment anon_S349 102 1.2.2 Discipline anon_S349 Type \ 1 Recommended Improvement Area 2 Deficiency 3 Deficiency 4 Deficiency 5 Strength 6 Deficiency 7 Recommended Improvement Area 8 Recommended Improvement Area 10 Deficiency 11 Grade/Summary 12 Deficiency 13 Recommended Improvement Area 14 Recommended Improvement Area 15 Grade/Summary 16 Deficiency 17 Deficiency 18 Deficiency 19 Deficiency 20 Recommended Improvement Area 21 Deficiency 22 Recommended Improvement Area 23 Recommended Improvement Area 24 Grade/Summary 25 Strength 29 Deficiency 30 Recommended Improvement Area 31 Deficiency 32 Grade/Summary 34 Deficiency 35 Deficiency .. ... 
67 Deficiency 70 Deficiency 71 Deficiency 72 Strength 73 Deficiency 75 Grade/Summary 76 Deficiency 77 Deficiency 78 Strength 80 Deficiency 81 Recommended Improvement Area 83 Deficiency 84 Deficiency 85 Recommended Improvement Area 86 Deficiency 87 Deficiency 88 Deficiency 89 Deficiency 90 Strength 91 Recommended Improvement Area 92 Deficiency 93 Grade/Summary 94 Grade/Summary 95 Deficiency 96 Recommended Improvement Area 97 Deficiency 98 Deficiency 99 Recommended Improvement Area 101 Deficiency 102 Deficiency Comment 1 Unit Readiness Program - Recommend the unit a... 2 The Base Training Manager, Unit Training Manag... 3 The Airfield Management Training NCOIC did not... 4 Chief did not document numerous minor discrepa... 5 Contracted Readiness and Emergency Management ... 6 Security Forces Operations - The Security Forc... 7 Self-Assessment Program - Recommend review and... 8 Separation of Duties - Recommend the Communica... 10 Self-Assessment Program - The Mission Support ... 11 HHQ provided adequate resources enabling the w... 12 The did not ensure a qualified person was assi... 13 Business Rules to Support SAP - Recommend the ... 14 IGQ and IGI Interface - Recommend IGI and IGQ ... 15 Commanders at all levels expressed satisfactio... 16 Intelligence Flight did not provide current in... 17 Arming and Use of Force Program - The Security... 18 Electrostatic Discharge Program - The electros... 19 Plans Scheduling and Documentation - The Plans... 20 Family OPSEC Awareness Outreach Program - Reco... 21 Munitions Account Management - The Munitions A... 22 Weapons Load Training Program - Recommend revi... 23 Quality Assurance Program - Recommend the qual... 24 Formal feedback processes effectively gauged f... 25 Leadership Communication - The level of commun... 29 Corrosion Control and Prevention Program - The... 30 OPSEC Program Management and Oversight - Recom... 31 Unit Training Manager did not conduct a compre... 32 The operations tempo within the was extremely ... 
34 The Base Records Manager did not ensure comman... 35 did not ensure unit members received records m... .. ... 67 Mission Directive - The Mission Directive, ACC... 70 Materiel Control bench stock program required ... 71 Respiratory Protection Program - The low obser... 72 Equipment Management - Customer Service and Eq... 73 Intelligence Support to Force Protection - The... 75 The Wg/CC posted a Command Philosophy on their... 76 Maintenance Operations Section Chief did not e... 77 did not ensure Job Control Numbers (JCN) great... 78 The cultivated a stellar patient safety progra... 80 CRITIC Program Management - Critical Informati... 81 should ensure exercise related materials are m... 83 The EMS Coordinator did not properly document ... 84 Family Advocacy Program did not always meet re... 85 Recommend multiple Installation Personnel Read... 86 Force Support Squadron's paintball area did no... 87 Flight Service did not ensure all research doc... 88 The Chief of Aerospace Medicine (SGP) did not ... 89 The Vehicle Management Materiel Control NCOIC ... 90 Financial Operations personnel developed and e... 91 Recommend personnel improve Unit Deployment Ma... 92 The SERE Training Superintendent did not ensur... 93 The Continuing Resolution Appropriation and bu... 94 The IGI staff was composed of two ART position... 95 Shop Supervisor did not ensure fire alarm smok... 96 Commander should ensure Test Measurement and D... 97 During the Deployment of Forces exercise in No... 98 During WIT-led exercise (Disease Containment P... 99 Equipment Account Management - Recommend the A... 101 LG Fuels Management Workplace Supervisor did n... 102 SVFL Lodging Manager did not execute base lodg... [85 rows x 5 columns]
[nltk_data] Downloading package stopwords to /Users/rj/nltk_data... [nltk_data] Package stopwords is already up-to-date! [nltk_data] Downloading package wordnet to /Users/rj/nltk_data... [nltk_data] Package wordnet is already up-to-date!