Files
RS4OSINT/formatting.ipynb
2022-12-21 16:47:12 +00:00

865 lines
32 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": 68,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
"]\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
"}\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
"]\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
"}\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
"]\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
"'\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
"}\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
"'\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
"'\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
"0\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
"'\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
"}\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
"0\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
"5\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
"'\n",
"--------------------------------------------------\n",
"'\n",
"--------------------------------------------------\n",
"]\n",
"--------------------------------------------------\n",
"]\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
"]\n",
"--------------------------------------------------\n",
"]\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
"}\n",
"--------------------------------------------------\n",
"}\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
"]\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
"y\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
"}\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
"}\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
"}\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
"}\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
"]\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
"'\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
"]\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
"]\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
"]\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
"}\n",
"--------------------------------------------------\n",
"}\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
"]\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
"0\n",
"--------------------------------------------------\n",
"'\n",
"--------------------------------------------------\n",
"'\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
"'\n",
"--------------------------------------------------\n",
"0\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
"1\n",
"--------------------------------------------------\n",
"'\n",
"--------------------------------------------------\n",
"'\n",
"--------------------------------------------------\n",
"r\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
"'\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
"'\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
"0\n",
"--------------------------------------------------\n",
"'\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
"}\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
"'\n",
"--------------------------------------------------\n",
"'\n",
"--------------------------------------------------\n",
"}\n",
"--------------------------------------------------\n",
")\n",
"--------------------------------------------------\n",
")\n"
]
}
],
"source": [
"import re\n",
"\n",
"def captions(f):\n",
"    \"\"\"Fold each figure's 'Fig.' caption line into the alt text of its image.\n",
"\n",
"    A figure is an empty-alt Markdown image (``![](...)``) on one line, a blank\n",
"    line, then a line starting with ``Fig.``. The caption text is moved into the\n",
"    image's square brackets and the caption line is removed; the newline that\n",
"    followed the image is kept.\n",
"\n",
"    Every figure is rewritten at the position where it was matched, so figures\n",
"    that share an image path, or a caption that is a prefix of another one\n",
"    ('Fig. 1' / 'Fig. 10'), cannot corrupt each other.\n",
"\n",
"    Args:\n",
"        f: Full text of the document as a single string.\n",
"\n",
"    Returns:\n",
"        The text with captions folded into the images. The number of figures\n",
"        rewritten is printed as a side effect.\n",
"    \"\"\"\n",
"    figure_re = re.compile(r'(!\\[\\].+\\))\\n(\\nFig\\..*)')\n",
"\n",
"    def add_caption(match):\n",
"        image, caption = match.group(1), match.group(2)\n",
"        # Only the leading '![]' is the alt-text slot; later '[]' pairs are left alone.\n",
"        return image.replace('[]', '[{}]'.format(caption.replace('\\n', '')), 1) + '\\n'\n",
"\n",
"    f, count = figure_re.subn(add_caption, f)\n",
"    print(count)  # number of figures rewritten\n",
"    return f\n",
"\n",
"def codeblocks(f):\n",
"    \"\"\"Print every '//'-commented code snippet found in a document.\n",
"\n",
"    A snippet starts at a line beginning with '//' and runs, across newlines,\n",
"    through the next ';' after that line. Each snippet is printed in full\n",
"    below a 50-dash separator.\n",
"\n",
"    Args:\n",
"        f: Full text of the document as a single string.\n",
"\n",
"    Returns:\n",
"        f, unchanged.\n",
"    \"\"\"\n",
"    # [\\s\\S] matches any character, newlines included. The previous (\\n|.)\n",
"    # was a capturing group, so findall() returned only the last captured\n",
"    # character (the one before ';') instead of the whole snippet.\n",
"    pattern = re.compile(r\"^//.*[\\s\\S]*?;\", re.MULTILINE)\n",
"    for match in pattern.finditer(f):\n",
"        print('-'*50)\n",
"        print(match.group(0))\n",
"    return f\n",
"\n",
"# Chapters to process, and whether to also fold figure captions into the\n",
"# image alt text and save the result back to disk (off by default, so the\n",
"# files are only read).\n",
"CHAPTERS = [2, 4, 5, 6]\n",
"REWRITE_CAPTIONS = False\n",
"\n",
"for i in CHAPTERS:\n",
"    path = \"F{}.qmd\".format(i)\n",
"    # Context managers close the handles that a bare open().read() left open.\n",
"    # NOTE(review): assumes the .qmd files are UTF-8 -- confirm.\n",
"    with open(path, \"r\", encoding=\"utf-8\") as qmd:\n",
"        f = qmd.read().replace('\\xa0', ' ')  # non-breaking space -> plain space\n",
"    if REWRITE_CAPTIONS:\n",
"        f = captions(f)\n",
"        with open(path, \"w\", encoding=\"utf-8\") as qmd:\n",
"            qmd.write(f)\n",
"    f = codeblocks(f)"
]
},
{
"cell_type": "code",
"execution_count": 85,
"metadata": {},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'urllib2' is not defined",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m/var/folders/6q/jt4x0r8n1rs0kbrrqrbj61fr0000gn/T/ipykernel_60302/1639298892.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0murllib\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0;32mif\u001b[0m \u001b[0murllib2\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'F2.qmd'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfind\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'text/javascript'\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"It has js.\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mNameError\u001b[0m: name 'urllib2' is not defined"
]
}
],
"source": [
"import urllib\n",
"# F2.qmd is a local file, so the built-in open() is used; the name urllib2 was never defined.\n",
"# NOTE(review): assumes the file is UTF-8 -- confirm.\n",
"with open('F2.qmd', 'r', encoding='utf-8') as qmd:\n",
"    # Membership test: str.find(...) == 0 was only true when the file *started* with the text.\n",
"    if 'text/javascript' in qmd.read():\n",
"        print(\"It has js.\")\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.9.5 64-bit ('3.9.5')",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.5"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "d34fbd810dd9652f8e464616181cf14dbb258b5c046bed5c2f54c6b5e518fed2"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}