#!/usr/bin/env python3
#
"""Collect Netskope page events and append them to a dated text file.

The script reads its settings from CONFIG_FILE, pages through
``{NTSKP_TENANT}/api/v1/events`` (``type=page``) using the ``skip``
parameter, and appends one comma-separated line per event
(``utctime,domain,cci,category,ccl,user``) to
``/home/mischa/netskope/api-YYYYMMDD.txt``.
"""
import configparser
import json
import logging
import os
import re
import sys
import time
import urllib.parse
from datetime import datetime, timezone

import requests

###############################################

CONFIG_FILE = "/home/mischa/netskope/netskope.cnf"

if not os.path.isfile(CONFIG_FILE):
    logging.error(f"The config file {CONFIG_FILE} doesn't exist")
    sys.exit(1)

config = configparser.RawConfigParser()
config.read(CONFIG_FILE)

NTSKP_TENANT = config.get('netskope', 'NTSKP_TENANT')
NTSKP_TOKEN = config.get('netskope', 'NTSKP_TOKEN')
NTSKP_PERIOD = config.get('netskope', 'NTSKP_PERIOD')
NTSKP_SCORE = config.get('netskope', 'NTSKP_SCORE')
NTSKP_CATEGORIES = config.get('netskope', 'NTSKP_CATEGORIES')
NTSKP_CONFIDENCE = config.get('netskope', 'NTSKP_CONFIDENCE')
PROXY = config.get('general', 'PROXY')

###############################################

# Use a custom user-agent string
UA_STRING = 'NetskopeAPICollector1.0'

# Stop paging once this many events have been skipped (safety limit).
MAX_SKIP = 500000

# (connect, read) timeouts in seconds so a stalled connection cannot hang
# the collector forever.
REQUEST_TIMEOUT = (10, 120)

# Set logging.INFO to logging.DEBUG for debug information
logging.basicConfig(level=logging.INFO)
# Kept under its own name so the ``logging`` module is not shadowed.
logger = logging.getLogger('NetskopeAPICollector')

###############################################


def _proxies_from_config(proxy):
    """Turn the configured proxy string into a requests ``proxies`` mapping.

    requests expects a ``{scheme: proxy_url}`` dict, not a bare string.
    Returns ``None`` (no explicit proxy) when the setting is blank.
    NOTE(review): assumes PROXY holds a single proxy URL - confirm against
    the config file.
    """
    proxy = proxy.strip()
    if not proxy:
        return None
    return {'http': proxy, 'https': proxy}


def _redact_token(message):
    """Best-effort removal of the API token from *message* before logging.

    Request errors embed the full URL, which carries the token in its
    query string.
    """
    if NTSKP_TOKEN:
        return message.replace(NTSKP_TOKEN, '***')
    return message


def _format_record(item):
    """Return the output line for one event dict.

    The domain comes from ``item['domain']`` when present, otherwise from
    the network location of ``item['url']``. The timestamp is rendered in
    UTC as ``YYYY-mm-dd HH:MM:SS``.
    """
    if 'domain' in item:
        domain = item['domain']
    else:
        domain = urllib.parse.urlparse(item['url']).netloc
    # NOTE(review): the original carried commented-out filters on a safelist
    # regex and on item['ccl'] vs NTSKP_CONFIDENCE; neither is applied, so
    # every event is written.
    utctime = datetime.fromtimestamp(
        item['timestamp'], tz=timezone.utc
    ).strftime('%Y-%m-%d %H:%M:%S')
    return (
        f"{utctime},{domain},{item['cci']},{item['category']},"
        f"{item['ccl']},{item['user']}\n"
    )


def ntskp_get_domains(headers):
    """Page through the events API and append every event to today's file.

    Args:
        headers: HTTP headers sent with every request.

    Paging stops when a response has no ``data`` (missing or empty) or once
    MAX_SKIP events have been skipped. On a request failure, an HTTP error
    status, or a body that is not valid JSON, the error is logged and the
    process exits with status 1.
    """
    skip = 0
    filename = f"/home/mischa/netskope/api-{datetime.now().strftime('%Y%m%d')}.txt"
    logger.info("File %s created", filename)
    proxies = _proxies_from_config(PROXY)

    with requests.Session() as ssl_session:
        logger.debug("%s", ssl_session)

        while True:
            uri = (
                f'{NTSKP_TENANT}/api/v1/events?token={NTSKP_TOKEN}'
                f'&type=page&timeperiod={NTSKP_PERIOD}&skip={skip}'
            )
            try:
                r = ssl_session.get(
                    uri,
                    headers=headers,
                    proxies=proxies,
                    timeout=REQUEST_TIMEOUT,
                )
                r.raise_for_status()
                payload = r.json()
            except (requests.RequestException, ValueError) as e:
                logger.error("Error: %s", _redact_token(str(e)))
                sys.exit(1)

            records = payload.get('data') if isinstance(payload, dict) else None
            if not records:
                # Missing or empty page: everything has been collected.
                logger.info("No more data to collect")
                break

            # The context manager closes the file even if a record is
            # missing an expected key and formatting raises.
            with open(filename, "a", encoding="utf-8") as filter_file:
                logger.debug("File %s opened", filename)
                filter_file.writelines(_format_record(item) for item in records)
            logger.debug("File %s closed", filename)

            # Advance by what was actually received so the offset stays
            # correct whatever page size the server uses.
            skip += len(records)
            logger.debug("Next request, skip: %s", skip)

            if skip >= MAX_SKIP:
                logger.info("Reached limit")
                break


###############################################

request_headers = {
    'Content-Type': 'application/json',
    'Cache-Control': 'no-cache',
    'User-Agent': UA_STRING,
}
ntskp_get_domains(request_headers)