Commit bab1c47a authored by Daniel Nurmi

adding GEM analyzer and queries


Signed-off-by: Daniel Nurmi <nurmi@anchore.com>
parent 9135f23f
#!/usr/bin/env python

import sys
import os
import shutil
import re
import json
import time
import rpm
import subprocess

import anchore.anchore_utils

analyzer_name = "package_list"

try:
    config = anchore.anchore_utils.init_analyzer_cmdline(sys.argv, analyzer_name)
except Exception as err:
    print str(err)
    sys.exit(1)

imgname = config['imgid']
imgid = imgname
outputdir = config['dirs']['outputdir']
unpackdir = config['dirs']['unpackdir']

#if not os.path.exists(outputdir):
#    os.makedirs(outputdir)

pkglist = {}

try:
    # get (or build and cache) the list of all files in the unpacked image rootfs
    allfiles = {}
    if os.path.exists(unpackdir + "/anchore_allfiles.json"):
        with open(unpackdir + "/anchore_allfiles.json", 'r') as FH:
            allfiles = json.loads(FH.read())
    else:
        fmap, allfiles = anchore.anchore_utils.get_files_from_path(unpackdir + "/rootfs")
        with open(unpackdir + "/anchore_allfiles.json", 'w') as OFH:
            OFH.write(json.dumps(allfiles))

    # installed gem metadata lives in *.gemspec files under a "specifications" directory
    for tfile in allfiles.keys():
        patt = re.match(".*specifications.*\.gemspec$", tfile)
        if patt:
            thefile = '/'.join([unpackdir, 'rootfs', tfile])
            with open(thefile, 'r') as FH:
                try:
                    pdata = FH.read().decode('utf8')
                    precord = anchore.anchore_utils.gem_parse_meta(pdata)
                    for k in precord.keys():
                        record = precord[k]
                        pkglist[tfile] = json.dumps(record)
                except Exception as err:
                    print "WARN: " + str(thefile) + " : " + str(err)

except Exception as err:
    import traceback
    traceback.print_exc()
    raise err

if pkglist:
    ofile = os.path.join(outputdir, 'pkgs.gems')
    anchore.anchore_utils.write_kvfile_fromdict(ofile, pkglist)

sys.exit(0)
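For reference, a minimal standalone sketch of the discovery step above: the analyzer keys each stored record by the gemspec path it matched under the unpacked rootfs. The paths and the placeholder record below are illustrative only, not taken from a real image.

import re
import json

# illustrative paths; only files under a "specifications" directory that end
# in .gemspec are treated as installed gem metadata
sample_paths = [
    "/usr/lib/ruby/gems/2.3.0/specifications/rake-12.3.0.gemspec",
    "/usr/lib/ruby/gems/2.3.0/gems/rake-12.3.0/lib/rake.rb",
]

pkglist = {}
for tfile in sample_paths:
    if re.match(r".*specifications.*\.gemspec$", tfile):
        # in the analyzer the value is the JSON-serialized record returned by
        # gem_parse_meta(); a placeholder record stands in for it here
        pkglist[tfile] = json.dumps({"name": "rake", "versions": ["12.3.0"]})

print(list(pkglist.keys()))
# ['/usr/lib/ruby/gems/2.3.0/specifications/rake-12.3.0.gemspec']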
#!/usr/bin/env python

import sys
import os
import stat
import re
import json
import traceback

import anchore.anchore_utils

# main routine
try:
    config = anchore.anchore_utils.init_query_cmdline(sys.argv, "params: <package name> <package name> ...\nhelp: use 'all' to show all packages")
except Exception as err:
    print str(err)
    sys.exit(1)

if not config:
    sys.exit(0)

if len(config['params']) <= 0:
    print "Query requires input: <package name> <package name> ..."

outlist = list()
warns = list()
outlist.append(["Image_Id", "Repo_Tags", "Package_Name", "Version", "Origin", "Source", "License", "Location"])

try:
    # handle the good case, something is found resulting in data matching the required columns
    allimages = {}
    # each value in pkgs.gems is a JSON-serialized gem record keyed by gemspec path
    pkgdetail_data = anchore.anchore_utils.load_analysis_output(config['imgid'], 'package_list', 'pkgs.gems')
    for pname in pkgdetail_data.keys():
        jsonstr = pkgdetail_data[pname]
        pkgdata = json.loads(jsonstr)
        location = pname
        name = pkgdata['name']

        match = False
        if 'all' in config['params']:
            match = True
        else:
            for prefix in config['params']:
                if re.match(prefix, name):
                    match = True
                    break
        if not match:
            continue

        version = ' '.join(pkgdata.pop('versions', []))
        origin = ' '.join(pkgdata.pop('origins', []))
        source = pkgdata.pop('sourcepkg', 'N/A')
        lic = ' '.join(pkgdata.pop('lics', []))

        if not lic:
            lic = 'Unknown'
        if not origin:
            origin = 'N/A'
        if not version:
            version = 'Unknown'
        if not source:
            source = 'N/A'

        outlist.append([config['meta']['shortId'], config['meta']['humanname'], name, version, origin, source, lic, location])

except Exception as err:
    # handle the case where something wrong happened
    import traceback
    traceback.print_exc()
    warns.append("Query failed for image ("+str(config['imgid'])+") with exception: " + str(err))

anchore.anchore_utils.write_kvfile_fromlist(config['output'], outlist)
if len(warns) > 0:
    anchore.anchore_utils.write_plainfile_fromlist(config['output_warns'], warns)

sys.exit(0)
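A small sketch of how one stored pkgs.gems value is turned into a report row. The JSON string below is a made-up example of the record shape the analyzer writes (name/versions/origins/sourcepkg/lics), not output captured from a real image.

import json

jsonstr = '{"name": "rake", "versions": ["12.3.0"], "origins": ["Hiroshi SHIBATA"], "sourcepkg": "https://github.com/ruby/rake", "lics": ["MIT"], "latest": "12.3.0", "files": []}'
pkgdata = json.loads(jsonstr)

row = [
    pkgdata['name'],
    ' '.join(pkgdata.get('versions', [])) or 'Unknown',
    ' '.join(pkgdata.get('origins', [])) or 'N/A',
    pkgdata.get('sourcepkg') or 'N/A',
    ' '.join(pkgdata.get('lics', [])) or 'Unknown',
]
print(row)
# ['rake', '12.3.0', 'Hiroshi SHIBATA', 'https://github.com/ruby/rake', 'MIT']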
#!/usr/bin/env python

import sys
import os
import stat
import re
import json
import traceback

import anchore.anchore_utils

# main routine
try:
    config = anchore.anchore_utils.init_query_cmdline(sys.argv, "params: <package name> <package name> ...\nhelp: use 'all' to show all packages")
except Exception as err:
    print str(err)
    sys.exit(1)

if not config:
    sys.exit(0)

if len(config['params']) <= 0:
    print "Query requires input: <package name> <package name> ..."

outlist = list()
warns = list()
outlist.append(["Image_Id", "Repo_Tags", "*Package_Name", "Version", "Location"])

try:
    # handle the good case, something is found resulting in data matching the required columns
    allimages = {}
    pkgdetail_data = anchore.anchore_utils.load_analysis_output(config['imgid'], 'package_list', 'pkgs.gems')
    for pname in pkgdetail_data.keys():
        jsonstr = pkgdetail_data[pname]
        pkgdata = json.loads(jsonstr)
        location = pname
        name = pkgdata['name']

        match = False
        if 'all' in config['params']:
            match = True
        else:
            for prefix in config['params']:
                if re.match(prefix, name):
                    match = True
                    break
        if not match:
            continue

        version = ' '.join(pkgdata.pop('versions', []))
        origin = ' '.join(pkgdata.pop('origins', []))
        source = pkgdata.pop('sourcepkg', 'N/A')
        lic = ' '.join(pkgdata.pop('lics', []))

        if not lic:
            lic = 'Unknown'
        if not origin:
            origin = 'N/A'
        if not version:
            version = 'Unknown'
        if not source:
            source = 'N/A'

        outlist.append([config['meta']['shortId'], config['meta']['humanname'], name, version, location])

except Exception as err:
    # handle the case where something wrong happened
    import traceback
    traceback.print_exc()
    warns.append("Query failed for image ("+str(config['imgid'])+") with exception: " + str(err))

anchore.anchore_utils.write_kvfile_fromlist(config['output'], outlist)
if len(warns) > 0:
    anchore.anchore_utils.write_plainfile_fromlist(config['output_warns'], warns)

sys.exit(0)
@@ -196,7 +196,7 @@ def feed_group_data_exists(feed, group, datafile):
    return (False)

-def sync_feeds(force_since=None):
+def sync_feeds(force_since=None, do_combine=False):
    ret = {'success': False, 'text': "", 'status_code': 1}

    feedmeta = load_anchore_feedmeta()
@@ -252,8 +252,6 @@ def sync_feeds(force_since=None):
                    _logger.warn("\t\tWARN: failed to download feed/group data (" + str(feed) + "/" + str(
                        group) + "): {}".format(err_msg))

                rc, msg = handle_anchore_feed_post(feed, group)
        else:
            _logger.info("skipping data sync for unsubscribed feed (" + str(feed) + ") ...")
@@ -269,6 +267,10 @@
    if not save_anchore_feedmeta(feedmeta):
        ret['text'] = "\t\tWARN: failed to store metadata on synced feed data"

+   # if user has asked for data compress, do it now
+   if do_combine:
+       handle_datafile_combine()

    return (True, ret)
@@ -365,6 +367,8 @@ def save_anchore_feed_group_data(feed, group, datafile, data):
def load_anchore_feed_group_data(feed, group, datafile):
    return (contexts['anchore_db'].load_feed_group_data(feed, group, datafile))

+def delete_anchore_feed_group_data(feed, group, datafile):
+    return (contexts['anchore_db'].delete_feed_group_data(feed, group, datafile))

def load_anchore_feed(feed, group, ensure_unique=False):
    ret = {'success': False, 'msg': "", 'data': list()}
@@ -381,7 +385,10 @@ def load_anchore_feed(feed, group, ensure_unique=False):
    if datameta and 'datafiles' in datameta:
        unique_hash = {}
-       for datafile in sorted(datameta['datafiles']):
+       revfiles = sorted(datameta['datafiles'])
+       revfiles.reverse()
+       #for datafile in sorted(datameta['datafiles']):
+       for datafile in revfiles:
            thelist = load_anchore_feed_group_data(feed, group, datafile)
            if ensure_unique:
                for el in thelist:
@@ -389,7 +396,8 @@
                    elkey = el.keys()[0]
                    if elkey in unique_hash:
                        _logger.debug("FOUND duplicate entry during scan for unique data values: " + str(elkey))
-                   unique_hash[elkey] = el
+                   else:
+                       unique_hash[elkey] = el

            ret['data'] = ret['data'] + thelist
            ret['success'] = True
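To illustrate the intent of the two changes above (iterating datafiles newest-first and only recording a key on its first occurrence): when ensure_unique is set, the newest copy of a record now wins over older duplicates. A toy sketch with hypothetical records follows.

# records as they would be seen after the reverse(): newest datafile first
thelist_newest_first = [
    {'CVE-2016-0001': {'Severity': 'High'}},   # from the newest datafile
    {'CVE-2016-0001': {'Severity': 'Low'}},    # stale duplicate from an older datafile
    {'CVE-2016-0002': {'Severity': 'Medium'}},
]

unique_hash = {}
for el in thelist_newest_first:
    elkey = list(el.keys())[0]
    if elkey in unique_hash:
        pass  # duplicate: the newer record already seen is kept
    else:
        unique_hash[elkey] = el

print(unique_hash['CVE-2016-0001'])
# {'CVE-2016-0001': {'Severity': 'High'}}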
@@ -467,9 +475,59 @@ def handle_anchore_feed_post(feed, group):
                    msg = "failed to download/import image: " + imageId
            else:
                _logger.info("\t\tskipping: " + str(imageId) + ": already in DB")
    else:
        # no handler
        pass

    return (ret, msg)
def handle_datafile_combine():
    ret = True

    feedmeta = load_anchore_feedmeta()

    for feed in feedmeta.keys():
        if 'groups' in feedmeta[feed]:
            _logger.info("combining data for feed ("+str(feed)+") ...")
            for group in feedmeta[feed]['groups']:
                rawdata = load_anchore_feed(feed, group, ensure_unique=False)
                data = rawdata['data']

                uniqhash = {}
                uniq = list()
                collisions = 0
                for v in data:
                    vid = None
                    try:
                        if feed == 'vulnerabilities':
                            vid = v['Vulnerability']['Name']
                        elif feed == 'packages':
                            vid = v.keys()[0]
                    except:
                        vid = None
                        pass

                    if vid:
                        if vid not in uniqhash:
                            uniqhash[vid] = True
                            uniq.append(v)
                        else:
                            collisions = collisions + 1

                rawdata.clear()
                _logger.info("\tprocessing group data: " + str(group) + ": removed " + str(collisions) + " records as duplicate or out-of-date")

                # datafile updates
                updatetime = int(time.time())
                now = time.strftime("%Y-%m-%d", time.gmtime(updatetime))
                datafilename = "data_" + now + "_to_" + now + ".json"
                rc = save_anchore_feed_group_data(feed, group, datafilename, uniq)
                if rc:
                    for dfile in feedmeta[feed]['groups'][group]['datafiles']:
                        if dfile != datafilename:
                            delete_anchore_feed_group_data(feed, group, dfile)
                    feedmeta[feed]['groups'][group]['datafiles'] = [datafilename]

    save_anchore_feedmeta(feedmeta)
    return(ret)
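A small sketch (with hypothetical records) of the bookkeeping in handle_datafile_combine() above: the de-duplication key depends on the feed type, and the surviving records are re-saved into a single data_<date>_to_<date>.json file named for the current day, after which superseded datafiles are removed.

import time

# hypothetical records, one per supported feed type
vuln_record = {'Vulnerability': {'Name': 'CVE-2016-0001', 'Severity': 'High'}}
pkg_record = {'rake': {'latest': '12.3.0'}}

# the de-duplication key per feed type, as in the loop above
vid_vuln = vuln_record['Vulnerability']['Name']   # 'CVE-2016-0001'
vid_pkg = list(pkg_record.keys())[0]              # 'rake'

# the combined output file spans a single day: today to today
now = time.strftime("%Y-%m-%d", time.gmtime(int(time.time())))
datafilename = "data_" + now + "_to_" + now + ".json"
print(datafilename)  # e.g. data_2016-08-15_to_2016-08-15.json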
@@ -678,3 +678,17 @@ class AnchoreImageDB_FS(anchore_image_db_base.AnchoreImageDB):
                ret = json.loads(FH.read().decode('utf8'))

        return(ret)

    def delete_feed_group_data(self, feed, group, datafilename):
        ret = True
        basedir = self.feedrootdir
        thefile = os.path.join(basedir, feed, group, datafilename)
        if os.path.exists(thefile):
            try:
                os.remove(thefile)
            except:
                ret = False
        return(ret)
@@ -901,6 +901,87 @@ def rpm_get_all_pkgfiles_orig(unpackdir):

    return(rpmfiles)

def gem_parse_meta(gem):
    ret = {}

    name = None
    versions = []
    lics = []
    latest = None
    origins = []
    sourcepkg = None
    rfiles = []

    try:
        for line in gem.splitlines():
            line = line.strip()
            line = re.sub("\.freeze", "", line)

            # look for the unicode \u{} format and try to convert to something python can use
            try:
                replline = line
                mat = "\\\u{.*?}"
                patt = re.match(r".*("+mat+").*", replline)
                while(patt):
                    replstr = ""
                    subpatt = re.match("\\\u{(.*)}", patt.group(1))
                    if subpatt:
                        chars = subpatt.group(1).split()
                        for char in chars:
                            replstr += unichr(int(char, 16))

                    if replstr:
                        replline = re.sub(re.escape(patt.group(1)), replstr, replline, 1)

                    patt = re.match(r".*("+mat+").*", replline)

                line = replline
            except Exception as err:
                pass

            patt = re.match(".*\.name *= *(.*) *", line)
            if patt:
                name = json.loads(patt.group(1))

            patt = re.match(".*\.homepage *= *(.*) *", line)
            if patt:
                sourcepkg = json.loads(patt.group(1))

            patt = re.match(".*\.version *= *(.*) *", line)
            if patt:
                v = json.loads(patt.group(1))
                latest = v
                versions.append(latest)

            patt = re.match(".*\.licenses *= *(.*) *", line)
            if patt:
                lstr = re.sub("^\[|\]$", "", patt.group(1)).split(',')
                for thestr in lstr:
                    thestr = re.sub(' *" *', "", thestr)
                    lics.append(thestr)

            patt = re.match(".*\.authors *= *(.*) *", line)
            if patt:
                lstr = re.sub("^\[|\]$", "", patt.group(1)).split(',')
                for thestr in lstr:
                    thestr = re.sub(' *" *', "", thestr)
                    origins.append(thestr)

            patt = re.match(".*\.files *= *(.*) *", line)
            if patt:
                lstr = re.sub("^\[|\]$", "", patt.group(1)).split(',')
                for thestr in lstr:
                    thestr = re.sub(' *" *', "", thestr)
                    rfiles.append(thestr)

    except Exception as err:
        print "WARN could not fully parse gemspec file: " + str(name) + ": exception: " + str(err)
        return({})

    if name:
        ret[name] = {'name':name, 'lics':lics, 'versions':versions, 'latest':latest, 'origins':origins, 'sourcepkg':sourcepkg, 'files':rfiles}

    return(ret)

def npm_parse_meta(npm):
    record = {}
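A self-contained illustration of the kind of fields gem_parse_meta() above pulls out of a gemspec. The gemspec text is invented for the example, and only the name/version patterns from the parser are repeated here.

import re
import json

gemspec = '''
Gem::Specification.new do |s|
  s.name = "rake".freeze
  s.version = "12.3.0".freeze
  s.licenses = ["MIT".freeze]
  s.authors = ["Hiroshi SHIBATA".freeze]
  s.homepage = "https://github.com/ruby/rake".freeze
end
'''

for line in gemspec.splitlines():
    line = re.sub(r"\.freeze", "", line.strip())
    patt = re.match(r".*\.name *= *(.*) *", line)
    if patt:
        print("name: " + json.loads(patt.group(1)))
    patt = re.match(r".*\.version *= *(.*) *", line)
    if patt:
        print("version: " + json.loads(patt.group(1)))
# name: rake
# version: 12.3.0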
@@ -1351,9 +1432,10 @@ def normalize_packages(imageId):

    return(ret)

def cve_scan_packages(cve_data, norm_packages, flavor):
    import time
    #start = time.time()

    results = {}
    for v in cve_data:
        vuln = v['Vulnerability']
        #print "cve-scan: CVE: " + vuln['Name']
@@ -1442,7 +1524,10 @@ def cve_scanimage(cve_data, imageId):
        print "cve-scan: could not determin image distro: returning empty value"
        return({})

    import time

    norm_packages = normalize_packages(imageId)
    if 'bin_packages' not in norm_packages or not norm_packages['bin_packages']:
        return({})
@@ -203,7 +203,8 @@ def unsub(feednames, delete, dontask):

@feeds.command(name='sync', short_help="Sync (download) latest data for all subscribed feeds from the Anchore service.")
@click.option('--since', help='Force a feed sync from the given timestamp to today.', metavar='<unix timestamp>')
-def sync(since):
+@click.option('--do-compact', help='After syncing, process feed data to eliminate duplicate entries and store only latest data records', is_flag=True)
+def sync(since, do_compact):
    """
    Sync (download) latest data for all subscribed feeds from the Anchore service.
    """
@@ -215,7 +216,7 @@ def sync(since):
        anchore_print_err(ret['text'])
        ecode = 1
    else:
-       rc, ret = anchore_feeds.sync_feeds(force_since=since)
+       rc, ret = anchore_feeds.sync_feeds(force_since=since, do_combine=do_compact)
        if not rc:
            anchore_print_err(ret['text'])
            ecode = 1
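With this change in place, compaction can be requested as part of a normal sync, e.g. 'anchore feeds sync --do-compact' (assuming the usual anchore CLI entry point); the flag simply forwards do_combine=True into sync_feeds(), which runs handle_datafile_combine() after the downloaded feed data has been stored.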