#!/usr/bin/perl -w
# This script is Public Domain.
# Original version: Hanno Hecker (ankh-morph.org)
# Changes: Stephan Knauss (osm-tools.org)
#
# Reads an OSM XML file, classifies every node/way/relation by the ODbL
# acceptance status of the user recorded on the element, and prints a
# statistics report to STDOUT.  Progress and error messages go to STDERR.

use strict;
use Date::Parse;
use POSIX qw(strftime);
use File::stat;
use Time::localtime;
use LWP::Simple;

# Server directory holding the acceptance lists.  Each list is cached in the
# current directory under its remote file name.
my $list_base_url = 'http://planet.openstreetmap.org/users_agreed/';
my @list_files    = qw(users_agreed.txt users_disagreed.txt anon_changesets_agreed.txt);

# Read a list file containing one numeric id per line.
# Lines that are not a bare number (headers, comments) are skipped.
# Returns a hash (id => 1); dies when the file cannot be opened.
sub _read_id_list {
    my ($file) = @_;
    open(my $fh, '<', $file) or die "cannot open $file: $!\n";
    my %ids;
    while (my $line = <$fh>) {
        next unless $line =~ /^\s*(\d+)\s*$/;
        $ids{$1} = 1;
    }
    close $fh;
    return %ids;
}

# Percentage of $part in $total; 0 when $total is zero or undefined, so an
# input without e.g. relations no longer dies with a division by zero.
sub _percent {
    my ($part, $total) = @_;
    return $total ? ($part / $total) * 100 : 0;
}

if (scalar(@ARGV) != 1) {
    print STDERR "Detect problematic OSM nodes\n";
    print STDERR "Syntax: check.pl <file.osm>\n";
    exit 1;
}

open(my $osm_fh, '<', $ARGV[0])
    or die "cannot open OSM file '$ARGV[0]': $!\n";

# list is updated every hour on the server. So no need to recheck more often
# than every 30 minutes.  Any missing cache file forces a download, because
# the stat() calls further down would otherwise fail on it.
my $need_download = grep { !-e $_ } @list_files;
if (!$need_download && stat('users_agreed.txt')->mtime < time - 30 * 60) {
    $need_download = 1;
}

if ($need_download) {
    print STDERR "downloading user acceptance data from server...\n";
    for my $file (@list_files) {
        my $res = getstore($list_base_url . $file, $file);
        if (is_error($res)) {
            print STDERR "failed downloading $file: " . status_message($res) . "\n";
            exit 1;
        }
    }
    print STDERR "... \ndone.\n";
}
else {
    print STDERR "Acceptance data still current, using cached copy.\n";
}

my %accepted     = _read_id_list('users_agreed.txt');
my $acceptedDate = stat("users_agreed.txt")->mtime;

my %declined     = _read_id_list('users_disagreed.txt');
my $declinedDate = stat("users_disagreed.txt")->mtime;

my %anonAccepted     = _read_id_list('anon_changesets_agreed.txt');
my $anonAcceptedDate = stat("anon_changesets_agreed.txt")->mtime;

# treat anonymous edits as declined unless changelist contained in acceptance list
$declined{0}  = 1;
$accepted{-1} = 1;

# http://wiki.openstreetmap.org/wiki/Quick_History_Service/Changeset_Lists#balrog-kun_bot.2Fbbox_edits
$accepted{20587} = 1;

my %users      = ();
my %stats      = ();
my $newestEdit = '';

# Start every counter at 0 so that categories which never occur in the input
# still print as 0 instead of triggering "uninitialized" warnings.
for my $what (qw(node way relation)) {
    $stats{"${_}_$what"} = 0 for qw(all ok partial unknown loss);
}
$stats{"user_$_"} = 0 for qw(all odbl declined unknown);

print STDERR "Reading OSM data...\n";
while (my $line = <$osm_fh>) {
    next unless $line =~ /<(node|way|relation) /;
    my $what = $1;
    $stats{"all_$what"}++;

    my $uid  = $line =~ /uid="(\d+)"/    ? $1 : 0;
    my $user = $line =~ /user="([^"]+)"/ ? $1 : "UNKNOWN (old anonymous edit)";

    # Anonymous edit whose changeset was accepted: file it under pseudo uid -1.
    if ($uid == 0 && $line =~ /changeset="(\d+)"/ && exists $anonAccepted{$1}) {
        $uid  = -1;
        $user = "UNKNOWN (accepted ct)";
    }

    # manual adjust changed user names
    $user = 'Tom Layo' if $user eq 'Sam Kuat';

    my $current = ($users{$user} ||= {});
    $current->{$what}++;
    $current->{uid} = $uid;

    # first occurrence of user. determine ODbL status
    if (!exists $current->{odbl}) {
        $stats{user_all}++;
        $current->{node} = 0 unless exists $current->{node};    # ensure that node counter exists

        # NOTE(review): uids from 286582 upwards are treated as agreed without
        # consulting the list -- presumably accounts created after the new
        # terms became mandatory; confirm.
        if ($uid >= 286582 || exists $accepted{$uid}) {
            $stats{user_odbl}++;
            $current->{odbl} = 1;
        }
        elsif (exists $declined{$uid}) {
            $stats{user_declined}++;
            $current->{odbl} = -1;
        }
        else {
            $current->{odbl} = 0;
            $stats{user_unknown}++;
        }
    }

    my $version = $line =~ /version="(\d+)"/ ? $1 : 0;

    # version is 1 -> ODbL commitment of user is definitely valid for this
    $current->{"sole_$what"}++ if $version == 1;

    if ($current->{odbl} == 1) {
        ++$stats{ $version == 1 ? "ok_$what" : "partial_$what" };
    }
    elsif ($current->{odbl} == -1) {
        ++$stats{"loss_$what"};
    }
    elsif ($version == 1) {
        ++$stats{"unknown_$what"};
    }

    # ISO timestamps sort correctly as plain strings, hence gt/lt.
    my $ts = 0;
    if ($line =~ /timestamp="([^"]+)"/) {
        $ts = $1;
        $newestEdit = $ts if $ts gt $newestEdit;
    }
    if (!exists $current->{last} || $current->{last} lt $ts) {
        $current->{last} = $ts;
    }
}
close $osm_fh;

print " \n";

# Per-user element counts, keyed by user name; used for the per-user table.
my %rel  = map { ($_, $users{$_}->{relation}) } keys %users;
my %way  = map { ($_, $users{$_}->{way}) } keys %users;
my %node = map { ($_, $users{$_}->{node}) } keys %users;

print "

Global statistics

\n";

# An input without any timestamp leaves $newestEdit empty; do not print a
# bogus epoch date in that case.
my $newest_epoch = str2time($newestEdit);
my $newest_label = defined $newest_epoch
    ? strftime("%Y-%m-%d", gmtime($newest_epoch))
    : 'unknown';

print "Statistics based on edits up to: ".$newest_label."
\n";
print "Agreed ODbL users up to: ".strftime("%Y-%m-%d", gmtime($acceptedDate))."
\n";
print "Declined ODbL users up to: ".strftime("%Y-%m-%d", gmtime($declinedDate))."
\n";
print "

\n";

# One summary paragraph per element type: count and share of each category.
for my $what (qw(node way relation)) {
    my $total = $stats{"all_$what"};
    printf "Total ${what}s in DB: $total
- of this safe for ODbL: %d (%.2f%%)
- of this possibly ODbL: %d (%.2f%%)
- of this possible loss: %d (%.2f%%)
- of this definitive loss: %d (%.2f%%)

\n",
        map { ($stats{"${_}_$what"}, _percent($stats{"${_}_$what"}, $total)) }
            qw(ok partial unknown loss);
}

printf "Total users in DB: %d
- of this already agreed to ODbL: %d (%.2f%%)
- of this not yet answered: %d (%.2f%%)
- of this actively declined new CTs: %d (%.2f%%)

\n",
    $stats{user_all},
    map { ($stats{"user_$_"}, _percent($stats{"user_$_"}, $stats{user_all})) }
        qw(odbl unknown declined);

print "

"; print "
safe for ODbL:
an element is considered safe in case it exists in version 1 and the user already agreed to ODbL
";
# Legend, continued: the remaining element categories used by the global
# statistics are explained one paragraph at a time.
print "
possible loss:
an element is possibly lost in case it exists in version 1 and the user has not yet answered
"; print "
possibly ODbL:
the element has a version greater 1. The last editor agreed to ODbL. As this statistics does not evaluate the history it could be possible that the element was created/modified by a user not agreed to ODbL. In this case the whole or parts of the element would be lost. In case all previous versions are done by users that agreed to ODbL (or changes are irrelevant because replaced by newer changes) these elements are safe for ODbL. This is the optimistic assumption most users agree to ODbL.
\n"; print "
definitive loss:
the element was created/editied by a user who actively declined ODbL. In case of license switch this object is removed/edits reverted
\n"; print "
\n";
# Legend for the two column groups of the per-user table printed below.
print "
Last version by user:
The last modification of the element was done by the given user.
\n"; print "
Sole version by user:
The element exists in version 1 and was created by the given user.
\n"; print "
\n"; print "

Special handling

\n"; print "The edits of user 'balrog-kun' ID 20587 are considered clean, see http://wiki.openstreetmap.org/wiki/Quick_History_Service/Changeset_Lists#balrog-kun_bot.2Fbbox_edits\n
";
# Per-user table.  Users are visited in descending order of their node count
# (sort on %node).  While the rows are printed, users with odbl == -1 are
# appended to $declineFilter and users with odbl == 0 to $unknownFilter as
# '@uid=<id> ' arguments.
# NOTE(review): the string literals in this section look as if their markup
# was stripped (several prints are empty or start mid-expression) -- compare
# with a pristine copy of the script before changing anything here.
print "

Detailed statistics

\n"; my ($node_odbl, $node_sole, $node_noodbl, $way_odbl, $way_sole, $way_noodbl, $rel_odbl, $rel_sole, $rel_noodbl); my $declineFilter = "--keep="; my $unknownFilter = "--keep="; my $rank=0; print "\n"; print "\n"; print "\n"; foreach (sort { $node{$b} <=> $node{$a} } grep { defined $node{$_} } keys %node) { $rank++; print " {odbl} == '1' ? "is_odbl" : ($users{$_}->{odbl} == '-1' ? "no_odbl" : "")) ,"\">\n"; print " \n"; print " \n"; print " \n"; print " \n"; print " \n"; print " \n"; print " \n"; print " \n"; print " \n"; print " \n"; print " \n"; # build osmfilter line if ($users{$_}->{odbl} == '-1') { $declineFilter .= '@uid='.$users{$_}->{uid}." "; } elsif ($users{$_}->{odbl} == '0') { $unknownFilter .= '@uid='.$users{$_}->{uid}." "; } } print "\n"; print "\n"; print "\n"; print "\n"; print "\n"; print "\n"; print "\n"; print "\n"; print "\n"; printf "\n",$stats{ok_node}+$stats{unknown_node}; printf "\n",($stats{ok_node}+$stats{unknown_node})*100/$stats{all_node}; printf "\n",$stats{ok_way}+$stats{unknown_way}; printf "\n",($stats{ok_way}+$stats{unknown_way})*100/$stats{all_way}; printf "\n",$stats{ok_relation}+$stats{unknown_relation}; printf "\n",($stats{ok_relation}+$stats{unknown_relation})*100/$stats{all_relation}; print "\n"; print "\n"; print "
Last version by user (not relevant for ODbL)Sole version by user
RankUser (uid)ODbLnodesnodes %waysways %relationsrelations %nodesnodes %waysways %relationsrelations %last edit
$rank$_ ($users{$_}->{uid}) w"; if ($users{$_}->{odbl} != '1') { print " {uid}][\@meta]\" class=\"w\">j"; } print "",$users{$_}->{odbl},"",node_perc($node{$_}) ,"",way_perc($way{$_}) ,"",rel_perc($rel{$_}) ,"",node_perc($users{$_}->{sole_node}) ,"",way_perc($users{$_}->{sole_way}) ,"",rel_perc($users{$_}->{sole_relation}) ,"",strftime("%Y-%m-%d", gmtime(str2time($users{$_}->{last}))),"
Total sums$stats{user_all}$stats{all_node}100%$stats{all_way}100%$stats{all_relation}100%%d%.2f%%%d%.2f%%%d%.2f%%
\n";
# Emit the two osmfilter argument strings collected in the per-user loop.
print "

Filter for osmfilter

\n"; print "

get all edits from declined:

$declineFilter

\n"; print "

get all edits from undecided:

$unknownFilter

\n"; print "

Statistics created by Stephan Knauss\n"; print "\n";

# Shared formatter behind node_perc/way_perc/rel_perc.
#   $num   - element count for one user (may be undef or 0)
#   $total - overall count of that element type
# Returns the placeholder "  " when there is nothing to show, otherwise the
# count followed by its share of $total as a percentage with two decimals.
# A zero or undefined $total yields 0.00% instead of a division by zero.
sub _count_and_percent {
    my ($num, $total) = @_;
    return "  " unless $num;
    my $share = $total ? ($num / $total) * 100 : 0;
    return sprintf "%d%.2f%%", $num, $share;
}

# Format a node count relative to all nodes read ($stats{all_node}).
sub node_perc {
    my ($num) = @_;
    return _count_and_percent($num, $stats{all_node});
}

# Format a way count relative to all ways read ($stats{all_way}).
sub way_perc {
    my ($num) = @_;
    return _count_and_percent($num, $stats{all_way});
}

# Format a relation count relative to all relations read ($stats{all_relation}).
sub rel_perc {
    my ($num) = @_;
    return _count_and_percent($num, $stats{all_relation});
}