-
Notifications
You must be signed in to change notification settings - Fork 5.8k
/
Copy pathcodeHeapState.cpp
2425 lines (2203 loc) · 105 KB
/
codeHeapState.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/*
* Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2018, 2019 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "code/codeHeapState.hpp"
#include "compiler/compileBroker.hpp"
#include "oops/klass.inline.hpp"
#include "runtime/mutexLocker.hpp"
#include "runtime/safepoint.hpp"
#include "utilities/powerOfTwo.hpp"
// -------------------------
// | General Description |
// -------------------------
// The CodeHeap state analytics are divided in two parts.
// The first part examines the entire CodeHeap and aggregates all
// information that is believed useful/important.
//
// Aggregation condenses the information of a piece of the CodeHeap
// (4096 bytes by default) into an analysis granule. These granules
// contain enough detail to gain initial insight while keeping the
// internal structure sizes in check.
//
// The second part, which consists of several, independent steps,
// prints the previously collected information with emphasis on
// various aspects.
//
// The CodeHeap is a living thing. Therefore, protection against concurrent
// modification (by acquiring the CodeCache_lock) is necessary. It has
// to be provided by the caller of the analysis functions.
// If the CodeCache_lock is not held, the analysis functions may print
// less detailed information or may just do nothing. It is by intention
// that an unprotected invocation is not abnormally terminated.
//
// Data collection and printing is done on an "on request" basis.
// While no request is being processed, there is no impact on performance.
// The CodeHeap state analytics do have some memory footprint.
// The "aggregate" step allocates some data structures to hold the aggregated
// information for later output. These data structures live until they are
// explicitly discarded (function "discard") or until the VM terminates.
// There is one exception: the function "all" does not leave any data
// structures allocated.
//
// Requests for real-time, on-the-fly analysis can be issued via
// jcmd <pid> Compiler.CodeHeap_Analytics [<function>] [<granularity>]
//
// If you are (only) interested in what the CodeHeap looks like after running
// a sample workload, you can use the command line option
// -XX:+PrintCodeHeapAnalytics
// It will cause a full analysis to be written to tty. In addition, a full
// analysis will be written the first time a "CodeCache full" condition is
// detected.
//
// The command line option produces output identical to the jcmd function
// jcmd <pid> Compiler.CodeHeap_Analytics all 4096
// ---------------------------------------------------------------------------------
// With this declaration macro, it is possible to switch between
// - direct output into an argument-passed outputStream and
// - buffered output into a bufferedStream with subsequent flush
// of the filled buffer to the outputStream.
#define USE_BUFFEREDSTREAM
// There are instances when composing an output line or a small set of
// output lines out of many tty->print() calls creates significant overhead.
// Writing to a bufferedStream buffer first has a significant advantage:
// It uses noticeably less cpu cycles and reduces (when writing to a
// network file) the required bandwidth by at least a factor of ten. Observed on MacOS.
// That clearly makes up for the increased code complexity.
//
// Conversion of existing code is easy and straightforward, if the code already
// uses a parameterized output destination, e.g. "outputStream st".
// - rename the formal parameter to any other name, e.g. out_st.
// - at a suitable place in your code, insert
// BUFFEREDSTREAM_DECL(buf_st, out_st)
// This will provide all the declarations necessary. After that, all
// buf_st->print() (and the like) calls will be directed to a bufferedStream object.
// Once a block of output (a line or a small set of lines) is composed, insert
// BUFFEREDSTREAM_FLUSH(termstring)
// to flush the bufferedStream to the final destination out_st. termstring is just
// an arbitrary string (e.g. "\n") which is appended to the bufferedStream before
// being written to out_st. Be aware that the last character written MUST be a '\n'.
// Otherwise, buf_st->position() does not correspond to out_st->position() any longer.
// BUFFEREDSTREAM_FLUSH_LOCKED(termstring)
// does the same thing, protected by the ttyLocker lock.
// BUFFEREDSTREAM_FLUSH_IF(termstring, remSize)
// does a flush only if the remaining buffer space is less than remSize.
//
// To activate, #define USE_BUFFEREDSTREAM before this block of macro definitions.
// If not activated, output will directly go to the originally used outputStream
// with no additional overhead.
//
#if defined(USE_BUFFEREDSTREAM)
// All necessary declarations to print via a bufferedStream
// This macro must be placed before any other BUFFEREDSTREAM*
// macro in the function.
#define BUFFEREDSTREAM_DECL_SIZE(_anyst, _outst, _capa) \
ResourceMark _rm; \
/* _anyst name of the stream as used in the code */ \
/* _outst stream where final output will go to */ \
/* _capa allocated capacity of stream buffer */ \
size_t _nflush = 0; \
size_t _nforcedflush = 0; \
size_t _nsavedflush = 0; \
size_t _nlockedflush = 0; \
size_t _nflush_bytes = 0; \
size_t _capacity = _capa; \
bufferedStream _sstobj(_capa); \
bufferedStream* _sstbuf = &_sstobj; \
outputStream* _outbuf = _outst; \
bufferedStream* _anyst = &_sstobj; /* any stream. Use this to just print - no buffer flush. */
// Same as above, but with fixed buffer size.
#define BUFFEREDSTREAM_DECL(_anyst, _outst) \
BUFFEREDSTREAM_DECL_SIZE(_anyst, _outst, 4*K);
// Flush the buffer contents unconditionally.
// No action if the buffer is empty.
#define BUFFEREDSTREAM_FLUSH(_termString) \
if (((_termString) != nullptr) && (strlen(_termString) > 0)){\
_sstbuf->print("%s", _termString); \
} \
if (_sstbuf != _outbuf) { \
if (_sstbuf->size() != 0) { \
_nforcedflush++; _nflush_bytes += _sstbuf->size(); \
_outbuf->print("%s", _sstbuf->as_string()); \
_sstbuf->reset(); \
} \
}
// Flush the buffer contents if the remaining capacity is
// less than the given threshold.
#define BUFFEREDSTREAM_FLUSH_IF(_termString, _remSize) \
if (((_termString) != nullptr) && (strlen(_termString) > 0)){\
_sstbuf->print("%s", _termString); \
} \
if (_sstbuf != _outbuf) { \
if ((_capacity - _sstbuf->size()) < (size_t)(_remSize)){\
_nflush++; _nforcedflush--; \
BUFFEREDSTREAM_FLUSH("") \
} else { \
_nsavedflush++; \
} \
}
// Flush the buffer contents if the remaining capacity is less
// than the calculated threshold (256 bytes + capacity/16)
// That should suffice for all reasonably sized output lines.
#define BUFFEREDSTREAM_FLUSH_AUTO(_termString) \
BUFFEREDSTREAM_FLUSH_IF(_termString, 256+(_capacity>>4))
#define BUFFEREDSTREAM_FLUSH_LOCKED(_termString) \
{ ttyLocker ttyl;/* keep this output block together */ \
_nlockedflush++; \
BUFFEREDSTREAM_FLUSH(_termString) \
}
// #define BUFFEREDSTREAM_FLUSH_STAT() \
// if (_sstbuf != _outbuf) { \
// _outbuf->print_cr("%ld flushes (buffer full), %ld forced, %ld locked, %ld bytes total, %ld flushes saved", _nflush, _nforcedflush, _nlockedflush, _nflush_bytes, _nsavedflush); \
// }
#define BUFFEREDSTREAM_FLUSH_STAT()
#else
#define BUFFEREDSTREAM_DECL_SIZE(_anyst, _outst, _capa) \
size_t _capacity = _capa; \
outputStream* _outbuf = _outst; \
outputStream* _anyst = _outst; /* any stream. Use this to just print - no buffer flush. */
#define BUFFEREDSTREAM_DECL(_anyst, _outst) \
BUFFEREDSTREAM_DECL_SIZE(_anyst, _outst, 4*K)
#define BUFFEREDSTREAM_FLUSH(_termString) \
if (((_termString) != nullptr) && (strlen(_termString) > 0)){\
_outbuf->print("%s", _termString); \
}
#define BUFFEREDSTREAM_FLUSH_IF(_termString, _remSize) \
BUFFEREDSTREAM_FLUSH(_termString)
#define BUFFEREDSTREAM_FLUSH_AUTO(_termString) \
BUFFEREDSTREAM_FLUSH(_termString)
#define BUFFEREDSTREAM_FLUSH_LOCKED(_termString) \
BUFFEREDSTREAM_FLUSH(_termString)
#define BUFFEREDSTREAM_FLUSH_STAT()
#endif
#define HEX32_FORMAT "0x%x" // just a helper format string used below multiple times
// One display character per blobType value, used in the per-granule printout.
const char blobTypeChar[] = {' ', 'C', 'N', 'I', 'X', 'Z', 'U', 'R', '?', 'D', 'T', 'E', 'S', 'A', 'M', 'B', 'L' };
// Human-readable name per blobType value, used in legends and summaries.
// NOTE(review): blobTypeChar and blobTypeName appear to be indexed by the
// blobType enum (declared in the header, not visible here) — confirm both
// tables stay in sync with that enum.
const char* blobTypeName[] = {"noType"
, "nMethod (under construction), cannot be observed"
, "nMethod (active)"
, "nMethod (inactive)"
, "nMethod (deopt)"
, "runtime stub"
, "ricochet stub"
, "deopt stub"
, "uncommon trap stub"
, "exception stub"
, "safepoint stub"
, "adapter blob"
, "MH adapter blob"
, "buffer blob"
, "lastType"
};
// Compiler names, indexed by the compType enum.
const char* compTypeName[] = { "none", "c1", "c2", "jvmci" };
const unsigned int nSizeDistElements = 31; // logarithmic range growth, max size: 2**32
const unsigned int maxTopSizeBlocks = 100; // capacity of the "largest blocks" list
const unsigned int tsbStopper = 2 * maxTopSizeBlocks; // out-of-range index used as list terminator in TopSizeArray
// Be prepared for ten different CodeHeap segments. Should be enough for a few years.
const unsigned int maxHeaps = 10;
static unsigned int nHeaps = 0; // number of heaps registered in CodeHeapStatArray
static struct CodeHeapStat CodeHeapStatArray[maxHeaps]; // per-heap snapshot storage
// The globals below mirror exactly one CodeHeapStatArray slot at a time;
// get_HeapStatGlobals()/set_HeapStatGlobals() copy between the slot and
// these variables before/after each analysis or print step.
static StatElement* StatArray = nullptr;
static int log2_seg_size = 0;
static size_t seg_size = 0;
static size_t alloc_granules = 0;
static size_t granule_size = 0;
static bool segment_granules = false;
static unsigned int nBlocks_t1 = 0; // counting "in_use" nmethods only.
static unsigned int nBlocks_t2 = 0; // counting "in_use" nmethods only.
static unsigned int nBlocks_alive = 0; // counting "not_used" and "not_entrant" nmethods only.
static unsigned int nBlocks_stub = 0;
static struct FreeBlk* FreeArray = nullptr;
static unsigned int alloc_freeBlocks = 0;
static struct TopSizeBlk* TopSizeArray = nullptr;
static unsigned int alloc_topSizeBlocks = 0;
static unsigned int used_topSizeBlocks = 0;
static struct SizeDistributionElement* SizeDistributionArray = nullptr;
static int latest_compilation_id = 0; // compilation id fetched from CompileBroker at aggregation time
static volatile bool initialization_complete = false; // set once CodeHeapStatArray has been zero-initialized
// Returns the display name for a heap: the heap's own name when the code
// cache is segmented, otherwise the generic "CodeHeap" label.
const char* CodeHeapState::get_heapName(CodeHeap* heap) {
  return SegmentedCodeCache ? heap->name() : "CodeHeap";
}
// Returns the index into CodeHeapStatArray for the named heap, registering
// the name on first encounter. Returns maxHeaps (an out-of-range sentinel)
// when the name is null or no free slot remains.
unsigned int CodeHeapState::findHeapIndex(outputStream* out, const char* heapName) {
  if (heapName == nullptr) {
    return maxHeaps;
  }
  if (!SegmentedCodeCache) {
    // Unsegmented code cache: everything lives in slot 0.
    nHeaps = 1;
    CodeHeapStatArray[0].heapName = heapName;
    return 0;
  }
  // Segmented: look for a slot already registered under this name.
  for (unsigned int ix = 0; ix < nHeaps; ix++) {
    const char* known = CodeHeapStatArray[ix].heapName;
    if (known != nullptr && strcmp(heapName, known) == 0) {
      return ix;
    }
  }
  // Unknown name: register it, unless all slots are taken.
  if (nHeaps == maxHeaps) {
    out->print_cr("Too many heap segments for current limit(%d).", maxHeaps);
    return maxHeaps;
  }
  CodeHeapStatArray[nHeaps].heapName = heapName;
  return nHeaps++;
}
// Loads the module-level statistics globals from the slot registered for
// heapName. If no slot can be found (or registered), all globals are reset
// to their empty defaults instead.
void CodeHeapState::get_HeapStatGlobals(outputStream* out, const char* heapName) {
  unsigned int ix = findHeapIndex(out, heapName);
  if (ix >= maxHeaps) {
    // Heap unknown and unregistrable: present an empty snapshot.
    StatArray = nullptr;
    seg_size = 0;
    log2_seg_size = 0;
    alloc_granules = 0;
    granule_size = 0;
    segment_granules = false;
    nBlocks_t1 = 0;
    nBlocks_t2 = 0;
    nBlocks_alive = 0;
    nBlocks_stub = 0;
    FreeArray = nullptr;
    alloc_freeBlocks = 0;
    TopSizeArray = nullptr;
    alloc_topSizeBlocks = 0;
    used_topSizeBlocks = 0;
    SizeDistributionArray = nullptr;
    return;
  }
  const CodeHeapStat& slot = CodeHeapStatArray[ix];
  StatArray = slot.StatArray;
  seg_size = slot.segment_size;
  log2_seg_size = seg_size == 0 ? 0 : exact_log2(seg_size);
  alloc_granules = slot.alloc_granules;
  granule_size = slot.granule_size;
  segment_granules = slot.segment_granules;
  nBlocks_t1 = slot.nBlocks_t1;
  nBlocks_t2 = slot.nBlocks_t2;
  nBlocks_alive = slot.nBlocks_alive;
  nBlocks_stub = slot.nBlocks_stub;
  FreeArray = slot.FreeArray;
  alloc_freeBlocks = slot.alloc_freeBlocks;
  TopSizeArray = slot.TopSizeArray;
  alloc_topSizeBlocks = slot.alloc_topSizeBlocks;
  used_topSizeBlocks = slot.used_topSizeBlocks;
  SizeDistributionArray = slot.SizeDistributionArray;
}
// Stores the current values of the module-level statistics globals back
// into the slot registered for heapName. No-op when the heap has no slot.
void CodeHeapState::set_HeapStatGlobals(outputStream* out, const char* heapName) {
  unsigned int ix = findHeapIndex(out, heapName);
  if (ix >= maxHeaps) {
    return;
  }
  CodeHeapStat& slot = CodeHeapStatArray[ix];
  slot.StatArray = StatArray;
  slot.segment_size = seg_size;
  slot.alloc_granules = alloc_granules;
  slot.granule_size = granule_size;
  slot.segment_granules = segment_granules;
  slot.nBlocks_t1 = nBlocks_t1;
  slot.nBlocks_t2 = nBlocks_t2;
  slot.nBlocks_alive = nBlocks_alive;
  slot.nBlocks_stub = nBlocks_stub;
  slot.FreeArray = FreeArray;
  slot.alloc_freeBlocks = alloc_freeBlocks;
  slot.TopSizeArray = TopSizeArray;
  slot.alloc_topSizeBlocks = alloc_topSizeBlocks;
  slot.used_topSizeBlocks = used_topSizeBlocks;
  slot.SizeDistributionArray = SizeDistributionArray;
}
//---< get a new statistics array >---
// Allocates (if not yet present) and zero-fills the global StatArray for
// nElem granules of the given granularity. On allocation failure, prints a
// diagnostic and leaves the bookkeeping globals cleared; callers check
// StatArray for nullptr afterwards.
void CodeHeapState::prepare_StatArray(outputStream* out, size_t nElem, size_t granularity, const char* heapName) {
  if (StatArray == nullptr) {
    StatArray = new StatElement[nElem];
    // Remember the geometry this array was allocated for.
    alloc_granules = nElem;
    granule_size = granularity;
  }
  if (StatArray != nullptr) {
    //---< initialize statistics array >---
    memset((void*)StatArray, 0, nElem*sizeof(StatElement));
  } else {
    //---< just do nothing if allocation failed >---
    out->print_cr("Statistics could not be collected for %s, probably out of memory.", heapName);
    out->print_cr("Current granularity is %zu bytes. Try a coarser granularity.", granularity);
    alloc_granules = 0;
    granule_size = 0;
  }
}
//---< get a new free block array >---
// Allocates (if not yet present) and zero-fills the global FreeArray
// holding up to nElem free-block descriptors. On allocation failure,
// prints a diagnostic and clears alloc_freeBlocks.
void CodeHeapState::prepare_FreeArray(outputStream* out, unsigned int nElem, const char* heapName) {
  if (FreeArray == nullptr) {
    FreeArray = new FreeBlk[nElem];
    // Remember the capacity this array was allocated for.
    alloc_freeBlocks = nElem;
  }
  if (FreeArray != nullptr) {
    //---< initialize free block array >---
    memset((void*)FreeArray, 0, alloc_freeBlocks*sizeof(FreeBlk));
  } else {
    //---< just do nothing if allocation failed >---
    out->print_cr("Free space analysis cannot be done for %s, probably out of memory.", heapName);
    alloc_freeBlocks = 0;
  }
}
//---< get a new TopSizeArray >---
// Allocates (if not yet present) and zero-fills the global TopSizeArray
// used to track the nElem largest CodeHeap blocks. On allocation failure,
// prints a diagnostic and clears the bookkeeping counters.
void CodeHeapState::prepare_TopSizeArray(outputStream* out, unsigned int nElem, const char* heapName) {
  if (TopSizeArray == nullptr) {
    TopSizeArray = new TopSizeBlk[nElem];
    // Remember the capacity this array was allocated for; list starts empty.
    alloc_topSizeBlocks = nElem;
    used_topSizeBlocks = 0;
  }
  if (TopSizeArray != nullptr) {
    //---< initialize TopSizeArray >---
    memset((void*)TopSizeArray, 0, nElem*sizeof(TopSizeBlk));
    used_topSizeBlocks = 0;
  } else {
    //---< just do nothing if allocation failed >---
    out->print_cr("Top-%d list of largest CodeHeap blocks can not be collected for %s, probably out of memory.", nElem, heapName);
    alloc_topSizeBlocks = 0;
  }
}
//---< get a new SizeDistributionArray >---
// Allocates (if not yet present) and initializes the logarithmic size
// histogram. Bucket i covers block lengths in
// [2^(i-log2_seg_size), 2^(i+1-log2_seg_size)) segments; the bucket at
// index log2_seg_size-1 catches everything below one full range step.
void CodeHeapState::prepare_SizeDistArray(outputStream* out, unsigned int nElem, const char* heapName) {
  if (SizeDistributionArray == nullptr) {
    SizeDistributionArray = new SizeDistributionElement[nElem];
  }
  if (SizeDistributionArray == nullptr) {
    //---< just do nothing if allocation failed >---
    out->print_cr("Size distribution can not be collected for %s, probably out of memory.", heapName);
  } else {
    //---< initialize SizeDistArray >---
    memset((void*)SizeDistributionArray, 0, nElem*sizeof(SizeDistributionElement));
    // Logarithmic range growth. First range starts at _segment_size.
    // NOTE(review): indexing with log2_seg_size-1 assumes log2_seg_size >= 1;
    // aggregate() returns early when seg_size == 0, which appears to
    // guarantee this — confirm no other caller reaches here with seg_size 0.
    SizeDistributionArray[log2_seg_size-1].rangeEnd = 1U;
    for (unsigned int i = log2_seg_size; i < nElem; i++) {
      SizeDistributionArray[i].rangeStart = 1U << (i - log2_seg_size);
      SizeDistributionArray[i].rangeEnd = 1U << ((i+1) - log2_seg_size);
    }
  }
}
//---< account a block in the SizeDistributionArray >---
// Records a used block of length len (in segments) in the matching
// histogram bucket: bumps the bucket's count and adds len to its length sum.
// No-op when the histogram was never allocated.
void CodeHeapState::update_SizeDistArray(outputStream* out, unsigned int len) {
  if (SizeDistributionArray == nullptr) {
    return;
  }
  for (unsigned int ix = log2_seg_size-1; ix < nSizeDistElements; ix++) {
    SizeDistributionElement* bucket = &SizeDistributionArray[ix];
    if ((bucket->rangeStart <= len) && (len < bucket->rangeEnd)) {
      bucket->lenSum += len;
      bucket->count++;
      break;
    }
  }
}
// Releases the global StatArray and resets its bookkeeping globals.
void CodeHeapState::discard_StatArray(outputStream* out) {
  if (StatArray != nullptr) {
    // StatArray is allocated with new StatElement[] in prepare_StatArray(),
    // so it must be released with delete[]; scalar delete on an array-new
    // pointer is undefined behavior. This also matches the delete[] used by
    // the sibling discard_* functions.
    delete[] StatArray;
    StatArray = nullptr;
    alloc_granules = 0;
    granule_size = 0;
  }
}
// Releases the global FreeArray and resets its bookkeeping counter.
void CodeHeapState::discard_FreeArray(outputStream* out) {
  if (FreeArray == nullptr) {
    return;
  }
  delete[] FreeArray;
  FreeArray = nullptr;
  alloc_freeBlocks = 0;
}
// Releases the global TopSizeArray — including the blob-name strings owned
// by its entries — and resets the bookkeeping counters.
void CodeHeapState::discard_TopSizeArray(outputStream* out) {
  if (TopSizeArray == nullptr) {
    return;
  }
  // Entries own their blob_name strings (allocated via os::strdup/os::malloc
  // during aggregation); free them before dropping the array itself.
  for (unsigned int ix = 0; ix < alloc_topSizeBlocks; ix++) {
    const char* name = TopSizeArray[ix].blob_name;
    if (name != nullptr) {
      os::free((void*)name);
    }
  }
  delete[] TopSizeArray;
  TopSizeArray = nullptr;
  alloc_topSizeBlocks = 0;
  used_topSizeBlocks = 0;
}
// Releases the global size distribution histogram.
void CodeHeapState::discard_SizeDistArray(outputStream* out) {
  if (SizeDistributionArray == nullptr) {
    return;
  }
  delete[] SizeDistributionArray;
  SizeDistributionArray = nullptr;
}
// Discard all allocated internal data structures.
// This should be done after an analysis session is completed.
void CodeHeapState::discard(outputStream* out, CodeHeap* heap) {
  if (!initialization_complete) {
    return; // nothing was ever allocated
  }
  for (unsigned int ix = 0; ix < nHeaps; ix++) {
    const char* name = CodeHeapStatArray[ix].heapName;
    // Pull this heap's pointers into the globals, free everything, then
    // write the (now cleared) globals back into the slot.
    get_HeapStatGlobals(out, name);
    discard_StatArray(out);
    discard_FreeArray(out);
    discard_TopSizeArray(out);
    discard_SizeDistArray(out);
    set_HeapStatGlobals(out, name);
    CodeHeapStatArray[ix].heapName = nullptr;
  }
  nHeaps = 0;
}
void CodeHeapState::aggregate(outputStream* out, CodeHeap* heap, size_t granularity) {
unsigned int nBlocks_free = 0;
unsigned int nBlocks_used = 0;
unsigned int nBlocks_zomb = 0;
unsigned int nBlocks_disconn = 0;
unsigned int nBlocks_notentr = 0;
//---< max & min of TopSizeArray >---
// it is sufficient to have these sizes as 32bit unsigned ints.
// The CodeHeap is limited in size to 4GB. Furthermore, the sizes
// are stored in _segment_size units, scaling them down by a factor of 64 (at least).
unsigned int currMax = 0;
unsigned int currMin = 0;
unsigned int currMin_ix = 0;
unsigned long total_iterations = 0;
bool done = false;
const int min_granules = 256;
const int max_granules = 512*K; // limits analyzable CodeHeap (with segment_granules) to 32M..128M
// results in StatArray size of 24M (= max_granules * 48 Bytes per element)
// For a 1GB CodeHeap, the granule size must be at least 2kB to not violate the max_granles limit.
const char* heapName = get_heapName(heap);
BUFFEREDSTREAM_DECL(ast, out)
if (!initialization_complete) {
memset(CodeHeapStatArray, 0, sizeof(CodeHeapStatArray));
initialization_complete = true;
printBox(ast, '=', "C O D E H E A P A N A L Y S I S (general remarks)", nullptr);
ast->print_cr(" The code heap analysis function provides deep insights into\n"
" the inner workings and the internal state of the Java VM's\n"
" code cache - the place where all the JVM generated machine\n"
" code is stored.\n"
" \n"
" This function is designed and provided for support engineers\n"
" to help them understand and solve issues in customer systems.\n"
" It is not intended for use and interpretation by other persons.\n"
" \n");
BUFFEREDSTREAM_FLUSH("")
}
get_HeapStatGlobals(out, heapName);
// Since we are (and must be) analyzing the CodeHeap contents under the CodeCache_lock,
// all heap information is "constant" and can be safely extracted/calculated before we
// enter the while() loop. Actually, the loop will only be iterated once.
char* low_bound = heap->low_boundary();
size_t size = heap->capacity();
size_t res_size = heap->max_capacity();
seg_size = heap->segment_size();
log2_seg_size = seg_size == 0 ? 0 : exact_log2(seg_size); // This is a global static value.
if (seg_size == 0) {
printBox(ast, '-', "Heap not fully initialized yet, segment size is zero for segment ", heapName);
BUFFEREDSTREAM_FLUSH("")
return;
}
if (!holding_required_locks()) {
printBox(ast, '-', "Must be at safepoint or hold Compile_lock and CodeCache_lock when calling aggregate function for ", heapName);
BUFFEREDSTREAM_FLUSH("")
return;
}
// Calculate granularity of analysis (and output).
// The CodeHeap is managed (allocated) in segments (units) of CodeCacheSegmentSize.
// The CodeHeap can become fairly large, in particular in productive real-life systems.
//
// It is often neither feasible nor desirable to aggregate the data with the highest possible
// level of detail, i.e. inspecting and printing each segment on its own.
//
// The granularity parameter allows to specify the level of detail available in the analysis.
// It must be a positive multiple of the segment size and should be selected such that enough
// detail is provided while, at the same time, the printed output does not explode.
//
// By manipulating the granularity value, we enforce that at least min_granules units
// of analysis are available. We also enforce an upper limit of max_granules units to
// keep the amount of allocated storage in check.
//
// Finally, we adjust the granularity such that each granule covers at most 64k-1 segments.
// This is necessary to prevent an unsigned short overflow while accumulating space information.
//
assert(granularity > 0, "granularity should be positive.");
if (granularity > size) {
granularity = size;
}
if (size/granularity < min_granules) {
granularity = size/min_granules; // at least min_granules granules
}
granularity = granularity & (~(seg_size - 1)); // must be multiple of seg_size
if (granularity < seg_size) {
granularity = seg_size; // must be at least seg_size
}
if (size/granularity > max_granules) {
granularity = size/max_granules; // at most max_granules granules
}
granularity = granularity & (~(seg_size - 1)); // must be multiple of seg_size
if (granularity>>log2_seg_size >= (1L<<sizeof(unsigned short)*8)) {
granularity = ((1L<<(sizeof(unsigned short)*8))-1)<<log2_seg_size; // Limit: (64k-1) * seg_size
}
segment_granules = granularity == seg_size;
size_t granules = (size + (granularity-1))/granularity;
printBox(ast, '=', "C O D E H E A P A N A L Y S I S (used blocks) for segment ", heapName);
ast->print_cr(" The aggregate step takes an aggregated snapshot of the CodeHeap.\n"
" Subsequent print functions create their output based on this snapshot.\n"
" The CodeHeap is a living thing, and every effort has been made for the\n"
" collected data to be consistent. Only the method names and signatures\n"
" are retrieved at print time. That may lead to rare cases where the\n"
" name of a method is no longer available, e.g. because it was unloaded.\n");
ast->print_cr(" CodeHeap committed size %zuK (%zuM), reserved size %zuK (%zuM), %d%% occupied.",
size/(size_t)K, size/(size_t)M, res_size/(size_t)K, res_size/(size_t)M, (unsigned int)(100.0*size/res_size));
ast->print_cr(" CodeHeap allocation segment size is %zu bytes. This is the smallest possible granularity.", seg_size);
ast->print_cr(" CodeHeap (committed part) is mapped to %zu granules of size %zu bytes.", granules, granularity);
ast->print_cr(" Each granule takes %zu bytes of C heap, that is %zuK in total for statistics data.", sizeof(StatElement), (sizeof(StatElement)*granules)/(size_t)K);
ast->print_cr(" The number of granules is limited to %dk, requiring a granules size of at least %d bytes for a 1GB heap.", (unsigned int)(max_granules/K), (unsigned int)(G/max_granules));
BUFFEREDSTREAM_FLUSH("\n")
while (!done) {
//---< reset counters with every aggregation >---
nBlocks_t1 = 0;
nBlocks_t2 = 0;
nBlocks_alive = 0;
nBlocks_stub = 0;
nBlocks_free = 0;
nBlocks_used = 0;
nBlocks_zomb = 0;
nBlocks_disconn = 0;
nBlocks_notentr = 0;
//---< discard old arrays if size does not match >---
if (granules != alloc_granules) {
discard_StatArray(out);
discard_TopSizeArray(out);
}
//---< allocate arrays if they don't yet exist, initialize >---
prepare_StatArray(out, granules, granularity, heapName);
if (StatArray == nullptr) {
set_HeapStatGlobals(out, heapName);
return;
}
prepare_TopSizeArray(out, maxTopSizeBlocks, heapName);
prepare_SizeDistArray(out, nSizeDistElements, heapName);
latest_compilation_id = CompileBroker::get_compilation_id();
int highest_compilation_id = 0;
size_t usedSpace = 0;
size_t t1Space = 0;
size_t t2Space = 0;
size_t aliveSpace = 0;
size_t disconnSpace = 0;
size_t notentrSpace = 0;
size_t stubSpace = 0;
size_t freeSpace = 0;
size_t maxFreeSize = 0;
HeapBlock* maxFreeBlock = nullptr;
bool insane = false;
unsigned int n_methods = 0;
for (HeapBlock *h = heap->first_block(); h != nullptr && !insane; h = heap->next_block(h)) {
unsigned int hb_len = (unsigned int)h->length(); // despite being size_t, length can never overflow an unsigned int.
size_t hb_bytelen = ((size_t)hb_len)<<log2_seg_size;
unsigned int ix_beg = (unsigned int)(((char*)h-low_bound)/granule_size);
//---------------------------------------------------------------------------
// Per-HeapBlock accounting step of the code heap scan:
// classify block <h> (free block / nmethod / other blob), accumulate the
// global counters, maintain TopSizeArray (list of largest blocks), and
// attribute the block's space to the granules (StatArray entries) it covers.
// NOTE(review): this is an excerpt of a larger loop body; <h>, <ix_beg>,
// <hb_len>, <hb_bytelen> and all accumulators are declared outside it.
//---------------------------------------------------------------------------
// Index of the last granule this block touches (inclusive).
unsigned int ix_end = (unsigned int)(((char*)h-low_bound+(hb_bytelen-1))/granule_size);
int compile_id = 0;
CompLevel comp_lvl = CompLevel_none;
compType cType = noComp;
blobType cbType = noType;
//---< some sanity checks >---
// Do not assert here, just check, print error message and return.
// This is a diagnostic function. It is not supposed to tear down the VM.
if ((char*)h < low_bound) {
  insane = true; ast->print_cr("Sanity check: HeapBlock @%p below low bound (%p)", (char*)h, low_bound);
}
if ((char*)h > (low_bound + res_size)) {
  insane = true; ast->print_cr("Sanity check: HeapBlock @%p outside reserved range (%p)", (char*)h, low_bound + res_size);
}
if ((char*)h > (low_bound + size)) {
  insane = true; ast->print_cr("Sanity check: HeapBlock @%p outside used range (%p)", (char*)h, low_bound + size);
}
if (ix_end >= granules) {
  insane = true; ast->print_cr("Sanity check: end index (%d) out of bounds (%zu)", ix_end, granules);
}
if (size != heap->capacity()) {
  insane = true; ast->print_cr("Sanity check: code heap capacity has changed (%zuK to %zuK)", size/(size_t)K, heap->capacity()/(size_t)K);
}
if (ix_beg > ix_end) {
  insane = true; ast->print_cr("Sanity check: end index (%d) lower than begin index (%d)", ix_end, ix_beg);
}
if (insane) {
  BUFFEREDSTREAM_FLUSH("")
  continue;  // skip this block; resume the enclosing block-scan loop
}

if (h->free()) {
  // Free block: account total free space and remember the largest free block.
  nBlocks_free++;
  freeSpace += hb_bytelen;
  if (hb_bytelen > maxFreeSize) {
    maxFreeSize = hb_bytelen;
    maxFreeBlock = h;
  }
} else {
  // Used block: locate the containing CodeBlob and classify it.
  update_SizeDistArray(out, hb_len);
  nBlocks_used++;
  usedSpace += hb_bytelen;
  CodeBlob* cb = (CodeBlob*)heap->find_start(h);
  cbType = get_cbType(cb); // Will check for cb == nullptr and other safety things.
  if (cbType != noType) {
    // blob_name is heap-allocated (os::strdup/os::malloc). Ownership is either
    // transferred to a TopSizeArray entry below (signalled by blob_name = nullptr)
    // or released at the end of this branch.
    const char* blob_name = nullptr;
    unsigned int nm_size = 0;
    nmethod* nm = cb->as_nmethod_or_null();
    if (nm != nullptr) { // no is_readable check required, nm = (nmethod*)cb.
      ResourceMark rm;
      Method* method = nm->method();
      if (nm->is_in_use() || nm->is_not_entrant()) {
        blob_name = os::strdup(method->name_and_sig_as_C_string());
      } else {
        blob_name = os::strdup(cb->name());
      }
#if INCLUDE_JVMCI
      // Append " jvmci_name=<name>" to the blob name if JVMCI provides one.
      const char* jvmci_name = nm->jvmci_name();
      if (jvmci_name != nullptr) {
        // NOTE(review): this local <size> shadows the enclosing <size>
        // (heap capacity, used in the sanity checks above).
        size_t size = ::strlen(blob_name) + ::strlen(" jvmci_name=") + ::strlen(jvmci_name) + 1;
        char* new_blob_name = (char*)os::malloc(size, mtInternal);
        os::snprintf(new_blob_name, size, "%s jvmci_name=%s", blob_name, jvmci_name);
        os::free((void*)blob_name);
        blob_name = new_blob_name;
      }
#endif
      nm_size = nm->total_size();
      compile_id = nm->compile_id();
      comp_lvl = (CompLevel)(nm->comp_level());
      if (nm->is_compiled_by_c1()) {
        cType = c1;
      }
      if (nm->is_compiled_by_c2()) {
        cType = c2;
      }
      if (nm->is_compiled_by_jvmci()) {
        cType = jvmci;
      }
      // Per-state nmethod bookkeeping. In-use methods are only counted here;
      // their space is attributed to granules further below.
      switch (cbType) {
        case nMethod_inuse: { // only for executable methods!!!
          // space for these cbs is accounted for later.
          n_methods++;
          break;
        }
        case nMethod_notused:
          nBlocks_alive++;
          nBlocks_disconn++;
          aliveSpace += hb_bytelen;
          disconnSpace += hb_bytelen;
          break;
        case nMethod_notentrant: // equivalent to nMethod_alive
          nBlocks_alive++;
          nBlocks_notentr++;
          aliveSpace += hb_bytelen;
          notentrSpace += hb_bytelen;
          break;
        default:
          break;
      }
    } else {
      blob_name = os::strdup(cb->name());
    }
    //------------------------------------------
    //---< register block in TopSizeArray >---
    //------------------------------------------
    // TopSizeArray is kept as an array-embedded singly linked list (linked via
    // the .index field, terminated by tsbStopper), sorted by descending block
    // length; element 0 is always the entry for the largest block.
    if (alloc_topSizeBlocks > 0) {
      if (used_topSizeBlocks == 0) {
        // First entry ever: it is trivially both the maximum and the minimum.
        TopSizeArray[0].start = h;
        TopSizeArray[0].blob_name = blob_name;
        TopSizeArray[0].len = hb_len;
        TopSizeArray[0].index = tsbStopper;
        TopSizeArray[0].nm_size = nm_size;
        TopSizeArray[0].compiler = cType;
        TopSizeArray[0].level = comp_lvl;
        TopSizeArray[0].type = cbType;
        currMax = hb_len;
        currMin = hb_len;
        currMin_ix = 0;
        used_topSizeBlocks++;
        blob_name = nullptr; // indicate blob_name was consumed
        // This check roughly cuts 5000 iterations (JVM98, mixed, dbg, termination stats):
      } else if ((used_topSizeBlocks < alloc_topSizeBlocks) && (hb_len < currMin)) {
        //---< all blocks in list are larger, but there is room left in array >---
        // Append at the tail (the previous minimum links to the new entry).
        TopSizeArray[currMin_ix].index = used_topSizeBlocks;
        TopSizeArray[used_topSizeBlocks].start = h;
        TopSizeArray[used_topSizeBlocks].blob_name = blob_name;
        TopSizeArray[used_topSizeBlocks].len = hb_len;
        TopSizeArray[used_topSizeBlocks].index = tsbStopper;
        TopSizeArray[used_topSizeBlocks].nm_size = nm_size;
        TopSizeArray[used_topSizeBlocks].compiler = cType;
        TopSizeArray[used_topSizeBlocks].level = comp_lvl;
        TopSizeArray[used_topSizeBlocks].type = cbType;
        currMin = hb_len;
        currMin_ix = used_topSizeBlocks;
        used_topSizeBlocks++;
        blob_name = nullptr; // indicate blob_name was consumed
      } else {
        // This check cuts total_iterations by a factor of 6 (JVM98, mixed, dbg, termination stats):
        // We don't need to search the list if we know beforehand that the current block size is
        // smaller than the currently recorded minimum and there is no free entry left in the list.
        if (!((used_topSizeBlocks == alloc_topSizeBlocks) && (hb_len <= currMin))) {
          if (currMax < hb_len) {
            currMax = hb_len;
          }
          unsigned int i;
          unsigned int prev_i = tsbStopper;
          unsigned int limit_i = 0;
          // Walk the sorted list to find the insert position; limit_i and the
          // used_topSizeBlocks bound guard against a corrupted (cyclic) list.
          for (i = 0; i != tsbStopper; i = TopSizeArray[i].index) {
            if (limit_i++ >= alloc_topSizeBlocks) {
              insane = true; break; // emergency exit
            }
            if (i >= used_topSizeBlocks) {
              insane = true; break; // emergency exit
            }
            total_iterations++;
            if (TopSizeArray[i].len < hb_len) {
              //---< We want to insert here, element <i> is smaller than the current one >---
              if (used_topSizeBlocks < alloc_topSizeBlocks) { // still room for a new entry to insert
                // old entry gets moved to the next free element of the array.
                // That's necessary to keep the entry for the largest block at index 0.
                // This move might cause the current minimum to be moved to another place
                if (i == currMin_ix) {
                  assert(TopSizeArray[i].len == currMin, "sort error");
                  currMin_ix = used_topSizeBlocks;
                }
                memcpy((void*)&TopSizeArray[used_topSizeBlocks], (void*)&TopSizeArray[i], sizeof(TopSizeBlk));
                TopSizeArray[i].start = h;
                TopSizeArray[i].blob_name = blob_name;
                TopSizeArray[i].len = hb_len;
                TopSizeArray[i].index = used_topSizeBlocks;
                TopSizeArray[i].nm_size = nm_size;
                TopSizeArray[i].compiler = cType;
                TopSizeArray[i].level = comp_lvl;
                TopSizeArray[i].type = cbType;
                used_topSizeBlocks++;
                blob_name = nullptr; // indicate blob_name was consumed
              } else { // no room for new entries, current block replaces entry for smallest block
                //---< Find last entry (entry for smallest remembered block) >---
                // We either want to insert right before the smallest entry, which is when <i>
                // indexes the smallest entry. We then just overwrite the smallest entry.
                // What's more likely:
                // We want to insert somewhere in the list. The smallest entry (@<j>) then falls off the cliff.
                // The element at the insert point <i> takes its slot. The second-smallest entry now becomes smallest.
                // Data of the current block is filled in at index <i>.
                unsigned int j = i;
                unsigned int prev_j = tsbStopper;
                unsigned int limit_j = 0;
                while (TopSizeArray[j].index != tsbStopper) {
                  if (limit_j++ >= alloc_topSizeBlocks) {
                    insane = true; break; // emergency exit
                  }
                  if (j >= used_topSizeBlocks) {
                    insane = true; break; // emergency exit
                  }
                  total_iterations++;
                  prev_j = j;
                  j = TopSizeArray[j].index;
                }
                if (!insane) {
                  // The entry about to be overwritten owns its name; release it.
                  if (TopSizeArray[j].blob_name != nullptr) {
                    os::free((void*)TopSizeArray[j].blob_name);
                  }
                  if (prev_j == tsbStopper) {
                    //---< Above while loop did not iterate, we already are the min entry >---
                    //---< We have to just replace the smallest entry >---
                    currMin = hb_len;
                    currMin_ix = j;
                    TopSizeArray[j].start = h;
                    TopSizeArray[j].blob_name = blob_name;
                    TopSizeArray[j].len = hb_len;
                    TopSizeArray[j].index = tsbStopper; // already set!!
                    // NOTE(review): neighbors use index <j>; this is harmless only
                    // because the while loop did not iterate here, so j == i.
                    // Using <j> would be consistent and clearer.
                    TopSizeArray[i].nm_size = nm_size;
                    TopSizeArray[j].compiler = cType;
                    TopSizeArray[j].level = comp_lvl;
                    TopSizeArray[j].type = cbType;
                  } else {
                    //---< second-smallest entry is now smallest >---
                    TopSizeArray[prev_j].index = tsbStopper;
                    currMin = TopSizeArray[prev_j].len;
                    currMin_ix = prev_j;
                    //---< previously smallest entry gets overwritten >---
                    memcpy((void*)&TopSizeArray[j], (void*)&TopSizeArray[i], sizeof(TopSizeBlk));
                    TopSizeArray[i].start = h;
                    TopSizeArray[i].blob_name = blob_name;
                    TopSizeArray[i].len = hb_len;
                    TopSizeArray[i].index = j;
                    TopSizeArray[i].nm_size = nm_size;
                    TopSizeArray[i].compiler = cType;
                    TopSizeArray[i].level = comp_lvl;
                    TopSizeArray[i].type = cbType;
                  }
                  blob_name = nullptr; // indicate blob_name was consumed
                } // insane
              }
              break;
            }
            // NOTE(review): prev_i is written but never read within this loop
            // (dead store; the declaration at the top of this scope is unused).
            prev_i = i;
          }
          if (insane) {
            // Note: regular analysis could probably continue by resetting "insane" flag.
            out->print_cr("Possible loop in TopSizeBlocks list detected. Analysis aborted.");
            discard_TopSizeArray(out);
          }
        }
      }
    }
    // Ownership of blob_name was not transferred to TopSizeArray; release it.
    if (blob_name != nullptr) {
      os::free((void*)blob_name);
      blob_name = nullptr;
    }
    //----------------------------------------------
    //---< END register block in TopSizeArray >---
    //----------------------------------------------
  } else {
    nBlocks_zomb++;
  }

  if (ix_beg == ix_end) {
    // Block fits into a single granule: attribute all of its space there.
    StatArray[ix_beg].type = cbType;
    switch (cbType) {
      case nMethod_inuse:
        highest_compilation_id = (highest_compilation_id >= compile_id) ? highest_compilation_id : compile_id;
        if (comp_lvl < CompLevel_full_optimization) {
          // Tier-1 bucket (pre-full-optimization compilation levels).
          nBlocks_t1++;
          t1Space += hb_bytelen;
          StatArray[ix_beg].t1_count++;
          StatArray[ix_beg].t1_space += (unsigned short)hb_len;
          // t1_age tracks the largest (i.e. most recent) compile id seen.
          StatArray[ix_beg].t1_age = StatArray[ix_beg].t1_age < compile_id ? compile_id : StatArray[ix_beg].t1_age;
        } else {
          nBlocks_t2++;
          t2Space += hb_bytelen;
          StatArray[ix_beg].t2_count++;
          StatArray[ix_beg].t2_space += (unsigned short)hb_len;
          StatArray[ix_beg].t2_age = StatArray[ix_beg].t2_age < compile_id ? compile_id : StatArray[ix_beg].t2_age;
        }
        StatArray[ix_beg].level = comp_lvl;
        StatArray[ix_beg].compiler = cType;
        break;
      default:
        // Anything that is not an in-use nmethod is counted as "stub" space.
        nBlocks_stub++;
        stubSpace += hb_bytelen;
        StatArray[ix_beg].stub_count++;
        StatArray[ix_beg].stub_space += (unsigned short)hb_len;
        break;
    }
  } else {
    // Block spans multiple granules: the first and last granule receive their
    // partial shares here. NOTE(review): the fully covered granules in between
    // are presumably accounted for just past this excerpt — the fragment ends
    // before that code; confirm against the full function.
    unsigned int beg_space = (unsigned int)(granule_size - ((char*)h - low_bound - ix_beg*granule_size));
    unsigned int end_space = (unsigned int)(hb_bytelen - beg_space - (ix_end-ix_beg-1)*granule_size);
    beg_space = beg_space>>log2_seg_size; // store in units of _segment_size
    end_space = end_space>>log2_seg_size; // store in units of _segment_size
    StatArray[ix_beg].type = cbType;
    StatArray[ix_end].type = cbType;
    switch (cbType) {
      case nMethod_inuse:
        highest_compilation_id = (highest_compilation_id >= compile_id) ? highest_compilation_id : compile_id;
        if (comp_lvl < CompLevel_full_optimization) {
          nBlocks_t1++;
          t1Space += hb_bytelen;
          StatArray[ix_beg].t1_count++;
          StatArray[ix_beg].t1_space += (unsigned short)beg_space;
          StatArray[ix_beg].t1_age = StatArray[ix_beg].t1_age < compile_id ? compile_id : StatArray[ix_beg].t1_age;
          StatArray[ix_end].t1_count++;
          StatArray[ix_end].t1_space += (unsigned short)end_space;
          StatArray[ix_end].t1_age = StatArray[ix_end].t1_age < compile_id ? compile_id : StatArray[ix_end].t1_age;
        } else {
          nBlocks_t2++;
          t2Space += hb_bytelen;
          StatArray[ix_beg].t2_count++;
          StatArray[ix_beg].t2_space += (unsigned short)beg_space;
          StatArray[ix_beg].t2_age = StatArray[ix_beg].t2_age < compile_id ? compile_id : StatArray[ix_beg].t2_age;
          StatArray[ix_end].t2_count++;
          StatArray[ix_end].t2_space += (unsigned short)end_space;
          StatArray[ix_end].t2_age = StatArray[ix_end].t2_age < compile_id ? compile_id : StatArray[ix_end].t2_age;
        }
        StatArray[ix_beg].level = comp_lvl;
        StatArray[ix_beg].compiler = cType;
        StatArray[ix_end].level = comp_lvl;